Pre-allocate vids for branches (#2882)

Each branch node may have up to 16 sub-items - currently, these are
assigned a VertexID when they are first needed, leading to a
mostly random order of vertex IDs across the sub-items of a branch.

Here, we pre-allocate all 16 vertex IDs such that when a branch sub-item
is filled, it already has a VertexID waiting for it. This brings several
important benefits:

* sub-items are sorted and "close" in their ID sequencing - this means
that when RocksDB stores them, they are likely to end up in the same
data block, thus improving read efficiency
* because the IDs are consecutive, we can store just the starting ID and
a bitmap of which sub-items are in use (see the sketch after this list) -
this reduces disk space usage for branches, allowing more of them to fit
into a single disk read, further improving disk read and caching
performance - disk usage at block 18M is down from 84 to 78 GB!
* the in-memory footprint of VertexRef is reduced, allowing more instances
to fit into caches and less memory to be used overall
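
The new layout boils down to two small fields per branch plus a trivial mapping from nibble to sub-item ID. A minimal sketch of the idea, simplified from the `bVid`/`setUsed` helpers in this diff (the real code uses the distinct `VertexID` type rather than plain `uint64`, and `BranchSketch` is a made-up name for illustration):

```nim
type
  BranchSketch = object
    startVid: uint64 ## first of the 16 pre-allocated vertex ids
    used: uint16     ## bitmap - bit `n` set means sub-item `n` is in use

func subVid(b: BranchSketch, nibble: uint8): uint64 =
  ## Sub-item `nibble` lives at `startVid + nibble` when its bit is set -
  ## no per-slot vertex id needs to be stored.
  if (b.used and (1'u16 shl nibble)) > 0:
    b.startVid + uint64(nibble)
  else:
    0'u64 # invalid / unused slot

func markUsed(b: var BranchSketch, nibble: uint8) =
  ## Flip the bit for `nibble` - the id itself was already reserved when
  ## the block of 16 ids was allocated for the branch.
  b.used = b.used or (1'u16 shl nibble)
```

On disk, only `startVid` and the 16-bit `used` bitmap are written instead of up to 16 individually encoded vertex IDs, which is where the space saving comes from.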

Because of the increased locality of reference, it turns out that we no
longer need to iterate over the entire database to efficiently generate
the hash key database - the normal computation is now fast enough. This
also significantly benefits "live" chain processing, where each dirtied
key must be accompanied by a read of all the branch sub-items next to it -
most of the performance benefit in this branch comes from this
locality-of-reference improvement.
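
To make the locality argument concrete: computing the key of a dirtied branch means reading every one of its sub-items, and with this change those reads target consecutive vertex IDs. A rough sketch of the access pattern, essentially what the new `pairs` iterator in this diff provides (using the simplified fields from the previous snippet):

```nim
iterator occupiedSubVids(startVid: uint64, used: uint16): (uint8, uint64) =
  ## Yield the occupied sub-items of a branch in nibble order - because the
  ## ids are consecutive, the corresponding records tend to sit in the same
  ## RocksDB data block.
  for n in 0'u8 .. 15'u8:
    if (used and (1'u16 shl n)) > 0:
      yield (n, startVid + uint64(n))
```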

On a sample resync, there's already a ~20% improvement, with later blocks
seeing an increasing benefit (the trie is deeper in later blocks, so they
gain more from the faster branch reads).

```
blocks: 18729664, baseline: 190h43m49s, contender: 153h59m0s
Time (total): -36h44m48s, -19.27%
```

Note: clients need to be resynced as the PR changes the on-disk format

R.I.P. little bloom filter - your life in the repo was short but
valuable.
Jacek Sieka 2024-12-04 11:42:04 +01:00 committed by GitHub
parent 5a3bfe486f
commit f034af422a
31 changed files with 411 additions and 694 deletions


@ -159,7 +159,7 @@ proc blobifyTo*(pyl: LeafPayload, data: var seq[byte]) =
data &= pyl.stoData.blobify().data
data &= [0x20.byte]
proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,AristoError] =
proc blobifyTo*(vtx: VertexRef, key: HashKey, data: var seq[byte]) =
## This function serialises the vertex argument to a database record.
## Contrary to RLP based serialisation, these records aim to align on
## fixed byte boundaries.
@ -181,72 +181,53 @@ proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,
## ::
## 8 * n * ((access shr (n * 4)) and 15)
##
if not vtx.isValid:
return err(BlobifyNilVertex)
case vtx.vType:
of Branch:
let code = if key.isValid:
data.add byte(key.len)
data.add key.data()
# TODO using 0 here for legacy reasons - a bit flag would be easier
0'u8 shl 6
else:
2'u8 shl 6
var
lens = 0u64
pos = data.len
for n in 0..15:
if vtx.bVid[n].isValid:
let tmp = vtx.bVid[n].blobify()
lens += uint64(tmp.len) shl (n * 4)
data &= tmp.data()
if data.len == pos:
return err(BlobifyBranchMissingRefs)
doAssert vtx.isValid
let
pSegm =
if vtx.pfx.len > 0:
vtx.pfx.toHexPrefix(isleaf = false)
else:
default(HexPrefixBuf)
psLen = pSegm.len.byte
if 33 < psLen:
return err(BlobifyExtPathOverflow)
let
bits =
case vtx.vType
of Branch:
let bits =
if key.isValid and key.len == 32:
# Shorter keys can be loaded from the vertex directly
data.add key.data()
0b10'u8
else:
0b00'u8
data &= pSegm.data()
data &= lens.toBytesBE
data &= [code or psLen]
data.add vtx.startVid.blobify().data()
data.add toBytesBE(vtx.used)
bits
of Leaf:
vtx.lData.blobifyTo(data)
0b01'u8
of Leaf:
let
pSegm = vtx.pfx.toHexPrefix(isleaf = true)
psLen = pSegm.len.byte
if psLen == 0 or 33 < psLen:
return err(BlobifyLeafPathOverflow)
vtx.lData.blobifyTo(data)
data &= pSegm.data()
data &= [(3'u8 shl 6) or psLen]
pSegm =
if vtx.pfx.len > 0:
vtx.pfx.toHexPrefix(isleaf = vtx.vType == Leaf)
else:
default(HexPrefixBuf)
psLen = pSegm.len.byte
ok()
data &= pSegm.data()
data &= [(bits shl 6) or psLen]
proc blobify*(vtx: VertexRef, key: HashKey): seq[byte] =
## Variant of `blobify()`
result = newSeqOfCap[byte](128)
if vtx.blobifyTo(key, result).isErr:
result.setLen(0) # blobify only fails on invalid verticies
vtx.blobifyTo(key, result)
proc blobifyTo*(lSst: SavedState; data: var seq[byte]): Result[void,AristoError] =
proc blobifyTo*(lSst: SavedState; data: var seq[byte]) =
## Serialise a last saved state record
data.add lSst.key.data
data.add lSst.serial.toBytesBE
data.add @[0x7fu8]
ok()
proc blobify*(lSst: SavedState): Result[seq[byte],AristoError] =
proc blobify*(lSst: SavedState): seq[byte] =
## Variant of `blobify()`
var data: seq[byte]
? lSst.blobifyTo data
ok(move(data))
lSst.blobifyTo data
data
# -------------
proc deblobify(
@ -296,11 +277,10 @@ proc deblobifyType*(record: openArray[byte]; T: type VertexRef):
if record.len < 3: # minimum `Leaf` record
return err(DeblobVtxTooShort)
ok case record[^1] shr 6:
of 0, 2: Branch
of 3: Leaf
ok if ((record[^1] shr 6) and 0b01'u8) > 0:
Leaf
else:
return err(DeblobUnknown)
Branch
proc deblobify*(
record: openArray[byte];
@ -308,67 +288,44 @@ proc deblobify*(
): Result[T,AristoError] =
## De-serialise a data record encoded with `blobify()`. The second
## argument `vtx` can be `nil`.
if record.len < 3: # minimum `Leaf` record
if record.len < 3: # minimum `Leaf` record
return err(DeblobVtxTooShort)
let kind = record[^1] shr 6
let start = if kind == 0:
int(record[0] + 1)
else:
0
ok case kind:
of 0, 2: # `Branch` vertex
if record.len - start < 11: # at least two edges
return err(DeblobBranchTooShort)
let
bits = record[^1] shr 6
vType = if (bits and 0b01'u8) > 0: Leaf else: Branch
hasKey = (bits and 0b10'u8) > 0
psLen = int(record[^1] and 0b00111111)
start = if hasKey: 32 else: 0
if psLen > record.len - 2 or start > record.len - 2 - psLen:
return err(DeblobBranchTooShort)
let
psPos = record.len - psLen - 1
(_, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(psPos, record.len - 2)
ok case vType
of Branch:
var pos = start
let
aInx = record.len - 9
aIny = record.len - 2
var
offs = start
lens = uint64.fromBytesBE record.toOpenArray(aInx, aIny) # bitmap
vtxList: array[16,VertexID]
n = 0
while lens != 0:
let len = lens and 0b1111
if len > 0:
vtxList[n] = VertexID(? load64(record, offs, int(len)))
inc n
lens = lens shr 4
svLen = psPos - pos - 2
startVid = VertexID(?load64(record, pos, svLen))
used = uint16.fromBytesBE(record.toOpenArray(pos, pos + 1))
let (isLeaf, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(offs, aInx - 1)
if isLeaf:
return err(DeblobBranchGotLeafPrefix)
pos += 2
# End `while`
VertexRef(
vType: Branch,
pfx: pathSegment,
bVid: vtxList)
VertexRef(vType: Branch, pfx: pathSegment, startVid: startVid, used: used)
of Leaf:
let vtx = VertexRef(vType: Leaf, pfx: pathSegment)
of 3: # `Leaf` vertex
let
sLen = record[^1].int and 0x3f # length of path segment
rLen = record.len - 1 # payload + path segment
pLen = rLen - sLen # payload length
if rLen < sLen or pLen < 1:
return err(DeblobLeafSizeGarbled)
let (isLeaf, pathSegment) =
NibblesBuf.fromHexPrefix record.toOpenArray(pLen, rLen-1)
if not isLeaf:
return err(DeblobLeafGotExtPrefix)
let vtx = VertexRef(
vType: Leaf,
pfx: pathSegment)
? record.toOpenArray(start, pLen - 1).deblobify(vtx.lData)
?record.toOpenArray(start, psPos - 1).deblobify(vtx.lData)
vtx
else:
return err(DeblobUnknown)
proc deblobify*(record: openArray[byte], T: type HashKey): Opt[HashKey] =
if record.len > 1 and ((record[^1] shr 6) == 0) and (int(record[0]) + 1) < record.len:
HashKey.fromBytes(record.toOpenArray(1, int(record[0])))
if record.len > 33 and (((record[^1] shr 6) and 0b10'u8) > 0):
HashKey.fromBytes(record.toOpenArray(0, 31))
else:
Opt.none(HashKey)


@ -42,11 +42,10 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
of Branch:
block check42Links:
var seen = false
for n in 0 .. 15:
if vtx.bVid[n].isValid:
if seen:
break check42Links
seen = true
for _, _ in vtx.pairs():
if seen:
break check42Links
seen = true
return err((rvid.vid,CheckBeVtxBranchLinksMissing))
for (rvid,key) in T.walkKeyBe db:


@ -100,11 +100,10 @@ proc checkTopCommon*(
of Branch:
block check42Links:
var seen = false
for n in 0 .. 15:
if vtx.bVid[n].isValid:
if seen:
break check42Links
seen = true
for _, _ in vtx.pairs():
if seen:
break check42Links
seen = true
return err((rvid.vid,CheckAnyVtxBranchLinksMissing))
else:
nNilVtx.inc


@ -11,102 +11,13 @@
{.push raises: [].}
import
system/ansi_c,
std/[strformat, math, hashes],
stew/staticfor,
std/[strformat, math],
chronicles,
eth/common,
results,
"."/[aristo_desc, aristo_get, aristo_serialise, aristo_walk/persistent],
"."/[aristo_desc, aristo_get, aristo_walk/persistent],
./aristo_desc/desc_backend
type BasicBloomFilter = object
# School book implementation of bloom filter based on
# https://github.com/save-buffer/bloomfilter_benchmarks.
#
# In theory, this bloom filter could be turned into a reusable component but
# it is fairly specialised to the particular use case and gets used in a
# tight/hot loop in the code - a generalisation would require care so as not
# to introduce overhead but could of course be further optimised using
bytes: ptr UncheckedArray[byte]
proc computeBits(n: int, epsilon: float): int =
# Number of bits in the bloom filter required for n elements and eposilon
# false positive rate
int(-1.4427 * float(n) * log2(epsilon) + 0.5)
proc computeHashFns(epsilon: float): int =
# Number of hash functions given the desired false positive rate
int(-log2(epsilon) + 0.5)
const
bloomRate = 0.002
# The leaf cache computation is fairly sensitive to false positives as these
# ripple up the branch trie with false postivies being amplified by trie
# branching - this has to be balanced with the cost which
# goes up fairly quickly with ~13 bits per key at 0.002, meaning ~2gb of
# memory for the current setting below!
bloomHashes = computeHashFns(bloomRate)
expectedKeys = 1500000000
# expected number of elements in the bloom filter - this is reported as
# `keys` below and will need adjusting - the value is more or less accurate
# on mainnet as of block 2100000 (~oct 2024) for the number of leaves
# present - we use leaf count because bloom filter accuracy is most
# important for the first round of branches.
# TODO rocksdb can estimate the number of keys present in the vertex table -
# this would provide a reasonable estimate of what the bloom table size
# should be, though in reality we want leaf count per above argument -
# at the time of writing leaves make up around 3/4 of all verticies
bloomSize = uint32((computeBits(expectedKeys, bloomRate) + 7) / 8)
func hashes(v: uint64): (uint32, uint32) =
# Use the two halves of an uint64 to create two independent hashes functions
# for the bloom that allow efficiently generating more bloom hash functions
# per Kirsch and Mitzenmacher:
# https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
let
v = uint64(hash(v)) # `hash` for a better spread of bits into upper half
h1 = uint32(v)
h2 = uint32(v shr 32)
(h1, h2)
func insert(filter: var BasicBloomFilter, v: uint64) =
let (h1, h2) = hashes(v)
staticFor i, 0 ..< bloomHashes:
let
hash = (h1 + i * h2)
bitIdx = uint8(hash mod 8)
byteIdx = (hash div 8) mod bloomSize
filter.bytes[byteIdx] = filter.bytes[byteIdx] or (1'u8 shl bitIdx)
func query(filter: BasicBloomFilter, v: uint64): bool =
let (h1, h2) = hashes(v)
var match = 1'u8
staticFor i, 0 ..< bloomHashes:
let
hash = (h1 + i * h2)
bitIdx = uint8(hash mod 8)
byteIdx = (hash div 8) mod bloomSize
match = match and ((filter.bytes[byteIdx] shr bitIdx) and 1)
match > 0
proc init(T: type BasicBloomFilter): T =
# We use the C memory allocator so as to return memory to the operating system
# at the end of the computation - we don't want the one-off blob to remain in
# the hands of the Nim GC.
# `calloc` to get zeroed memory out of the box
let memory = c_calloc(csize_t(bloomSize), 1)
doAssert memory != nil, "Could not allocate memory for bloom filter"
T(bytes: cast[ptr UncheckedArray[byte]](memory))
proc release(v: BasicBloomFilter) =
# TODO with orc, this could be a destructor
c_free(v.bytes)
type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64]
# Keep write batch size _around_ 1mb, give or take some overhead - this is a
@ -141,12 +52,12 @@ func progress(batch: WriteBatch): string =
# looking at the path prefix that we're currently processing
&"{(float(batch.prefix) / float(uint64.high)) * 100:02.2f}%"
func enter(batch: var WriteBatch, nibble: int) =
func enter(batch: var WriteBatch, nibble: uint8) =
batch.depth += 1
if batch.depth <= 16:
batch.prefix += uint64(nibble) shl ((16 - batch.depth) * 4)
func leave(batch: var WriteBatch, nibble: int) =
func leave(batch: var WriteBatch, nibble: uint8) =
if batch.depth <= 16:
batch.prefix -= uint64(nibble) shl ((16 - batch.depth) * 4)
batch.depth -= 1
@ -196,9 +107,9 @@ template encodeLeaf(w: var RlpWriter, pfx: NibblesBuf, leafData: untyped): HashK
w.append(leafData)
w.finish().digestTo(HashKey)
template encodeBranch(w: var RlpWriter, subKeyForN: untyped): HashKey =
template encodeBranch(w: var RlpWriter, vtx: VertexRef, subKeyForN: untyped): HashKey =
w.startList(17)
for n {.inject.} in 0 .. 15:
for (n {.inject.}, subvid {.inject.}) in vtx.allPairs():
w.append(subKeyForN)
w.append EmptyBlob
w.finish().digestTo(HashKey)
@ -209,23 +120,26 @@ template encodeExt(w: var RlpWriter, pfx: NibblesBuf, branchKey: HashKey): HashK
w.append(branchKey)
w.finish().digestTo(HashKey)
proc getKey(
db: AristoDbRef, rvid: RootedVertexID, skipLayers: static bool
): Result[((HashKey, VertexRef), int), AristoError] =
ok when skipLayers:
(?db.getKeyUbe(rvid, {GetVtxFlag.PeekCache}), -2)
else:
?db.getKeyRc(rvid, {})
proc computeKeyImpl(
db: AristoDbRef,
rvid: RootedVertexID,
batch: var WriteBatch,
bloom: ptr BasicBloomFilter = nil,
vtxl: (VertexRef, int),
skipLayers: static bool,
): Result[(HashKey, int), AristoError] =
# The bloom filter available used only when creating the key cache from an
# empty state
if bloom == nil or bloom[].query(uint64(rvid.vid)):
db.getKeyRc(rvid).isErrOr:
# Value cached either in layers or database
return ok value
let (vtx, vl) = ?db.getVtxRc(rvid, {GetVtxFlag.PeekCache})
# Top-most level of all the verticies this hash compution depends on
var level = vl
# Top-most level of all the verticies this hash computation depends on
var (vtx, level) = vtxl
# TODO this is the same code as when serializing NodeRef, without the NodeRef
var writer = initRlpWriter()
@ -240,8 +154,16 @@ proc computeKeyImpl(
stoID = vtx.lData.stoID
skey =
if stoID.isValid:
let (skey, sl) =
?db.computeKeyImpl((stoID.vid, stoID.vid), batch, bloom)
let
keyvtxl = ?db.getKey((stoID.vid, stoID.vid), skipLayers)
(skey, sl) =
if keyvtxl[0][0].isValid:
(keyvtxl[0][0], keyvtxl[1])
else:
let vtxl = (keyvtxl[0][1], keyvtxl[1])
?db.computeKeyImpl(
(stoID.vid, stoID.vid), batch, vtxl, skipLayers = skipLayers
)
level = maxLevel(level, sl)
skey
else:
@ -257,12 +179,26 @@ proc computeKeyImpl(
# TODO avoid memory allocation when encoding storage data
rlp.encode(vtx.lData.stoData)
of Branch:
# For branches, we need to load the verticies before recursing into them
# to exploit their on-disk order
var keyvtxs: array[16, ((HashKey, VertexRef), int)]
for n, subvid in vtx.pairs:
keyvtxs[n] = ?db.getKey((rvid.root, subvid), skipLayers)
template writeBranch(w: var RlpWriter): HashKey =
w.encodeBranch:
let vid = vtx.bVid[n]
if vid.isValid:
w.encodeBranch(vtx):
if subvid.isValid:
batch.enter(n)
let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid), batch, bloom)
let (bkey, bl) =
if keyvtxs[n][0][0].isValid:
(keyvtxs[n][0][0], keyvtxs[n][1])
else:
?db.computeKeyImpl(
(rvid.root, subvid),
batch,
(keyvtxs[n][0][1], keyvtxs[n][1]),
skipLayers = skipLayers,
)
batch.leave(n)
level = maxLevel(level, bl)
@ -289,10 +225,19 @@ proc computeKeyImpl(
ok (key, level)
proc computeKeyImpl(
db: AristoDbRef, rvid: RootedVertexID, bloom: ptr BasicBloomFilter
db: AristoDbRef, rvid: RootedVertexID, skipLayers: static bool
): Result[HashKey, AristoError] =
let (keyvtx, level) =
when skipLayers:
(?db.getKeyUbe(rvid, {GetVtxFlag.PeekCache}), -2)
else:
?db.getKeyRc(rvid, {})
if keyvtx[0].isValid:
return ok(keyvtx[0])
var batch: WriteBatch
let res = computeKeyImpl(db, rvid, batch, bloom)
let res = computeKeyImpl(db, rvid, batch, (keyvtx[1], level), skipLayers = skipLayers)
if res.isOk:
?batch.flush(db)
@ -313,208 +258,12 @@ proc computeKey*(
## state/hash, it must be converted to a `Hash32` (using (`.to(Hash32)`) as
## in `db.computeKey(rvid).value.to(Hash32)` which always results in a
## 32 byte value.
computeKeyImpl(db, rvid, nil)
proc computeLeafKeysImpl(
T: type, db: AristoDbRef, root: VertexID
): Result[void, AristoError] =
# Key computation function that works by iterating over the entries in the
# database (instead of traversing trie using point lookups) - due to how
# rocksdb is organised, this cache-friendly traversal order turns out to be
# more efficient even if we "touch" a lot of irrelevant entries.
# Computation works bottom-up starting with the leaves and proceeding with
# branches whose children were computed in the previous round one "layer"
# at a time until the the number of successfully computed nodes grows low.
# TODO progress indicator
block:
if db.getKeyUbe((root, root)).isOk():
return ok() # Fast path for when the root is in the database already
# Smoke check to see if we can find lots of branch nodes with keys already
var branches, found: int
for (rvid, vtx) in T.walkVtxBe(db, {Branch}):
branches += 1
if db.getKeyUbe(rvid).isOk:
found += 1
# 10% found on the initial sample.. good enough? Some more randomness
# here would maybe make sense
if branches > 1000:
if found * 10 > branches:
return ok()
break
info "Writing key cache (this may take a while)"
var batch: WriteBatch
# Bloom filter keeping track of keys we're added to the database already so
# as to avoid expensive speculative lookups
var bloom = BasicBloomFilter.init()
defer:
bloom.release()
var
# Reuse rlp writers to avoid superfluous memory allocations
writer = initRlpWriter()
writer2 = initRlpWriter()
writer3 = initRlpWriter()
level = 0
leaves = 0
# Load leaves into bloom filter so we can quickly skip over branch nodes where
# we know the lookup will fail.
# At the time of writing, this is roughly 3/4 of the of the entries in the
# database on mainnet - the ratio roughly corresponds to the fill ratio of the
# deepest branch nodes as nodes close to the MPT root don't come in
# significant numbers
# Leaf keys are not computed to save space - instead, if they are needed they
# are computed from the leaf data.
for (rvid, vtx) in T.walkVtxBe(db, {Leaf}):
if vtx.lData.pType == AccountData and vtx.lData.stoID.isValid:
# Accounts whose key depends on the storage trie typically will not yet
# have their root node computed and several such contracts are
# significant in size, meaning that we might as well let their leaves
# be computed and then top up during regular trie traversal.
continue
bloom.insert(uint64(rvid.vid))
leaves += 1
# The leaves have been loaded into the bloom filter - we'll now proceed to
# branches expecting diminishing returns for each layer - not only beacuse
# there are fewer nodes closer to the root in the trie but also because leaves
# we skipped over lead larger and larger branch gaps and the advantage of
# iterating in disk order is lost
var lastRound = leaves
level += 1
# 16*16 looks like "2 levels of MPT" but in reality, the branch nodes close
# to the leaves are sparse - on average about 4 nodes per branch on mainnet -
# meaning that we'll do 3-4 levels of branch depending on the network
var branches = 0
while lastRound > (leaves div (16 * 16)):
info "Starting branch layer", keys = batch.count, lastRound, level
var round = 0
branches = 0
for (rvid, vtx) in T.walkVtxBe(db, {Branch}):
branches += 1
if vtx.pfx.len > 0:
# TODO there shouldn't be many extension nodes - is it worth the lookup?
continue
if level > 1:
# A hit on the bloom filter here means we **maybe** already computed a
# key for this branch node - we could verify this with a lookup but
# the generally low false positive rate makes this check more expensive
# than simply revisiting the node using trie traversal.
if bloom.query(uint64(rvid.vid)):
continue
block branchKey:
for b in vtx.bVid:
if b.isValid and not bloom.query(uint64(b)):
# If any child is missing from the branch, we can't compute the key
# trivially
break branchKey
writer.clear()
let key = writer.encodeBranch:
let vid = vtx.bVid[n]
if vid.isValid:
let bkeyOpt =
if level == 1: # No leaf keys in database
Result[HashKey, AristoError].err(GetKeyNotFound)
else:
db.getKeyUbe((rvid.root, vid))
bkeyOpt.valueOr:
let bvtx = db.getVtxUbe((rvid.root, vid)).valueOr:
# Incomplete database?
break branchKey
if bvtx == nil or (
bvtx.vType == Leaf and bvtx.lData.pType == AccountData and
bvtx.lData.stoID.isValid
):
# It's unlikely storage root key has been computed already, so
# skip
# TODO maybe worth revisting - a not insignificant number of
# contracts have only a leaf storage slot so for those we
# could trivially compute account storage root..
break branchKey
case bvtx.vType
of Leaf:
writer2.clear()
writer2.encodeLeaf(bvtx.pfx):
writer3.clear()
case bvtx.lData.pType
of AccountData:
writer3.append Account(
nonce: bvtx.lData.account.nonce,
balance: bvtx.lData.account.balance,
# Accounts with storage filtered out above
storageRoot: EMPTY_ROOT_HASH,
codeHash: bvtx.lData.account.codeHash,
)
of StoData:
writer3.append(bvtx.lData.stoData)
writer3.finish()
of Branch:
break branchKey
else:
VOID_HASH_KEY
?batch.putVtx(db, rvid, vtx, key)
if batch.count mod batchSize == 0:
?batch.flush(db)
if batch.count mod (batchSize * 100) == 0:
info "Writing branches", keys = batch.count, round, level
else:
debug "Writing branches", keys = batch.count, round, level
round += 1
bloom.insert(uint64(rvid.vid))
lastRound = round
level += 1
?batch.flush(db)
info "Key cache base written",
keys = batch.count, lastRound, leaves, branches
let rc = computeKeyImpl(db, (root, root), addr bloom)
if rc.isOk() or rc.error() == GetVtxNotFound:
# When there's no root vertex, the database is likely empty
ok()
else:
err(rc.error())
computeKeyImpl(db, rvid, skipLayers = false)
proc computeKeys*(db: AristoDbRef, root: VertexID): Result[void, AristoError] =
## Computing the leaf keys is a pre-processing step for when hash cache is
## empty.
##
## Computing it by traversing the trie can take days because of the mismatch
## between trie traversal order and the on-disk VertexID-based sorting.
##
## This implementation speeds up the inital seeding of the cache by traversing
## the full state in on-disk order and computing hashes bottom-up instead.
case db.backend.kind
of BackendMemory:
MemBackendRef.computeLeafKeysImpl db, root
of BackendRocksDB, BackendRdbHosting:
RdbBackendRef.computeLeafKeysImpl db, root
of BackendVoid:
ok()
## Ensure that key cache is topped up with the latest state root
discard db.computeKeyImpl((root, root), skipLayers = true)
ok()
# ------------------------------------------------------------------------------
# End


@ -200,9 +200,9 @@ func ppVtx(nd: VertexRef, db: AristoDbRef, rvid: RootedVertexID): string =
result &= nd.pfx.ppPathPfx & "," & nd.lData.ppPayload(db)
of Branch:
result &= nd.pfx.ppPathPfx & ":"
for n in 0..15:
if nd.bVid[n].isValid:
result &= nd.bVid[n].ppVid
for n in 0'u8..15'u8:
if nd.bVid(n).isValid:
result &= nd.bVid(n).ppVid
if n < 15:
result &= ","
result &= ")"
@ -238,12 +238,12 @@ proc ppNode(
else:
result &= nd.vtx.lData.ppPayload(db)
of Branch:
let keyOnly = nd.vtx.bVid.toSeq.filterIt(it.isValid).len == 0
let keyOnly = nd.vtx.subVids.toSeq.filterIt(it.isValid).len == 0
result &= nd.vtx.pfx.ppPathPfx & ":"
for n in 0..15:
if nd.vtx.bVid[n].isValid:
let tag = db.ppKeyOk(nd.key[n],(rvid.root,nd.vtx.bVid[n]))
result &= nd.vtx.bVid[n].ppVid & tag
for n in 0'u8..15'u8:
if nd.vtx.bVid(n).isValid:
let tag = db.ppKeyOk(nd.key[n],(rvid.root,nd.vtx.bVid(n)))
result &= nd.vtx.bVid(n).ppVid & tag
elif keyOnly and nd.key[n].isValid:
result &= nd.key[n].ppKey(db)
if n < 15:


@ -25,14 +25,14 @@ import
# Private heplers
# ------------------------------------------------------------------------------
proc branchStillNeeded(vtx: VertexRef, removed: int): Result[int,void] =
proc branchStillNeeded(vtx: VertexRef, removed: int8): Result[int8,void] =
## Returns the nibble if there is only one reference left.
var nibble = -1
for n in 0 .. 15:
var nibble = -1'i8
for n in 0'i8 .. 15'i8:
if n == removed:
continue
if vtx.bVid[n].isValid:
if vtx.bVid(uint8 n).isValid:
if 0 <= nibble:
return ok(-1)
nibble = n
@ -84,7 +84,7 @@ proc deleteImpl(
# Get child vertex (there must be one after a `Branch` node)
let
vid = br.vtx.bVid[nbl]
vid = br.vtx.bVid(uint8 nbl)
nxt = db.getVtx (hike.root, vid)
if not nxt.isValid:
return err(DelVidStaleVtx)
@ -103,7 +103,8 @@ proc deleteImpl(
VertexRef(
vType: Branch,
pfx: br.vtx.pfx & NibblesBuf.nibble(nbl.byte) & nxt.pfx,
bVid: nxt.bVid)
startVid: nxt.startVid,
used: nxt.used)
# Put the new vertex at the id of the obsolete branch
db.layersPutVtx((hike.root, br.vid), vtx)
@ -115,7 +116,7 @@ proc deleteImpl(
else:
# Clear the removed leaf from the branch (that still contains other children)
let brDup = br.vtx.dup
brDup.bVid[hike.legs[^2].nibble] = VertexID(0)
discard brDup.setUsed(uint8 hike.legs[^2].nibble, false)
db.layersPutVtx((hike.root, br.vid), brDup)
ok(nil)


@ -29,9 +29,8 @@ proc delSubTreeNow(
return err(error)
if vtx.vType == Branch:
for n in 0..15:
if vtx.bVid[n].isValid:
? db.delSubTreeNow((rvid.root,vtx.bVid[n]))
for _, subvid in vtx.pairs():
? db.delSubTreeNow((rvid.root, subvid))
db.layersResVtx(rvid)
@ -53,11 +52,10 @@ proc delStoTreeNow(
case vtx.vType
of Branch:
for i in 0..15:
if vtx.bVid[i].isValid:
? db.delStoTreeNow(
(rvid.root, vtx.bVid[i]), accPath,
stoPath & vtx.pfx & NibblesBuf.nibble(byte i))
for n, subvid in vtx.pairs():
? db.delStoTreeNow(
(rvid.root, subvid), accPath,
stoPath & vtx.pfx & NibblesBuf.nibble(n))
of Leaf:
let stoPath = Hash32((stoPath & vtx.pfx).getBytes())


@ -35,9 +35,9 @@ proc revSubTree(
return err((rvid.vid,rc.error))
key = block:
let rc = db.getKeyUbe rvid
let rc = db.getKeyUbe(rvid, {})
if rc.isOk:
rc.value
rc.value[0]
elif rc.error == GetKeyNotFound:
VOID_HASH_KEY
else:
@ -89,9 +89,9 @@ proc revFilter*(
# Calculate reverse changes for the `kMap[]` structural table.
for rvid in filter.kMap.keys:
let rc = db.getKeyUbe rvid
let rc = db.getKeyUbe(rvid, {})
if rc.isOk:
rev.kMap[rvid] = rc.value
rev.kMap[rvid] = rc.value[0]
elif rc.error == GetKeyNotFound:
rev.kMap[rvid] = VOID_HASH_KEY
else:


@ -25,7 +25,7 @@ type
## `Aristo DB` data record.
GetKeyFn* =
proc(rvid: RootedVertexID): Result[HashKey,AristoError] {.gcsafe, raises: [].}
proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] {.gcsafe, raises: [].}
## Generic backend database retrieval function for a single
## `Aristo DB` hash lookup value.


@ -383,6 +383,12 @@ func hash*(a: HashKey): Hash =
## Table/KeyedQueue mixin
hash(a.data)
func append*(w: var RlpWriter; key: HashKey) =
if 1 < key.len and key.len < 32:
w.appendRawBytes key.data
else:
w.append key.data
# ------------------------------------------------------------------------------
# Miscellaneous helpers
# ------------------------------------------------------------------------------


@ -73,7 +73,8 @@ type
of Leaf:
lData*: LeafPayload ## Reference to data payload
of Branch:
bVid*: array[16,VertexID] ## Edge list with vertex IDs
startVid*: VertexID
used*: uint16
NodeRef* = ref object of RootRef
## Combined record for a *traditional* ``Merkle Patricia Tree` node merged
@ -135,6 +136,20 @@ type
# Public helpers (misc)
# ------------------------------------------------------------------------------
func bVid*(vtx: VertexRef, nibble: uint8): VertexID =
if (vtx.used and (1'u16 shl nibble)) > 0:
VertexID(uint64(vtx.startVid) + nibble)
else:
default(VertexID)
func setUsed*(vtx: VertexRef, nibble: uint8, used: static bool): VertexID =
vtx.used =
when used:
vtx.used or (1'u16 shl nibble)
else:
vtx.used and (not (1'u16 shl nibble))
vtx.bVid(nibble)
func init*(T: type LayerRef): T =
## Constructor, returns empty layer
T()
@ -176,18 +191,41 @@ proc `==`*(a, b: VertexRef): bool =
if a.pfx != b.pfx or a.lData != b.lData:
return false
of Branch:
if a.pfx != b.pfx or a.bVid != b.bVid:
if a.pfx != b.pfx or a.startVid != b.startVid or a.used != b.used:
return false
true
iterator pairs*(vtx: VertexRef): tuple[nibble: uint8, vid: VertexID] =
## Iterates over the sub-vids of a branch (does nothing for leaves)
case vtx.vType:
of Leaf:
discard
of Branch:
for n in 0'u8 .. 15'u8:
if (vtx.used and (1'u16 shl n)) > 0:
yield (n, VertexID(uint64(vtx.startVid) + n))
iterator allPairs*(vtx: VertexRef): tuple[nibble: uint8, vid: VertexID] =
## Iterates over the sub-vids of a branch (does nothing for leaves) including
## currently unset nodes
case vtx.vType:
of Leaf:
discard
of Branch:
for n in 0'u8 .. 15'u8:
if (vtx.used and (1'u16 shl n)) > 0:
yield (n, VertexID(uint64(vtx.startVid) + n))
else:
yield (n, default(VertexID))
proc `==`*(a, b: NodeRef): bool =
## Beware, potential deep comparison
if a.vtx != b.vtx:
return false
case a.vtx.vType:
of Branch:
for n in 0..15:
if a.vtx.bVid[n] != 0.VertexID or b.vtx.bVid[n] != 0.VertexID:
for n in 0'u8..15'u8:
if a.vtx.bVid(n) != 0.VertexID or b.vtx.bVid(n) != 0.VertexID:
if a.key[n] != b.key[n]:
return false
else:
@ -228,7 +266,8 @@ func dup*(vtx: VertexRef): VertexRef =
VertexRef(
vType: Branch,
pfx: vtx.pfx,
bVid: vtx.bVid)
startVid: vtx.startVid,
used: vtx.used)
func dup*(node: NodeRef): NodeRef =
## Duplicate node.


@ -83,7 +83,7 @@ proc retrieveMerkleHash(
): Result[Hash32,AristoError] =
let key =
db.computeKey((root, root)).valueOr:
if error == GetVtxNotFound:
if error in [GetVtxNotFound, GetKeyNotFound]:
return ok(EMPTY_ROOT_HASH)
return err(error)


@ -54,11 +54,12 @@ proc getVtxUbe*(
proc getKeyUbe*(
db: AristoDbRef;
rvid: RootedVertexID;
): Result[HashKey,AristoError] =
flags: set[GetVtxFlag];
): Result[(HashKey, VertexRef),AristoError] =
## Get the Merkle hash/key from the unfiltered backend if available.
let be = db.backend
if not be.isNil:
return be.getKeyFn rvid
return be.getKeyFn(rvid, flags)
err GetKeyNotFound
# ------------------
@ -87,14 +88,18 @@ proc getVtxBE*(
proc getKeyBE*(
db: AristoDbRef;
rvid: RootedVertexID;
): Result[(HashKey, int),AristoError] =
flags: set[GetVtxFlag];
): Result[((HashKey, VertexRef), int),AristoError] =
## Get the merkle hash/key from the (filtered) backend if available.
if not db.balancer.isNil:
db.balancer.kMap.withValue(rvid, w):
if w[].isValid:
return ok((w[], -1))
return err(GetKeyNotFound)
ok ((?db.getKeyUbe rvid), -2)
return ok(((w[], nil), -1))
db.balancer.sTab.withValue(rvid, s):
if s[].isValid:
return ok(((VOID_HASH_KEY, s[]), -1))
return err(GetKeyNotFound)
ok ((?db.getKeyUbe(rvid, flags)), -2)
# ------------------
@ -124,7 +129,8 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag] = {})
##
db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
proc getKeyRc*(
db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[((HashKey, VertexRef), int),AristoError] =
## Cascaded attempt to fetch a Merkle hash from the cache layers or the
## backend. This function will never return a `VOID_HASH_KEY` but rather
## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error.
@ -135,27 +141,27 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),Ari
# If there is a zero key value, the entry is either marked for being
# updated or for deletion on the database. So check below.
if key[0].isValid:
return ok key
return ok ((key[0], nil), key[1])
# The zero key value does not refer to an update mark if there is no
# valid vertex (either on the cache or the backend whatever comes first.)
let vtx = db.layersGetVtx(rvid).valueOr:
# There was no vertex on the cache. So there must be one the backend (the
# reason for the key lable to exists, at all.)
return err(GetKeyUpdateNeeded)
# reason for the key label to exists, at all.)
return err(GetKeyNotFound)
if vtx[0].isValid:
return err(GetKeyUpdateNeeded)
return ok ((VOID_HASH_KEY, vtx[0]), vtx[1])
else:
# The vertex is to be deleted. So is the value key.
return err(GetKeyNotFound)
db.getKeyBE rvid
db.getKeyBE(rvid, flags)
proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
## The function returns `nil` on error or failure.
##
(db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0]
(db.getKeyRc(rvid, {}).valueOr(((VOID_HASH_KEY, nil), 0)))[0][0]
# ------------------------------------------------------------------------------
# End


@ -91,8 +91,8 @@ proc step*(
return err(HikeBranchTailEmpty)
let
nibble = path[vtx.pfx.len].int8
nextVid = vtx.bVid[nibble]
nibble = path[vtx.pfx.len]
nextVid = vtx.bVid(nibble)
if not nextVid.isValid:
return err(HikeBranchMissingEdge)


@ -97,13 +97,14 @@ proc getVtxFn(db: MemBackendRef): GetVtxFn =
proc getKeyFn(db: MemBackendRef): GetKeyFn =
result =
proc(rvid: RootedVertexID): Result[HashKey,AristoError] =
proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] =
let data = db.mdb.sTab.getOrDefault(rvid, EmptyBlob)
if 0 < data.len:
let key = data.deblobify(HashKey).valueOr:
return err(GetKeyNotFound)
if key.isValid:
return ok(key)
let vtx = data.deblobify(VertexRef).valueOr:
return err(GetKeyNotFound)
return ok((VOID_HASH_KEY, vtx))
return ok((key, nil))
err(GetKeyNotFound)
proc getTuvFn(db: MemBackendRef): GetTuvFn =
@ -150,14 +151,7 @@ proc putLstFn(db: MemBackendRef): PutLstFn =
proc(hdl: PutHdlRef; lst: SavedState) =
let hdl = hdl.getSession db
if hdl.error.isNil:
let rc = lst.blobify # test
if rc.isOk:
hdl.lSst = Opt.some(lst)
else:
hdl.error = TypedPutHdlErrRef(
pfx: AdmPfx,
aid: AdmTabIdLst,
code: rc.error)
hdl.lSst = Opt.some(lst)
proc putEndFn(db: MemBackendRef): PutEndFn =
result =


@ -90,15 +90,15 @@ proc getVtxFn(db: RdbBackendRef): GetVtxFn =
proc getKeyFn(db: RdbBackendRef): GetKeyFn =
result =
proc(rvid: RootedVertexID): Result[HashKey,AristoError] =
proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] =
# Fetch serialised data record
let key = db.rdb.getKey(rvid).valueOr:
let key = db.rdb.getKey(rvid, flags).valueOr:
when extraTraceMessages:
trace logTxt "getKeyFn: failed", rvid, error=error[0], info=error[1]
return err(error[0])
if key.isValid:
if (key[0].isValid or key[1].isValid):
return ok(key)
err(GetKeyNotFound)
@ -173,12 +173,7 @@ proc putLstFn(db: RdbBackendRef): PutLstFn =
proc(hdl: PutHdlRef; lst: SavedState) =
let hdl = hdl.getSession db
if hdl.error.isNil:
let data = lst.blobify.valueOr:
hdl.error = TypedPutHdlErrRef(
pfx: AdmPfx,
aid: AdmTabIdLst,
code: error)
return
let data = lst.blobify
db.rdb.putAdm(AdmTabIdLst, data).isOkOr:
hdl.error = TypedPutHdlErrRef(
pfx: AdmPfx,

View File

@ -22,25 +22,21 @@ import
./rdb_desc,
std/concurrency/atomics
const
extraTraceMessages = false
## Enable additional logging noise
const extraTraceMessages = false ## Enable additional logging noise
when extraTraceMessages:
import
chronicles
import chronicles
logScope:
topics = "aristo-rocksdb"
when defined(metrics):
import
metrics
import metrics
type
RdbVtxLruCounter = ref object of Counter
RdbKeyLruCounter = ref object of Counter
var
rdbVtxLruStatsMetric {.used.} = RdbVtxLruCounter.newCollector(
"aristo_rdb_vtx_lru_total",
@ -50,10 +46,10 @@ when defined(metrics):
rdbKeyLruStatsMetric {.used.} = RdbKeyLruCounter.newCollector(
"aristo_rdb_key_lru_total", "HashKey LRU lookup", labels = ["state", "hit"]
)
method collect*(collector: RdbVtxLruCounter, output: MetricHandler) =
let timestamp = collector.now()
# We don't care about synchronization between each type of metric or between
# the metrics thread and others since small differences like this don't matter
for state in RdbStateType:
@ -66,10 +62,10 @@ when defined(metrics):
labelValues = [$state, $vtype, $ord(hit)],
timestamp = timestamp,
)
method collect*(collector: RdbKeyLruCounter, output: MetricHandler) =
let timestamp = collector.now()
for state in RdbStateType:
for hit in [false, true]:
output(
@ -84,16 +80,16 @@ when defined(metrics):
# Public functions
# ------------------------------------------------------------------------------
proc getAdm*(rdb: RdbInst; xid: AdminTabID): Result[seq[byte],(AristoError,string)] =
proc getAdm*(rdb: RdbInst, xid: AdminTabID): Result[seq[byte], (AristoError, string)] =
var res: seq[byte]
let onData = proc(data: openArray[byte]) =
res = @data
let gotData = rdb.admCol.get(xid.toOpenArray, onData).valueOr:
const errSym = RdbBeDriverGetAdmError
when extraTraceMessages:
trace logTxt "getAdm", xid, error=errSym, info=error
return err((errSym,error))
const errSym = RdbBeDriverGetAdmError
when extraTraceMessages:
trace logTxt "getAdm", xid, error = errSym, info = error
return err((errSym, error))
# Correct result if needed
if not gotData:
@ -101,43 +97,64 @@ proc getAdm*(rdb: RdbInst; xid: AdminTabID): Result[seq[byte],(AristoError,strin
ok move(res)
proc getKey*(
rdb: var RdbInst;
rvid: RootedVertexID;
): Result[HashKey,(AristoError,string)] =
# Try LRU cache first
var rc = rdb.rdKeyLru.get(rvid.vid)
if rc.isOk:
rdbKeyLruStats[rvid.to(RdbStateType)].inc(true)
return ok(move(rc.value))
rdb: var RdbInst, rvid: RootedVertexID, flags: set[GetVtxFlag]
): Result[(HashKey, VertexRef), (AristoError, string)] =
block:
# Try LRU cache first
let rc =
if GetVtxFlag.PeekCache in flags:
rdb.rdKeyLru.peek(rvid.vid)
else:
rdb.rdKeyLru.get(rvid.vid)
rdbKeyLruStats[rvid.to(RdbStateType)].inc(false)
if rc.isOk:
rdbKeyLruStats[rvid.to(RdbStateType)].inc(true)
return ok((rc.value, nil))
rdbKeyLruStats[rvid.to(RdbStateType)].inc(false)
block:
# We don't store keys for leaves, no need to hit the database
let rc = rdb.rdVtxLru.peek(rvid.vid)
if rc.isOk():
if rc.value().vType == Leaf:
return ok((VOID_HASH_KEY, rc.value()))
# Otherwise fetch from backend database
# A threadvar is used to avoid allocating an environment for onData
var res{.threadvar.}: Opt[HashKey]
var res {.threadvar.}: Opt[HashKey]
var vtx {.threadvar.}: Result[VertexRef, AristoError]
let onData = proc(data: openArray[byte]) =
res = data.deblobify(HashKey)
if res.isSome():
reset(vtx)
else:
vtx = data.deblobify(VertexRef)
let gotData = rdb.vtxCol.get(rvid.blobify().data(), onData).valueOr:
const errSym = RdbBeDriverGetKeyError
when extraTraceMessages:
trace logTxt "getKey", rvid, error=errSym, info=error
return err((errSym,error))
const errSym = RdbBeDriverGetKeyError
when extraTraceMessages:
trace logTxt "getKey", rvid, error = errSym, info = error
return err((errSym, error))
# Correct result if needed
if not gotData or res.isNone():
res.ok(VOID_HASH_KEY)
if not gotData:
return ok((VOID_HASH_KEY, nil))
# Update cache and return
rdb.rdKeyLru.put(rvid.vid, res.value())
# Update cache and return - in peek mode, avoid evicting cache items
if res.isSome() and
(GetVtxFlag.PeekCache notin flags or rdb.rdKeyLru.len < rdb.rdKeyLru.capacity):
rdb.rdKeyLru.put(rvid.vid, res.value())
ok res.value()
if vtx.isOk() and
(GetVtxFlag.PeekCache notin flags or rdb.rdVtxLru.len < rdb.rdVtxLru.capacity):
rdb.rdVtxLru.put(rvid.vid, vtx.value())
ok (res.valueOr(VOID_HASH_KEY), vtx.valueOr(nil))
proc getVtx*(
rdb: var RdbInst;
rvid: RootedVertexID;
flags: set[GetVtxFlag];
): Result[VertexRef,(AristoError,string)] =
rdb: var RdbInst, rvid: RootedVertexID, flags: set[GetVtxFlag]
): Result[VertexRef, (AristoError, string)] =
# Try LRU cache first
var rc =
if GetVtxFlag.PeekCache in flags:
@ -151,15 +168,15 @@ proc getVtx*(
# Otherwise fetch from backend database
# A threadvar is used to avoid allocating an environment for onData
var res {.threadvar.}: Result[VertexRef,AristoError]
var res {.threadvar.}: Result[VertexRef, AristoError]
let onData = proc(data: openArray[byte]) =
res = data.deblobify(VertexRef)
let gotData = rdb.vtxCol.get(rvid.blobify().data(), onData).valueOr:
const errSym = RdbBeDriverGetVtxError
when extraTraceMessages:
trace logTxt "getVtx", vid, error=errSym, info=error
return err((errSym,error))
trace logTxt "getVtx", vid, error = errSym, info = error
return err((errSym, error))
if not gotData:
# As a hack, we count missing data as leaf nodes


@ -96,15 +96,13 @@ proc mergePayloadImpl(
else:
# Turn leaf into a branch (or extension) then insert the two leaves
# into the branch
let branch = VertexRef(vType: Branch, pfx: path.slice(0, n))
let branch = VertexRef(vType: Branch, pfx: path.slice(0, n), startVid: db.vidFetch(16))
let other = block: # Copy of existing leaf node, now one level deeper
let local = db.vidFetch()
branch.bVid[vtx.pfx[n]] = local
let local = branch.setUsed(vtx.pfx[n], true)
db.layersPutLeaf((root, local), vtx.pfx.slice(n + 1), vtx.lData)
let leafVtx = block: # Newly inserted leaf node
let local = db.vidFetch()
branch.bVid[path[n]] = local
let local = branch.setUsed(path[n], true)
db.layersPutLeaf((root, local), path.slice(n + 1), payload)
# Put the branch at the vid where the leaf was
@ -121,7 +119,7 @@ proc mergePayloadImpl(
# The existing branch is a prefix of the new entry
let
nibble = path[vtx.pfx.len]
next = vtx.bVid[nibble]
next = vtx.bVid(nibble)
if next.isValid:
cur = next
@ -135,32 +133,30 @@ proc mergePayloadImpl(
else:
# There's no vertex at the branch point - insert the payload as a new
# leaf and update the existing branch
let
local = db.vidFetch()
leafVtx = db.layersPutLeaf((root, local), path.slice(n + 1), payload)
brDup = vtx.dup()
brDup.bVid[nibble] = local
let brDup = vtx.dup()
let local = brDup.setUsed(nibble, true)
db.layersPutVtx((root, cur), brDup)
let
leafVtx = db.layersPutLeaf((root, local), path.slice(n + 1), payload)
resetKeys()
return ok((leafVtx, nil, nil))
else:
# Partial path match - we need to split the existing branch at
# the point of divergence, inserting a new branch
let branch = VertexRef(vType: Branch, pfx: path.slice(0, n))
let branch = VertexRef(vType: Branch, pfx: path.slice(0, n), startVid: db.vidFetch(16))
block: # Copy the existing vertex and add it to the new branch
let local = db.vidFetch()
branch.bVid[vtx.pfx[n]] = local
let local = branch.setUsed(vtx.pfx[n], true)
db.layersPutVtx(
(root, local),
VertexRef(vType: Branch, pfx: vtx.pfx.slice(n + 1), bVid: vtx.bVid),
VertexRef(vType: Branch, pfx: vtx.pfx.slice(n + 1), startVid: vtx.startVid, used: vtx.used),
)
let leafVtx = block: # add the new entry
let local = db.vidFetch()
branch.bVid[path[n]] = local
let local = branch.setUsed(path[n], true)
db.layersPutLeaf((root, local), path.slice(n + 1), payload)
db.layersPutVtx((root, cur), branch)


@ -57,7 +57,7 @@ proc branchNibbleMin*(vtx: VertexRef; minInx: int8): int8 =
## greater or equal the argument `nibble`.
if vtx.vType == Branch:
for n in minInx .. 15:
if vtx.bVid[n].isValid:
if vtx.bVid(uint8 n).isValid:
return n
-1
@ -66,7 +66,7 @@ proc branchNibbleMax*(vtx: VertexRef; maxInx: int8): int8 =
## less or equal the argument `nibble`.
if vtx.vType == Branch:
for n in maxInx.countdown 0:
if vtx.bVid[n].isValid:
if vtx.bVid(uint8 n).isValid:
return n
-1
@ -112,7 +112,7 @@ proc complete(
else:
leg.nibble = vtx.branchNibbleMax 15
if 0 <= leg.nibble:
vid = vtx.bVid[leg.nibble]
vid = vtx.bVid(uint8 leg.nibble)
vtx = db.getVtx (hike.root, vid)
if vtx.isValid:
uHike.legs.add leg
@ -225,7 +225,7 @@ proc finalise(
if 0 <= top.nibble and top.nibble == top.wp.vtx.branchBorderNibble:
# Check the following up vertex
let
vid = top.wp.vtx.bVid[top.nibble]
vid = top.wp.vtx.bVid(uint8 top.nibble)
vtx = db.getVtx (hike.root, vid)
if not vtx.isValid:
return err((vid,NearbyDanglingLink))
@ -298,7 +298,7 @@ proc nearbyNext(
# Look ahead checking next vertex
if start:
let vid = top.wp.vtx.bVid[top.nibble]
let vid = top.wp.vtx.bVid(uint8 top.nibble)
if not vid.isValid:
return err((top.wp.vid,NearbyDanglingLink)) # error
@ -322,7 +322,7 @@ proc nearbyNext(
if 0 <= n:
uHike.legs[^1].nibble = n
return uHike.complete(
step.wp.vtx.bVid[n], db, hikeLenMax, doLeast=moveRight)
step.wp.vtx.bVid(uint8 n), db, hikeLenMax, doLeast=moveRight)
if start:
# Retry without look ahead
@ -550,7 +550,7 @@ proc rightMissing*(
if top.wp.vtx.vType != Branch or top.nibble < 0:
return err(NearbyBranchError)
let vid = top.wp.vtx.bVid[top.nibble]
let vid = top.wp.vtx.bVid(uint8 top.nibble)
if not vid.isValid:
return err(NearbyDanglingLink) # error


@ -242,7 +242,8 @@ proc partPut*(
of Leaf:
node.vtx.lData = vtx.lData
of Branch:
node.vtx.bVid = vtx.bVid
node.vtx.startVid = vtx.startVid
node.vtx.used = vtx.used
ps.addCore(root, key) # register core node
ps.pureExt.del key # core node can't be an extension
continue
@ -266,7 +267,7 @@ proc partPut*(
for n in 0 .. 15:
let bKey = node.key[n]
if bKey.isValid:
node.vtx.bVid[n] = (? ps.getRvid(root, bKey))[0].vid
doAssert false, "TODO node.vtx.bVid[n] = (? ps.getRvid(root, bKey))[0].vid"
ps.addCore(root, key) # register core node
ps.pureExt.del key # core node can't be an extension
@ -444,7 +445,7 @@ proc partWithExtEnd*(ps: PartStateRef): Result[void,AristoError] =
return err(PartExtVtxHasVanished)
if vtx.vType != Branch or
vtx.pfx != ext.xPfx or
vtx.bVid != array[16,VertexID].default:
vtx.used != uint16.default:
restore()
return err(PartExtVtxWasModified)
rollback.add (rvid,ext)


@ -62,10 +62,10 @@ proc chainRlpNodes*(
let
nibble = path[nChewOff]
rest = path.slice(nChewOff+1)
if not vtx.bVid[nibble].isValid:
if not vtx.bVid(nibble).isValid:
return err(PartChnBranchVoidEdge)
# Recursion!
db.chainRlpNodes((rvid.root,vtx.bVid[nibble]), rest, chain)
db.chainRlpNodes((rvid.root,vtx.bVid(nibble)), rest, chain)
proc trackRlpNodes*(


@ -29,7 +29,7 @@ proc newCtx(ps: PartStateRef; hike: Hike): Result[PartStateCtx,AristoError] =
let
wp = hike.legs[^1].wp
nibble = hike.legs[^1].nibble
fromVid = wp.vtx.bVid[nibble]
fromVid = wp.vtx.bVid(uint8 nibble)
if not ps.isPerimeter(fromVid) or ps.isExtension(fromVid):
return err(PartCtxNotAvailable)
@ -43,7 +43,7 @@ proc newCtx(ps: PartStateRef; hike: Hike): Result[PartStateCtx,AristoError] =
fromVid: fromVid)
# Update database so that is space for adding a new sub-tree here
vtx2.bVid[nibble] = VertexID(0)
discard vtx2.setUsed(uint8 nibble, false)
ps.db.layersPutVtx(psc.location,vtx2)
ok psc
@ -97,12 +97,12 @@ proc ctxAcceptChange(psc: PartStateCtx): Result[bool,AristoError] =
ps = psc.ps
db = ps.db
(vtx,_) = ? db.getVtxRc psc.location
toVid = vtx.bVid[psc.nibble]
toVid = vtx.bVid(uint8 psc.nibble)
if not toVid.isValid:
# Nothing changed, so restore
let vtx2 = vtx.dup
vtx2.bVid[psc.nibble] = psc.fromVid
doAssert false, "TODO vtx2.bVid[psc.nibble] = psc.fromVid"
db.layersPutVtx(psc.location, vtx2)
ok(false)


@ -13,7 +13,7 @@
import
eth/[common, rlp],
results,
"."/[aristo_constants, aristo_desc, aristo_get]
"."/[aristo_constants, aristo_desc, aristo_compute]
type
ResolveVidFn = proc(
@ -55,14 +55,6 @@ proc serialise(
# Public RLP transcoder mixins
# ------------------------------------------------------------------------------
func append*(w: var RlpWriter; key: HashKey) =
if 1 < key.len and key.len < 32:
w.appendRawBytes key.data
else:
w.append key.data
# ---------------------
proc to*(node: NodeRef; T: type seq[seq[byte]]): T =
## Convert the argument pait `w` to a single or a double item list item of
## `<rlp-encoded-node>` type entries. Only in case of a combined extension
@ -150,7 +142,7 @@ proc serialise*(
## of account type, otherwise pass the data as is.
##
proc getKey(vid: VertexID): Result[HashKey,AristoError] =
ok (?db.getKeyRc((root, vid)))[0]
ok (?db.computeKey((root, vid)))
pyl.serialise getKey


@ -159,8 +159,8 @@ proc findTx*(
# Try `(vid,key)` on unfiltered backend
block:
let beKey = db.getKeyUbe(rvid).valueOr: VOID_HASH_KEY
if beKey == key:
let beKey = db.getKeyUbe(rvid, {}).valueOr: (VOID_HASH_KEY, nil)
if beKey[0] == key:
return ok(-2)
err(TxNotFound)


@ -54,12 +54,10 @@ proc toNode*(
of Branch:
let node = NodeRef(vtx: vtx.dup())
for n in 0 .. 15:
let vid = vtx.bVid[n]
if vid.isValid:
let key = db.computeKey((root, vid)).valueOr:
return err(@[vid])
node.key[n] = key
for n, subvid in vtx.pairs():
let key = db.computeKey((root, subvid)).valueOr:
return err(@[subvid])
node.key[n] = key
return ok node
iterator subVids*(vtx: VertexRef): VertexID =
@ -71,9 +69,8 @@ iterator subVids*(vtx: VertexRef): VertexID =
if stoID.isValid:
yield stoID.vid
of Branch:
for vid in vtx.bVid:
if vid.isValid:
yield vid
for _, subvid in vtx.pairs():
yield subvid
iterator subVidKeys*(node: NodeRef): (VertexID,HashKey) =
## Simolar to `subVids()` but for nodes
@ -84,10 +81,8 @@ iterator subVidKeys*(node: NodeRef): (VertexID,HashKey) =
if stoID.isValid:
yield (stoID.vid, node.key[0])
of Branch:
for n in 0 .. 15:
let vid = node.vtx.bVid[n]
if vid.isValid:
yield (vid,node.key[n])
for n, subvid in node.vtx.pairs():
yield (subvid,node.key[n])
# ------------------------------------------------------------------------------
# End


@ -20,14 +20,15 @@ import
# Public functions
# ------------------------------------------------------------------------------
proc vidFetch*(db: AristoDbRef): VertexID =
proc vidFetch*(db: AristoDbRef, n = 1): VertexID =
## Fetch next vertex ID.
##
if db.top.vTop == 0:
db.top.vTop = VertexID(LEAST_FREE_VID)
else:
db.top.vTop.inc
db.top.vTop
var ret = db.top.vTop
ret.inc
db.top.vTop.inc(n)
ret
# ------------------------------------------------------------------------------
# End


@ -19,7 +19,8 @@ import
export
memory_db,
memory_only
memory_only,
aristo_desc
# ------------------------------------------------------------------------------
# Public iterators (all in one)


@ -1,41 +1,39 @@
alabaster==0.7.16
attrs==23.2.0
Babel==2.15.0
cattrs==23.2.3
certifi==2024.07.04
charset-normalizer==3.3.2
contourpy==1.2.1
#
# This file is autogenerated by pip-compile with Python 3.13
# by the following command:
#
# pip-compile
#
contourpy==1.3.1
# via matplotlib
cycler==0.12.1
docutils==0.20.1
esbonio==0.16.4
fonttools==4.53.0
idna==3.7
imagesize==1.4.1
Jinja2==3.1.4
kiwisolver==1.4.5
lsprotocol==2023.0.1
MarkupSafe==2.1.5
matplotlib==3.9.0
numpy==1.26.4
packaging==24.0
pandas==2.2.2
pillow==10.3.0
platformdirs==4.2.1
pygls==1.3.1
Pygments==2.18.0
pyparsing==3.1.2
pyspellchecker==0.8.1
# via matplotlib
fonttools==4.55.0
# via matplotlib
kiwisolver==1.4.7
# via matplotlib
matplotlib==3.9.2
# via -r requirements.in
numpy==2.1.3
# via
# contourpy
# matplotlib
# pandas
packaging==24.2
# via matplotlib
pandas==2.2.3
# via -r requirements.in
pillow==11.0.0
# via matplotlib
pyparsing==3.2.0
# via matplotlib
python-dateutil==2.9.0.post0
pytz==2024.1
requests==2.32.2
# via
# matplotlib
# pandas
pytz==2024.2
# via pandas
six==1.16.0
snowballstemmer==2.2.0
Sphinx==7.3.7
sphinxcontrib-applehelp==1.0.8
sphinxcontrib-devhelp==1.0.6
sphinxcontrib-htmlhelp==2.0.5
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.7
sphinxcontrib-serializinghtml==1.1.10
tzdata==2024.1
urllib3==2.2.2
# via python-dateutil
tzdata==2024.2
# via pandas


@ -204,15 +204,15 @@ proc isDbEq(a, b: LayerRef; db: AristoDbRef; noisy = true): bool =
if aKey.isValid and bKey.isValid:
return false
# The valid one must match the backend data
let rc = db.getKeyUbe vid
let rc = db.getKeyUbe(vid, {})
if rc.isErr:
return false
let key = if aKey.isValid: aKey else: bKey
if key != rc.value:
if key != rc.value[0]:
return false
elif not vid.isValid and not bMap.hasKey vid:
let rc = db.getKeyUbe vid
let rc = db.getKeyUbe(vid, {})
if rc.isOk:
return false # Exists on backend but missing on `bMap[]`
elif rc.error != GetKeyNotFound:


@ -10,60 +10,33 @@
{.used.}
import unittest2, ../../nimbus/db/aristo/aristo_blobify
import unittest2, std/sequtils, ../../nimbus/db/aristo/aristo_blobify
suite "Aristo blobify":
test "VertexRef roundtrip":
let
leafAccount = VertexRef(vType: Leaf, lData: LeafPayload(pType: AccountData))
leafStoData =
VertexRef(vType: Leaf, lData: LeafPayload(pType: StoData, stoData: 42.u256))
branch = VertexRef(
vType: Branch,
bVid: [
VertexID(0),
VertexID(1),
VertexID(0),
VertexID(0),
VertexID(4),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
],
leafAccount = VertexRef(
vType: Leaf,
pfx: NibblesBuf.nibble(1),
lData: LeafPayload(
pType: AccountData, account: AristoAccount(nonce: 100, balance: 123.u256)
),
)
leafStoData = VertexRef(
vType: Leaf,
pfx: NibblesBuf.nibble(3),
lData: LeafPayload(pType: StoData, stoData: 42.u256),
)
branch = VertexRef(vType: Branch, startVid: VertexID(0x334452), used: 0x43'u16)
extension = VertexRef(
vType: Branch,
pfx: NibblesBuf.nibble(2),
bVid: [
VertexID(0),
VertexID(0),
VertexID(2),
VertexID(0),
VertexID(0),
VertexID(5),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
VertexID(0),
],
startVid: VertexID(0x55),
used: 0x12'u16,
)
key = HashKey.fromBytes(rlp.encode([10'u64]))[]
key = HashKey.fromBytes(repeat(0x34'u8, 32))[]
check:
deblobify(blobify(leafAccount, key), VertexRef)[] == leafAccount


@ -92,10 +92,10 @@ proc payloadAsBlob(pyl: LeafPayload; ps: PartStateRef): seq[byte] =
of AccountData:
let key = block:
if pyl.stoID.isValid:
let rc = ps.db.getKeyRc (VertexID(1),pyl.stoID.vid)
let rc = ps.db.getKeyRc((VertexID(1),pyl.stoID.vid), {})
if rc.isErr:
raiseAssert info & ": getKey => " & $rc.error
rc.value[0]
rc.value[0][0]
else:
VOID_HASH_KEY