Store cached hash at the layer corresponding to the source data (#2492)

When lazily verifying state roots, we may end up with an entire state
without roots that gets computed for the whole database - in the current
design, that would result in hashes for the entire trie being held in
memory.

Since the hash depends only on the data in the vertex, we can store it
directly at the top-most level derived from the vertices it depends on
- be that memory or database - this makes the memory usage broadly
linear with respect to the already-existing in-memory change set stored
in the layers.

It also ensures that if we have multiple forks in memory, hashes get
cached in the correct layer maximising reuse between forks.

The same layer numbering scheme as elsewhere is reused, where -2 is the
backend, -1 is the balancer, 0 is the top layer, and higher values are
successively deeper layers of the stack.

A downside of this approach is that we create many small batches - a
future improvement could be to collect all such writes in a single
batch, though the memory profile of this approach should be examined
first (where is the batch kept, exactly?).
This commit is contained in:
Jacek Sieka 2024-07-18 09:13:56 +02:00 committed by GitHub
parent 6677f57ea9
commit df4a21c910
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 122 additions and 79 deletions

View File

@ -81,7 +81,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
for (rvid,vtx) in db.layersWalkVtx:
if vtx.isValid and topVidCache.vid < rvid.vid:
topVidCache = rvid
let key = db.layersGetKey(rvid).valueOr: VOID_HASH_KEY
let (key, _) = db.layersGetKey(rvid).valueOr: (VOID_HASH_KEY, 0)
if not vtx.isValid:
# Some vertex is to be deleted, the key must be empty
if key.isValid:

View File

@ -110,7 +110,7 @@ proc checkTopCommon*(
let rc = db.layersGetKey rvid
if rc.isErr:
return err((rvid.vid,CheckAnyVtxEmptyKeyMissing))
if rc.value.isValid:
if rc.value[0].isValid:
return err((rvid.vid,CheckAnyVtxEmptyKeyExpected))
if vTop.distinctBase < LEAST_FREE_VID:

View File

@ -13,40 +13,54 @@
import
eth/common,
results,
"."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise]
"."/[aristo_desc, aristo_get, aristo_serialise]
proc computeKey*(
proc putKeyAtLevel(
db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int
): Result[void, AristoError] =
## Store a hash key in the given layer or directly to the underlying database
## which helps ensure that memory usage is proportional to the pending change
## set (vertex data may have been committed to disk without computing the
## corresponding hash!)
if level == -2:
let be = db.backend
doAssert be != nil, "source data is from the backend"
# TODO long-running batch here?
let writeBatch = ?be.putBegFn()
be.putKeyFn(writeBatch, rvid, key)
?be.putEndFn writeBatch
ok()
else:
db.deltaAtLevel(level).kMap[rvid] = key
ok()
func maxLevel(cur, other: int): int =
# Compare two levels and return the topmost in the stack, taking into account
# the odd reversal of order around the zero point
if cur < 0:
max(cur, other) # >= 0 is always more topmost than <0
elif other < 0:
cur
else:
min(cur, other) # Here the order is reversed and 0 is the top layer
proc computeKeyImpl(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert
): Result[HashKey, AristoError] =
): Result[(HashKey, int), AristoError] =
## Compute the key for an arbitrary vertex ID. If successful, the length of
## the resulting key might be smaller than 32. If it is used as a root vertex
## state/hash, it must be converted to a `Hash256` (using `.to(Hash256)`) as
## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
## 32 byte value.
##
# This is a variation on getKeyRc which computes the key instead of returning
# an error
# TODO it should not always write the key to the persistent storage
proc getKey(db: AristoDbRef; rvid: RootedVertexID): HashKey =
block body:
let key = db.layersGetKey(rvid).valueOr:
break body
if key.isValid:
return key
else:
return VOID_HASH_KEY
let rc = db.getKeyBE rvid
if rc.isOk:
return rc.value
VOID_HASH_KEY
db.getKeyRc(rvid).isErrOr:
# Value cached either in layers or database
return ok value
let (vtx, vl) = ? db.getVtxRc rvid
let key = getKey(db, rvid)
if key.isValid():
return ok key
let vtx = ? db.getVtxRc rvid
# Top-most level of all the vertices this hash computation depends on
var level = vl
# TODO this is the same code as when serializing NodeRef, without the NodeRef
var writer = initRlpWriter()
@ -55,20 +69,23 @@ proc computeKey*(
of Leaf:
writer.startList(2)
writer.append(vtx.lPfx.toHexPrefix(isLeaf = true))
# Need to resolve storage root for account leaf
case vtx.lData.pType
of AccountData:
let
stoID = vtx.lData.stoID
key = if stoID.isValid:
?db.computeKey((stoID, stoID))
skey =
if stoID.isValid:
let (skey, sl) = ?db.computeKeyImpl((stoID, stoID))
level = maxLevel(level, sl)
skey
else:
VOID_HASH_KEY
writer.append(encode Account(
nonce: vtx.lData.account.nonce,
balance: vtx.lData.account.balance,
storageRoot: key.to(Hash256),
storageRoot: skey.to(Hash256),
codeHash: vtx.lData.account.codeHash)
)
of RawData:
@ -83,7 +100,9 @@ proc computeKey*(
for n in 0..15:
let vid = vtx.bVid[n]
if vid.isValid:
w.append(?db.computeKey((rvid.root, vid)))
let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid))
level = maxLevel(level, bl)
w.append(bkey)
else:
w.append(VOID_HASH_KEY)
w.append EmptyBlob
@ -97,15 +116,23 @@ proc computeKey*(
else:
writeBranch(writer)
var h = writer.finish().digestTo(HashKey)
let h = writer.finish().digestTo(HashKey)
# TODO This shouldn't necessarily go into the database if we're just computing
# a key ephemerally - it should however be cached for some time since
# deep hash computations are expensive
db.layersPutKey(rvid, h)
ok h
# Cache the hash in the same storage layer as the top-most value that it
# depends on (recursively) - this could be an ephemeral in-memory layer or the
# underlying database backend - typically, values closer to the root are more
# likely to live in an in-memory layer since any leaf change will lead to the
# root key also changing while leaves that have never been hashed will see
# their hash being saved directly to the backend.
? db.putKeyAtLevel(rvid, h, level)
ok (h, level)
proc computeKey*(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert
): Result[HashKey, AristoError] =
ok (?computeKeyImpl(db, rvid))[0]
# ------------------------------------------------------------------------------
# End

View File

@ -61,7 +61,7 @@ proc delSubTreeImpl(
## Implementation of *delete* sub-trie.
var
dispose = @[root]
rootVtx = db.getVtxRc((root, root)).valueOr:
(rootVtx, _) = db.getVtxRc((root, root)).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
@ -73,7 +73,7 @@ proc delSubTreeImpl(
for vtx in follow:
for vid in vtx.subVids:
# Exiting here leaves the tree as-is
let vtx = ? db.getVtxRc((root, vid))
let vtx = (? db.getVtxRc((root, vid)))[0]
redo.add vtx
dispose.add vid
redo.swap follow
@ -92,7 +92,7 @@ proc delStoTreeImpl(
): Result[void,AristoError] =
## Implementation of *delete* sub-trie.
let vtx = db.getVtxRc(rvid).valueOr:
let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)

View File

@ -34,8 +34,8 @@ from ./aristo_desc/desc_backend
# Not auto-exporting backend
export
aristo_constants, desc_error, desc_identifiers, desc_nibbles, desc_structural,
keyed_queue
tables, aristo_constants, desc_error, desc_identifiers, desc_nibbles,
desc_structural, keyed_queue
const
accLruSize* = 1024 * 1024
@ -313,6 +313,21 @@ iterator rstack*(db: AristoDbRef): LayerRef =
for i in 0..<db.stack.len:
yield db.stack[db.stack.len - i - 1]
proc deltaAtLevel*(db: AristoDbRef, level: int): LayerDeltaRef =
if level == 0:
db.top.delta
elif level > 0:
doAssert level <= db.stack.len
db.stack[^level].delta
elif level == -1:
doAssert db.balancer != nil
db.balancer
elif level == -2:
nil
else:
raiseAssert "Unknown level " & $level
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -85,17 +85,18 @@ proc retrieveMerkleHash(
root: VertexID;
updateOk: bool;
): Result[Hash256,AristoError] =
let key = block:
let key =
if updateOk:
db.computeKey((root, root)).valueOr:
if error == GetVtxNotFound:
return ok(EMPTY_ROOT_HASH)
return err(error)
else:
db.getKeyRc((root, root)).valueOr:
let (key, _) = db.getKeyRc((root, root)).valueOr:
if error == GetKeyNotFound:
return ok(EMPTY_ROOT_HASH) # empty sub-tree
return err(error)
key
ok key.to(Hash256)

View File

@ -73,33 +73,33 @@ proc getTuvBE*(
proc getVtxBE*(
db: AristoDbRef;
rvid: RootedVertexID;
): Result[VertexRef,AristoError] =
): Result[(VertexRef, int),AristoError] =
## Get the vertex from the (filtered) backend if available.
if not db.balancer.isNil:
db.balancer.sTab.withValue(rvid, w):
if w[].isValid:
return ok(w[])
return ok (w[], -1)
return err(GetVtxNotFound)
db.getVtxUbe rvid
ok (? db.getVtxUbe rvid, -2)
proc getKeyBE*(
db: AristoDbRef;
rvid: RootedVertexID;
): Result[HashKey,AristoError] =
): Result[(HashKey, int),AristoError] =
## Get the merkle hash/key from the (filtered) backend if available.
if not db.balancer.isNil:
db.balancer.kMap.withValue(rvid, w):
if w[].isValid:
return ok(w[])
return ok((w[], -1))
return err(GetKeyNotFound)
db.getKeyUbe rvid
ok ((?db.getKeyUbe rvid), -2)
# ------------------
proc getVtxRc*(
db: AristoDbRef;
rvid: RootedVertexID
): Result[VertexRef,AristoError] =
): Result[(VertexRef, int),AristoError] =
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
##
block body:
@ -108,7 +108,7 @@ proc getVtxRc*(
# error symbol `GetVtxNotFound`.
let vtx = db.layersGetVtx(rvid).valueOr:
break body
if vtx.isValid:
if vtx[0].isValid:
return ok vtx
else:
return err(GetVtxNotFound)
@ -119,10 +119,10 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
## The function returns `nil` on error or failure.
##
db.getVtxRc(rvid).valueOr: VertexRef(nil)
db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoError] =
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
## Cascaded attempt to fetch a Merkle hash from the cache layers or the
## backend. This function will never return a `VOID_HASH_KEY` but rather
## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error.
@ -132,7 +132,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
break body
# If there is a zero key value, the entry is either marked for being
# updated or for deletion on the database. So check below.
if key.isValid:
if key[0].isValid:
return ok key
# The zero key value does not refer to an update mark if there is no
@ -141,7 +141,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
# There was no vertex on the cache. So there must be one on the backend (the
# reason for the key label to exist, at all.)
return err(GetKeyUpdateNeeded)
if vtx.isValid:
if vtx[0].isValid:
return err(GetKeyUpdateNeeded)
else:
# The vertex is to be deleted. So is the value key.
@ -153,7 +153,7 @@ proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
## Cascaded attempt to fetch a Merkle hash key from the cache layers or the
## backend. The function returns `VOID_HASH_KEY` on error or failure.
##
db.getKeyRc(rvid).valueOr: VOID_HASH_KEY
(db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0]
# ------------------------------------------------------------------------------
# End

View File

@ -77,7 +77,7 @@ proc step*(
path: NibblesBuf, rvid: RootedVertexID, db: AristoDbRef
): Result[(VertexRef, NibblesBuf, VertexID), AristoError] =
# Fetch next vertex
let vtx = db.getVtxRc(rvid).valueOr:
let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error != GetVtxNotFound:
return err(error)

View File

@ -11,7 +11,7 @@
{.push raises: [].}
import
std/[sequtils, sets, tables],
std/[enumerate, sequtils, sets, tables],
eth/common,
results,
./aristo_desc
@ -56,40 +56,40 @@ func nLayersKey*(db: AristoDbRef): int =
# Public functions: getter variants
# ------------------------------------------------------------------------------
func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[VertexRef] =
func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[(VertexRef, int)] =
## Find a vertex on the cache layers. An `ok()` result might contain a
## `nil` vertex if it is stored on the cache that way.
##
db.top.delta.sTab.withValue(rvid, item):
return Opt.some(item[])
return Opt.some((item[], 0))
for w in db.rstack:
for i, w in enumerate(db.rstack):
w.delta.sTab.withValue(rvid, item):
return Opt.some(item[])
return Opt.some((item[], i + 1))
Opt.none(VertexRef)
Opt.none((VertexRef, int))
func layersGetVtxOrVoid*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
## Simplified version of `layersGetVtx()`
db.layersGetVtx(rvid).valueOr: VertexRef(nil)
db.layersGetVtx(rvid).valueOr((VertexRef(nil), 0))[0]
func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[HashKey] =
func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[(HashKey, int)] =
## Find a hash key on the cache layers. An `ok()` result might contain a void
## hash key if it is stored on the cache that way.
##
db.top.delta.kMap.withValue(rvid, item):
return Opt.some(item[])
return Opt.some((item[], 0))
for w in db.rstack:
for i, w in enumerate(db.rstack):
w.delta.kMap.withValue(rvid, item):
return ok(item[])
return ok((item[], i + 1))
Opt.none(HashKey)
Opt.none((HashKey, int))
func layersGetKeyOrVoid*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
## Simplified version of `layersGetKey()`
db.layersGetKey(rvid).valueOr: VOID_HASH_KEY
(db.layersGetKey(rvid).valueOr (VOID_HASH_KEY, 0))[0]
func layersGetAccLeaf*(db: AristoDbRef; accPath: Hash256): Opt[VertexRef] =
db.top.delta.accLeaves.withValue(accPath, item):

View File

@ -50,7 +50,7 @@ proc mergePayloadImpl*(
cur = root
touched: array[NibblesBuf.high + 1, VertexID]
pos = 0
vtx = db.getVtxRc((root, cur)).valueOr:
(vtx, _) = db.getVtxRc((root, cur)).valueOr:
if error != GetVtxNotFound:
return err(error)
@ -120,7 +120,7 @@ proc mergePayloadImpl*(
if next.isValid:
cur = next
path = path.slice(n + 1)
vtx = ?db.getVtxRc((root, next))
(vtx, _) = ?db.getVtxRc((root, next))
else:
# There's no vertex at the branch point - insert the payload as a new
# leaf and update the existing branch

View File

@ -149,7 +149,7 @@ proc serialise*(
## account type, otherwise pass the data as is.
##
proc getKey(vid: VertexID): Result[HashKey,AristoError] =
db.getKeyRc((root, vid))
ok (?db.getKeyRc((root, vid)))[0]
pyl.serialise getKey

View File

@ -44,14 +44,14 @@ proc toNode*(
block body:
let key = db.layersGetKey(rvid).valueOr:
break body
if key.isValid:
return key
if key[0].isValid:
return key[0]
else:
return VOID_HASH_KEY
if beOk:
let rc = db.getKeyBE rvid
if rc.isOk:
return rc.value
return rc.value[0]
VOID_HASH_KEY
case vtx.vType: