Store cached hash at the layer corresponding to the source data (#2492)

When lazily verifying state roots, we may end up with an entire state
without roots that gets computed for the whole database - in the current
design, that would result in hashes for the entire trie being held in
memory.

Since the hash depends only on the data in the vertex, we can store it
directly at the top-most level derived from the vertices it depends on
- be that memory or database - this makes the memory usage broadly
linear with respect to the already-existing in-memory change set stored
in the layers.

It also ensures that if we have multiple forks in memory, hashes get
cached in the correct layer maximising reuse between forks.

The same layer numbering scheme as elsewhere is reused, where -2 is the
backend, -1 is the balancer, 0 is the top layer and 1+ are the stack
layers below it, counted from the top of the stack.

A downside of this approach is that we create many small batches - a
future improvement could be to collect all such writes in a single
batch, though the memory profile of this approach should be examined
first (where is the batch kept, exactly?).
This commit is contained in:
Jacek Sieka 2024-07-18 09:13:56 +02:00 committed by GitHub
parent 6677f57ea9
commit df4a21c910
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 122 additions and 79 deletions

View File

@ -81,7 +81,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
for (rvid,vtx) in db.layersWalkVtx: for (rvid,vtx) in db.layersWalkVtx:
if vtx.isValid and topVidCache.vid < rvid.vid: if vtx.isValid and topVidCache.vid < rvid.vid:
topVidCache = rvid topVidCache = rvid
let key = db.layersGetKey(rvid).valueOr: VOID_HASH_KEY let (key, _) = db.layersGetKey(rvid).valueOr: (VOID_HASH_KEY, 0)
if not vtx.isValid: if not vtx.isValid:
# Some vertex is to be deleted, the key must be empty # Some vertex is to be deleted, the key must be empty
if key.isValid: if key.isValid:

View File

@ -110,7 +110,7 @@ proc checkTopCommon*(
let rc = db.layersGetKey rvid let rc = db.layersGetKey rvid
if rc.isErr: if rc.isErr:
return err((rvid.vid,CheckAnyVtxEmptyKeyMissing)) return err((rvid.vid,CheckAnyVtxEmptyKeyMissing))
if rc.value.isValid: if rc.value[0].isValid:
return err((rvid.vid,CheckAnyVtxEmptyKeyExpected)) return err((rvid.vid,CheckAnyVtxEmptyKeyExpected))
if vTop.distinctBase < LEAST_FREE_VID: if vTop.distinctBase < LEAST_FREE_VID:

View File

@ -13,40 +13,54 @@
import import
eth/common, eth/common,
results, results,
"."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise] "."/[aristo_desc, aristo_get, aristo_serialise]
proc computeKey*( proc putKeyAtLevel(
db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int
): Result[void, AristoError] =
## Store a hash key in the given layer or directly to the underlying database
## which helps ensure that memory usage is proportional to the pending change
## set (vertex data may have been committed to disk without computing the
## corresponding hash!)
if level == -2:
let be = db.backend
doAssert be != nil, "source data is from the backend"
# TODO long-running batch here?
let writeBatch = ?be.putBegFn()
be.putKeyFn(writeBatch, rvid, key)
?be.putEndFn writeBatch
ok()
else:
db.deltaAtLevel(level).kMap[rvid] = key
ok()
func maxLevel(cur, other: int): int =
  ## Compare two layer levels and return the one that is topmost in the stack.
  ##
  ## Level numbering: `-2` is the backend, `-1` the balancer, `0` the top
  ## layer and `1..n` the stack layers counted downwards from the top. The
  ## resulting "topmost first" order is therefore `0, 1, 2, .., -1, -2`.
  if cur >= 0 and other >= 0:
    # Both are in-memory layers: here the order is reversed and the smaller
    # number is closer to the top (0 is the top layer).
    min(cur, other)
  else:
    # At least one level is negative (balancer/backend): any level >= 0 is
    # more topmost than a negative one, and -1 is more topmost than -2, so
    # the numerically larger value wins in every remaining case.
    max(cur, other)
proc computeKeyImpl(
db: AristoDbRef; # Database, top layer db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert rvid: RootedVertexID; # Vertex to convert
): Result[HashKey, AristoError] = ): Result[(HashKey, int), AristoError] =
## Compute the key for an arbitrary vertex ID. If successful, the length of ## Compute the key for an arbitrary vertex ID. If successful, the length of
## the resulting key might be smaller than 32. If it is used as a root vertex ## the resulting key might be smaller than 32. If it is used as a root vertex
## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as ## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as
## in `db.computeKey(rvid).value.to(Hash256)` which always results in a ## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
## 32 byte value. ## 32 byte value.
##
# This is a variation on getKeyRc which computes the key instead of returning
# an error
# TODO it should not always write the key to the persistent storage
proc getKey(db: AristoDbRef; rvid: RootedVertexID): HashKey = db.getKeyRc(rvid).isErrOr:
block body: # Value cached either in layers or database
let key = db.layersGetKey(rvid).valueOr: return ok value
break body let (vtx, vl) = ? db.getVtxRc rvid
if key.isValid:
return key
else:
return VOID_HASH_KEY
let rc = db.getKeyBE rvid
if rc.isOk:
return rc.value
VOID_HASH_KEY
let key = getKey(db, rvid) # Top-most level of all the verticies this hash compution depends on
if key.isValid(): var level = vl
return ok key
let vtx = ? db.getVtxRc rvid
# TODO this is the same code as when serializing NodeRef, without the NodeRef # TODO this is the same code as when serializing NodeRef, without the NodeRef
var writer = initRlpWriter() var writer = initRlpWriter()
@ -55,20 +69,23 @@ proc computeKey*(
of Leaf: of Leaf:
writer.startList(2) writer.startList(2)
writer.append(vtx.lPfx.toHexPrefix(isLeaf = true)) writer.append(vtx.lPfx.toHexPrefix(isLeaf = true))
# Need to resolve storage root for account leaf
case vtx.lData.pType case vtx.lData.pType
of AccountData: of AccountData:
let let
stoID = vtx.lData.stoID stoID = vtx.lData.stoID
key = if stoID.isValid: skey =
?db.computeKey((stoID, stoID)) if stoID.isValid:
else: let (skey, sl) = ?db.computeKeyImpl((stoID, stoID))
VOID_HASH_KEY level = maxLevel(level, sl)
skey
else:
VOID_HASH_KEY
writer.append(encode Account( writer.append(encode Account(
nonce: vtx.lData.account.nonce, nonce: vtx.lData.account.nonce,
balance: vtx.lData.account.balance, balance: vtx.lData.account.balance,
storageRoot: key.to(Hash256), storageRoot: skey.to(Hash256),
codeHash: vtx.lData.account.codeHash) codeHash: vtx.lData.account.codeHash)
) )
of RawData: of RawData:
@ -83,7 +100,9 @@ proc computeKey*(
for n in 0..15: for n in 0..15:
let vid = vtx.bVid[n] let vid = vtx.bVid[n]
if vid.isValid: if vid.isValid:
w.append(?db.computeKey((rvid.root, vid))) let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid))
level = maxLevel(level, bl)
w.append(bkey)
else: else:
w.append(VOID_HASH_KEY) w.append(VOID_HASH_KEY)
w.append EmptyBlob w.append EmptyBlob
@ -97,15 +116,23 @@ proc computeKey*(
else: else:
writeBranch(writer) writeBranch(writer)
var h = writer.finish().digestTo(HashKey) let h = writer.finish().digestTo(HashKey)
# TODO This shouldn't necessarily go into the database if we're just computing # Cache the hash int the same storage layer as the the top-most value that it
# a key ephemerally - it should however be cached for some tiem since # depends on (recursively) - this could be an ephemeral in-memory layer or the
# deep hash computations are expensive # underlying database backend - typically, values closer to the root are more
db.layersPutKey(rvid, h) # likely to live in an in-memory layer since any leaf change will lead to the
ok h # root key also changing while leaves that have never been hashed will see
# their hash being saved directly to the backend.
? db.putKeyAtLevel(rvid, h, level)
ok (h, level)
proc computeKey*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Vertex to convert
      ): Result[HashKey, AristoError] =
  ## Public entry point for computing the hash key of the vertex `rvid`.
  ##
  ## Delegates to `computeKeyImpl()` and returns only the key component of its
  ## `(key, level)` result; the level is dropped. Any error returned by
  ## `computeKeyImpl()` is passed on to the caller unchanged.
  let (key, _) = ?computeKeyImpl(db, rvid)
  ok key
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -61,7 +61,7 @@ proc delSubTreeImpl(
## Implementation of *delete* sub-trie. ## Implementation of *delete* sub-trie.
var var
dispose = @[root] dispose = @[root]
rootVtx = db.getVtxRc((root, root)).valueOr: (rootVtx, _) = db.getVtxRc((root, root)).valueOr:
if error == GetVtxNotFound: if error == GetVtxNotFound:
return ok() return ok()
return err(error) return err(error)
@ -73,7 +73,7 @@ proc delSubTreeImpl(
for vtx in follow: for vtx in follow:
for vid in vtx.subVids: for vid in vtx.subVids:
# Exiting here leaves the tree as-is # Exiting here leaves the tree as-is
let vtx = ? db.getVtxRc((root, vid)) let vtx = (? db.getVtxRc((root, vid)))[0]
redo.add vtx redo.add vtx
dispose.add vid dispose.add vid
redo.swap follow redo.swap follow
@ -92,7 +92,7 @@ proc delStoTreeImpl(
): Result[void,AristoError] = ): Result[void,AristoError] =
## Implementation of *delete* sub-trie. ## Implementation of *delete* sub-trie.
let vtx = db.getVtxRc(rvid).valueOr: let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound: if error == GetVtxNotFound:
return ok() return ok()
return err(error) return err(error)

View File

@ -34,8 +34,8 @@ from ./aristo_desc/desc_backend
# Not auto-exporting backend # Not auto-exporting backend
export export
aristo_constants, desc_error, desc_identifiers, desc_nibbles, desc_structural, tables, aristo_constants, desc_error, desc_identifiers, desc_nibbles,
keyed_queue desc_structural, keyed_queue
const const
accLruSize* = 1024 * 1024 accLruSize* = 1024 * 1024
@ -313,6 +313,21 @@ iterator rstack*(db: AristoDbRef): LayerRef =
for i in 0..<db.stack.len: for i in 0..<db.stack.len:
yield db.stack[db.stack.len - i - 1] yield db.stack[db.stack.len - i - 1]
proc deltaAtLevel*(db: AristoDbRef, level: int): LayerDeltaRef =
  ## Map a layer level number to the delta stored at that level:
  ##
  ## * `0` is the delta of the top layer,
  ## * `1..db.stack.len` is the delta of a stack layer, counted from the end
  ##   of `db.stack` (level `1` is the last entry),
  ## * `-1` is the balancer, which must not be `nil`,
  ## * `-2` yields `nil`.
  ##
  ## Any other level, as well as a positive level exceeding the stack size or
  ## a missing balancer for level `-1`, triggers an assertion failure.
  if level == 0:
    db.top.delta
  elif level > 0:
    # Backwards index `^level` requires `1 <= level <= db.stack.len`
    doAssert level <= db.stack.len
    db.stack[^level].delta
  elif level == -1:
    doAssert db.balancer != nil
    db.balancer
  elif level == -2:
    nil
  else:
    raiseAssert "Unknown level " & $level
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------

View File

@ -85,17 +85,18 @@ proc retrieveMerkleHash(
root: VertexID; root: VertexID;
updateOk: bool; updateOk: bool;
): Result[Hash256,AristoError] = ): Result[Hash256,AristoError] =
let key = block: let key =
if updateOk: if updateOk:
db.computeKey((root, root)).valueOr: db.computeKey((root, root)).valueOr:
if error == GetVtxNotFound: if error == GetVtxNotFound:
return ok(EMPTY_ROOT_HASH) return ok(EMPTY_ROOT_HASH)
return err(error) return err(error)
else: else:
db.getKeyRc((root, root)).valueOr: let (key, _) = db.getKeyRc((root, root)).valueOr:
if error == GetKeyNotFound: if error == GetKeyNotFound:
return ok(EMPTY_ROOT_HASH) # empty sub-tree return ok(EMPTY_ROOT_HASH) # empty sub-tree
return err(error) return err(error)
key
ok key.to(Hash256) ok key.to(Hash256)

View File

@ -73,33 +73,33 @@ proc getTuvBE*(
proc getVtxBE*( proc getVtxBE*(
db: AristoDbRef; db: AristoDbRef;
rvid: RootedVertexID; rvid: RootedVertexID;
): Result[VertexRef,AristoError] = ): Result[(VertexRef, int),AristoError] =
## Get the vertex from the (filtered) backened if available. ## Get the vertex from the (filtered) backened if available.
if not db.balancer.isNil: if not db.balancer.isNil:
db.balancer.sTab.withValue(rvid, w): db.balancer.sTab.withValue(rvid, w):
if w[].isValid: if w[].isValid:
return ok(w[]) return ok (w[], -1)
return err(GetVtxNotFound) return err(GetVtxNotFound)
db.getVtxUbe rvid ok (? db.getVtxUbe rvid, -2)
proc getKeyBE*( proc getKeyBE*(
db: AristoDbRef; db: AristoDbRef;
rvid: RootedVertexID; rvid: RootedVertexID;
): Result[HashKey,AristoError] = ): Result[(HashKey, int),AristoError] =
## Get the merkle hash/key from the (filtered) backend if available. ## Get the merkle hash/key from the (filtered) backend if available.
if not db.balancer.isNil: if not db.balancer.isNil:
db.balancer.kMap.withValue(rvid, w): db.balancer.kMap.withValue(rvid, w):
if w[].isValid: if w[].isValid:
return ok(w[]) return ok((w[], -1))
return err(GetKeyNotFound) return err(GetKeyNotFound)
db.getKeyUbe rvid ok ((?db.getKeyUbe rvid), -2)
# ------------------ # ------------------
proc getVtxRc*( proc getVtxRc*(
db: AristoDbRef; db: AristoDbRef;
rvid: RootedVertexID rvid: RootedVertexID
): Result[VertexRef,AristoError] = ): Result[(VertexRef, int),AristoError] =
## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
## ##
block body: block body:
@ -108,7 +108,7 @@ proc getVtxRc*(
# error symbol `GetVtxNotFound`. # error symbol `GetVtxNotFound`.
let vtx = db.layersGetVtx(rvid).valueOr: let vtx = db.layersGetVtx(rvid).valueOr:
break body break body
if vtx.isValid: if vtx[0].isValid:
return ok vtx return ok vtx
else: else:
return err(GetVtxNotFound) return err(GetVtxNotFound)
@ -119,10 +119,10 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
## The function returns `nil` on error or failure. ## The function returns `nil` on error or failure.
## ##
db.getVtxRc(rvid).valueOr: VertexRef(nil) db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoError] = proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
## Cascaded attempt to fetch a Merkle hash from the cache layers or the ## Cascaded attempt to fetch a Merkle hash from the cache layers or the
## backend. This function will never return a `VOID_HASH_KEY` but rather ## backend. This function will never return a `VOID_HASH_KEY` but rather
## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error. ## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error.
@ -132,7 +132,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
break body break body
# If there is a zero key value, the entry is either marked for being # If there is a zero key value, the entry is either marked for being
# updated or for deletion on the database. So check below. # updated or for deletion on the database. So check below.
if key.isValid: if key[0].isValid:
return ok key return ok key
# The zero key value does not refer to an update mark if there is no # The zero key value does not refer to an update mark if there is no
@ -141,7 +141,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
# There was no vertex on the cache. So there must be one the backend (the # There was no vertex on the cache. So there must be one the backend (the
# reason for the key lable to exists, at all.) # reason for the key lable to exists, at all.)
return err(GetKeyUpdateNeeded) return err(GetKeyUpdateNeeded)
if vtx.isValid: if vtx[0].isValid:
return err(GetKeyUpdateNeeded) return err(GetKeyUpdateNeeded)
else: else:
# The vertex is to be deleted. So is the value key. # The vertex is to be deleted. So is the value key.
@ -153,7 +153,7 @@ proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
## The function returns `nil` on error or failure. ## The function returns `nil` on error or failure.
## ##
db.getKeyRc(rvid).valueOr: VOID_HASH_KEY (db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0]
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -77,7 +77,7 @@ proc step*(
path: NibblesBuf, rvid: RootedVertexID, db: AristoDbRef path: NibblesBuf, rvid: RootedVertexID, db: AristoDbRef
): Result[(VertexRef, NibblesBuf, VertexID), AristoError] = ): Result[(VertexRef, NibblesBuf, VertexID), AristoError] =
# Fetch next vertex # Fetch next vertex
let vtx = db.getVtxRc(rvid).valueOr: let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error != GetVtxNotFound: if error != GetVtxNotFound:
return err(error) return err(error)

View File

@ -11,7 +11,7 @@
{.push raises: [].} {.push raises: [].}
import import
std/[sequtils, sets, tables], std/[enumerate, sequtils, sets, tables],
eth/common, eth/common,
results, results,
./aristo_desc ./aristo_desc
@ -56,40 +56,40 @@ func nLayersKey*(db: AristoDbRef): int =
# Public functions: getter variants # Public functions: getter variants
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[VertexRef] = func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[(VertexRef, int)] =
## Find a vertex on the cache layers. An `ok()` result might contain a ## Find a vertex on the cache layers. An `ok()` result might contain a
## `nil` vertex if it is stored on the cache that way. ## `nil` vertex if it is stored on the cache that way.
## ##
db.top.delta.sTab.withValue(rvid, item): db.top.delta.sTab.withValue(rvid, item):
return Opt.some(item[]) return Opt.some((item[], 0))
for w in db.rstack: for i, w in enumerate(db.rstack):
w.delta.sTab.withValue(rvid, item): w.delta.sTab.withValue(rvid, item):
return Opt.some(item[]) return Opt.some((item[], i + 1))
Opt.none(VertexRef) Opt.none((VertexRef, int))
func layersGetVtxOrVoid*(db: AristoDbRef; rvid: RootedVertexID): VertexRef = func layersGetVtxOrVoid*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
## Simplified version of `layersGetVtx()` ## Simplified version of `layersGetVtx()`
db.layersGetVtx(rvid).valueOr: VertexRef(nil) db.layersGetVtx(rvid).valueOr((VertexRef(nil), 0))[0]
func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[HashKey] = func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[(HashKey, int)] =
## Find a hash key on the cache layers. An `ok()` result might contain a void ## Find a hash key on the cache layers. An `ok()` result might contain a void
## hash key if it is stored on the cache that way. ## hash key if it is stored on the cache that way.
## ##
db.top.delta.kMap.withValue(rvid, item): db.top.delta.kMap.withValue(rvid, item):
return Opt.some(item[]) return Opt.some((item[], 0))
for w in db.rstack: for i, w in enumerate(db.rstack):
w.delta.kMap.withValue(rvid, item): w.delta.kMap.withValue(rvid, item):
return ok(item[]) return ok((item[], i + 1))
Opt.none(HashKey) Opt.none((HashKey, int))
func layersGetKeyOrVoid*(db: AristoDbRef; rvid: RootedVertexID): HashKey = func layersGetKeyOrVoid*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
## Simplified version of `layersGetKey()` ## Simplified version of `layersGetKey()`
db.layersGetKey(rvid).valueOr: VOID_HASH_KEY (db.layersGetKey(rvid).valueOr (VOID_HASH_KEY, 0))[0]
func layersGetAccLeaf*(db: AristoDbRef; accPath: Hash256): Opt[VertexRef] = func layersGetAccLeaf*(db: AristoDbRef; accPath: Hash256): Opt[VertexRef] =
db.top.delta.accLeaves.withValue(accPath, item): db.top.delta.accLeaves.withValue(accPath, item):

View File

@ -50,7 +50,7 @@ proc mergePayloadImpl*(
cur = root cur = root
touched: array[NibblesBuf.high + 1, VertexID] touched: array[NibblesBuf.high + 1, VertexID]
pos = 0 pos = 0
vtx = db.getVtxRc((root, cur)).valueOr: (vtx, _) = db.getVtxRc((root, cur)).valueOr:
if error != GetVtxNotFound: if error != GetVtxNotFound:
return err(error) return err(error)
@ -120,7 +120,7 @@ proc mergePayloadImpl*(
if next.isValid: if next.isValid:
cur = next cur = next
path = path.slice(n + 1) path = path.slice(n + 1)
vtx = ?db.getVtxRc((root, next)) (vtx, _) = ?db.getVtxRc((root, next))
else: else:
# There's no vertex at the branch point - insert the payload as a new # There's no vertex at the branch point - insert the payload as a new
# leaf and update the existing branch # leaf and update the existing branch

View File

@ -149,7 +149,7 @@ proc serialise*(
## account type, otherwise pass the data as is. ## account type, otherwise pass the data as is.
## ##
proc getKey(vid: VertexID): Result[HashKey,AristoError] = proc getKey(vid: VertexID): Result[HashKey,AristoError] =
db.getKeyRc((root, vid)) ok (?db.getKeyRc((root, vid)))[0]
pyl.serialise getKey pyl.serialise getKey

View File

@ -44,14 +44,14 @@ proc toNode*(
block body: block body:
let key = db.layersGetKey(rvid).valueOr: let key = db.layersGetKey(rvid).valueOr:
break body break body
if key.isValid: if key[0].isValid:
return key return key[0]
else: else:
return VOID_HASH_KEY return VOID_HASH_KEY
if beOk: if beOk:
let rc = db.getKeyBE rvid let rc = db.getKeyBE rvid
if rc.isOk: if rc.isOk:
return rc.value return rc.value[0]
VOID_HASH_KEY VOID_HASH_KEY
case vtx.vType: case vtx.vType: