Store cached hash at the layer corresponding to the source data (#2492)
When lazily verifying state roots, we may end up with an entire state without roots that gets computed for the whole database - in the current design, that would result in hashes for the entire trie being held in memory. Since the hash depends only on the data in the vertex, we can store it directly at the top-most level derived from the vertices it depends on - be that memory or database - this makes the memory usage broadly linear with respect to the already-existing in-memory change set stored in the layers. It also ensures that if we have multiple forks in memory, hashes get cached in the correct layer maximising reuse between forks. The same layer numbering scheme as elsewhere is reused, where -2 is the backend, -1 is the balancer, then 0+ is the top of the stack and stack. A downside of this approach is that we create many small batches - a future improvement could be to collect all such writes in a single batch, though the memory profile of this approach should be examined first (where is the batch kept, exactly?).
This commit is contained in:
parent
6677f57ea9
commit
df4a21c910
|
@ -81,7 +81,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
|
|||
for (rvid,vtx) in db.layersWalkVtx:
|
||||
if vtx.isValid and topVidCache.vid < rvid.vid:
|
||||
topVidCache = rvid
|
||||
let key = db.layersGetKey(rvid).valueOr: VOID_HASH_KEY
|
||||
let (key, _) = db.layersGetKey(rvid).valueOr: (VOID_HASH_KEY, 0)
|
||||
if not vtx.isValid:
|
||||
# Some vertex is to be deleted, the key must be empty
|
||||
if key.isValid:
|
||||
|
|
|
@ -110,7 +110,7 @@ proc checkTopCommon*(
|
|||
let rc = db.layersGetKey rvid
|
||||
if rc.isErr:
|
||||
return err((rvid.vid,CheckAnyVtxEmptyKeyMissing))
|
||||
if rc.value.isValid:
|
||||
if rc.value[0].isValid:
|
||||
return err((rvid.vid,CheckAnyVtxEmptyKeyExpected))
|
||||
|
||||
if vTop.distinctBase < LEAST_FREE_VID:
|
||||
|
|
|
@ -13,40 +13,54 @@
|
|||
import
|
||||
eth/common,
|
||||
results,
|
||||
"."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise]
|
||||
"."/[aristo_desc, aristo_get, aristo_serialise]
|
||||
|
||||
proc computeKey*(
|
||||
proc putKeyAtLevel(
|
||||
db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int
|
||||
): Result[void, AristoError] =
|
||||
## Store a hash key in the given layer or directly to the underlying database
|
||||
## which helps ensure that memory usage is proportional to the pending change
|
||||
## set (vertex data may have been committed to disk without computing the
|
||||
## corresponding hash!)
|
||||
if level == -2:
|
||||
let be = db.backend
|
||||
doAssert be != nil, "source data is from the backend"
|
||||
# TODO long-running batch here?
|
||||
let writeBatch = ?be.putBegFn()
|
||||
be.putKeyFn(writeBatch, rvid, key)
|
||||
?be.putEndFn writeBatch
|
||||
ok()
|
||||
else:
|
||||
db.deltaAtLevel(level).kMap[rvid] = key
|
||||
ok()
|
||||
|
||||
func maxLevel(cur, other: int): int =
|
||||
# Compare two levels and return the topmost in the stack, taking into account
|
||||
# the odd reversal of order around the zero point
|
||||
if cur < 0:
|
||||
max(cur, other) # >= 0 is always more topmost than <0
|
||||
elif other < 0:
|
||||
cur
|
||||
else:
|
||||
min(cur, other) # Here the order is reversed and 0 is the top layer
|
||||
|
||||
proc computeKeyImpl(
|
||||
db: AristoDbRef; # Database, top layer
|
||||
rvid: RootedVertexID; # Vertex to convert
|
||||
): Result[HashKey, AristoError] =
|
||||
): Result[(HashKey, int), AristoError] =
|
||||
## Compute the key for an arbitrary vertex ID. If successful, the length of
|
||||
## the resulting key might be smaller than 32. If it is used as a root vertex
|
||||
## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as
|
||||
## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
|
||||
## 32 byte value.
|
||||
##
|
||||
# This is a variation on getKeyRc which computes the key instead of returning
|
||||
# an error
|
||||
# TODO it should not always write the key to the persistent storage
|
||||
|
||||
proc getKey(db: AristoDbRef; rvid: RootedVertexID): HashKey =
|
||||
block body:
|
||||
let key = db.layersGetKey(rvid).valueOr:
|
||||
break body
|
||||
if key.isValid:
|
||||
return key
|
||||
else:
|
||||
return VOID_HASH_KEY
|
||||
let rc = db.getKeyBE rvid
|
||||
if rc.isOk:
|
||||
return rc.value
|
||||
VOID_HASH_KEY
|
||||
db.getKeyRc(rvid).isErrOr:
|
||||
# Value cached either in layers or database
|
||||
return ok value
|
||||
let (vtx, vl) = ? db.getVtxRc rvid
|
||||
|
||||
let key = getKey(db, rvid)
|
||||
if key.isValid():
|
||||
return ok key
|
||||
|
||||
let vtx = ? db.getVtxRc rvid
|
||||
# Top-most level of all the vertices this hash computation depends on
|
||||
var level = vl
|
||||
|
||||
# TODO this is the same code as when serializing NodeRef, without the NodeRef
|
||||
var writer = initRlpWriter()
|
||||
|
@ -55,20 +69,23 @@ proc computeKey*(
|
|||
of Leaf:
|
||||
writer.startList(2)
|
||||
writer.append(vtx.lPfx.toHexPrefix(isLeaf = true))
|
||||
# Need to resolve storage root for account leaf
|
||||
|
||||
case vtx.lData.pType
|
||||
of AccountData:
|
||||
let
|
||||
stoID = vtx.lData.stoID
|
||||
key = if stoID.isValid:
|
||||
?db.computeKey((stoID, stoID))
|
||||
skey =
|
||||
if stoID.isValid:
|
||||
let (skey, sl) = ?db.computeKeyImpl((stoID, stoID))
|
||||
level = maxLevel(level, sl)
|
||||
skey
|
||||
else:
|
||||
VOID_HASH_KEY
|
||||
|
||||
writer.append(encode Account(
|
||||
nonce: vtx.lData.account.nonce,
|
||||
balance: vtx.lData.account.balance,
|
||||
storageRoot: key.to(Hash256),
|
||||
storageRoot: skey.to(Hash256),
|
||||
codeHash: vtx.lData.account.codeHash)
|
||||
)
|
||||
of RawData:
|
||||
|
@ -83,7 +100,9 @@ proc computeKey*(
|
|||
for n in 0..15:
|
||||
let vid = vtx.bVid[n]
|
||||
if vid.isValid:
|
||||
w.append(?db.computeKey((rvid.root, vid)))
|
||||
let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid))
|
||||
level = maxLevel(level, bl)
|
||||
w.append(bkey)
|
||||
else:
|
||||
w.append(VOID_HASH_KEY)
|
||||
w.append EmptyBlob
|
||||
|
@ -97,15 +116,23 @@ proc computeKey*(
|
|||
else:
|
||||
writeBranch(writer)
|
||||
|
||||
var h = writer.finish().digestTo(HashKey)
|
||||
let h = writer.finish().digestTo(HashKey)
|
||||
|
||||
# TODO This shouldn't necessarily go into the database if we're just computing
|
||||
# a key ephemerally - it should however be cached for some tiem since
|
||||
# deep hash computations are expensive
|
||||
db.layersPutKey(rvid, h)
|
||||
ok h
|
||||
# Cache the hash in the same storage layer as the top-most value that it
|
||||
# depends on (recursively) - this could be an ephemeral in-memory layer or the
|
||||
# underlying database backend - typically, values closer to the root are more
|
||||
# likely to live in an in-memory layer since any leaf change will lead to the
|
||||
# root key also changing while leaves that have never been hashed will see
|
||||
# their hash being saved directly to the backend.
|
||||
? db.putKeyAtLevel(rvid, h, level)
|
||||
|
||||
ok (h, level)
|
||||
|
||||
proc computeKey*(
|
||||
db: AristoDbRef; # Database, top layer
|
||||
rvid: RootedVertexID; # Vertex to convert
|
||||
): Result[HashKey, AristoError] =
|
||||
ok (?computeKeyImpl(db, rvid))[0]
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@ -61,7 +61,7 @@ proc delSubTreeImpl(
|
|||
## Implementation of *delete* sub-trie.
|
||||
var
|
||||
dispose = @[root]
|
||||
rootVtx = db.getVtxRc((root, root)).valueOr:
|
||||
(rootVtx, _) = db.getVtxRc((root, root)).valueOr:
|
||||
if error == GetVtxNotFound:
|
||||
return ok()
|
||||
return err(error)
|
||||
|
@ -73,7 +73,7 @@ proc delSubTreeImpl(
|
|||
for vtx in follow:
|
||||
for vid in vtx.subVids:
|
||||
# Exiting here leaves the tree as-is
|
||||
let vtx = ? db.getVtxRc((root, vid))
|
||||
let vtx = (? db.getVtxRc((root, vid)))[0]
|
||||
redo.add vtx
|
||||
dispose.add vid
|
||||
redo.swap follow
|
||||
|
@ -92,7 +92,7 @@ proc delStoTreeImpl(
|
|||
): Result[void,AristoError] =
|
||||
## Implementation of *delete* sub-trie.
|
||||
|
||||
let vtx = db.getVtxRc(rvid).valueOr:
|
||||
let (vtx, _) = db.getVtxRc(rvid).valueOr:
|
||||
if error == GetVtxNotFound:
|
||||
return ok()
|
||||
return err(error)
|
||||
|
|
|
@ -34,8 +34,8 @@ from ./aristo_desc/desc_backend
|
|||
|
||||
# Not auto-exporting backend
|
||||
export
|
||||
aristo_constants, desc_error, desc_identifiers, desc_nibbles, desc_structural,
|
||||
keyed_queue
|
||||
tables, aristo_constants, desc_error, desc_identifiers, desc_nibbles,
|
||||
desc_structural, keyed_queue
|
||||
|
||||
const
|
||||
accLruSize* = 1024 * 1024
|
||||
|
@ -313,6 +313,21 @@ iterator rstack*(db: AristoDbRef): LayerRef =
|
|||
for i in 0..<db.stack.len:
|
||||
yield db.stack[db.stack.len - i - 1]
|
||||
|
||||
proc deltaAtLevel*(db: AristoDbRef, level: int): LayerDeltaRef =
|
||||
if level == 0:
|
||||
db.top.delta
|
||||
elif level > 0:
|
||||
doAssert level <= db.stack.len
|
||||
db.stack[^level].delta
|
||||
elif level == -1:
|
||||
doAssert db.balancer != nil
|
||||
db.balancer
|
||||
elif level == -2:
|
||||
nil
|
||||
else:
|
||||
raiseAssert "Unknown level " & $level
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
# ------------------------------------------------------------------------------
|
||||
|
|
|
@ -85,17 +85,18 @@ proc retrieveMerkleHash(
|
|||
root: VertexID;
|
||||
updateOk: bool;
|
||||
): Result[Hash256,AristoError] =
|
||||
let key = block:
|
||||
let key =
|
||||
if updateOk:
|
||||
db.computeKey((root, root)).valueOr:
|
||||
if error == GetVtxNotFound:
|
||||
return ok(EMPTY_ROOT_HASH)
|
||||
return err(error)
|
||||
else:
|
||||
db.getKeyRc((root, root)).valueOr:
|
||||
let (key, _) = db.getKeyRc((root, root)).valueOr:
|
||||
if error == GetKeyNotFound:
|
||||
return ok(EMPTY_ROOT_HASH) # empty sub-tree
|
||||
return err(error)
|
||||
key
|
||||
ok key.to(Hash256)
|
||||
|
||||
|
||||
|
|
|
@ -73,33 +73,33 @@ proc getTuvBE*(
|
|||
proc getVtxBE*(
|
||||
db: AristoDbRef;
|
||||
rvid: RootedVertexID;
|
||||
): Result[VertexRef,AristoError] =
|
||||
): Result[(VertexRef, int),AristoError] =
|
||||
## Get the vertex from the (filtered) backend if available.
|
||||
if not db.balancer.isNil:
|
||||
db.balancer.sTab.withValue(rvid, w):
|
||||
if w[].isValid:
|
||||
return ok(w[])
|
||||
return ok (w[], -1)
|
||||
return err(GetVtxNotFound)
|
||||
db.getVtxUbe rvid
|
||||
ok (? db.getVtxUbe rvid, -2)
|
||||
|
||||
proc getKeyBE*(
|
||||
db: AristoDbRef;
|
||||
rvid: RootedVertexID;
|
||||
): Result[HashKey,AristoError] =
|
||||
): Result[(HashKey, int),AristoError] =
|
||||
## Get the merkle hash/key from the (filtered) backend if available.
|
||||
if not db.balancer.isNil:
|
||||
db.balancer.kMap.withValue(rvid, w):
|
||||
if w[].isValid:
|
||||
return ok(w[])
|
||||
return ok((w[], -1))
|
||||
return err(GetKeyNotFound)
|
||||
db.getKeyUbe rvid
|
||||
ok ((?db.getKeyUbe rvid), -2)
|
||||
|
||||
# ------------------
|
||||
|
||||
proc getVtxRc*(
|
||||
db: AristoDbRef;
|
||||
rvid: RootedVertexID
|
||||
): Result[VertexRef,AristoError] =
|
||||
): Result[(VertexRef, int),AristoError] =
|
||||
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
|
||||
##
|
||||
block body:
|
||||
|
@ -108,7 +108,7 @@ proc getVtxRc*(
|
|||
# error symbol `GetVtxNotFound`.
|
||||
let vtx = db.layersGetVtx(rvid).valueOr:
|
||||
break body
|
||||
if vtx.isValid:
|
||||
if vtx[0].isValid:
|
||||
return ok vtx
|
||||
else:
|
||||
return err(GetVtxNotFound)
|
||||
|
@ -119,10 +119,10 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
|
|||
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
|
||||
## The function returns `nil` on error or failure.
|
||||
##
|
||||
db.getVtxRc(rvid).valueOr: VertexRef(nil)
|
||||
db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]
|
||||
|
||||
|
||||
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoError] =
|
||||
proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
|
||||
## Cascaded attempt to fetch a Merkle hash from the cache layers or the
|
||||
## backend. This function will never return a `VOID_HASH_KEY` but rather
|
||||
## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error.
|
||||
|
@ -132,7 +132,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
|
|||
break body
|
||||
# If there is a zero key value, the entry is either marked for being
|
||||
# updated or for deletion on the database. So check below.
|
||||
if key.isValid:
|
||||
if key[0].isValid:
|
||||
return ok key
|
||||
|
||||
# The zero key value does not refer to an update mark if there is no
|
||||
|
@ -141,7 +141,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro
|
|||
# There was no vertex on the cache. So there must be one the backend (the
|
||||
# reason for the key label to exist, at all.)
|
||||
return err(GetKeyUpdateNeeded)
|
||||
if vtx.isValid:
|
||||
if vtx[0].isValid:
|
||||
return err(GetKeyUpdateNeeded)
|
||||
else:
|
||||
# The vertex is to be deleted. So is the value key.
|
||||
|
@ -153,7 +153,7 @@ proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
|
|||
## Cascaded attempt to fetch a vertex from the cache layers or the backend.
|
||||
## The function returns `nil` on error or failure.
|
||||
##
|
||||
db.getKeyRc(rvid).valueOr: VOID_HASH_KEY
|
||||
(db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0]
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# End
|
||||
|
|
|
@ -77,7 +77,7 @@ proc step*(
|
|||
path: NibblesBuf, rvid: RootedVertexID, db: AristoDbRef
|
||||
): Result[(VertexRef, NibblesBuf, VertexID), AristoError] =
|
||||
# Fetch next vertex
|
||||
let vtx = db.getVtxRc(rvid).valueOr:
|
||||
let (vtx, _) = db.getVtxRc(rvid).valueOr:
|
||||
if error != GetVtxNotFound:
|
||||
return err(error)
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
{.push raises: [].}
|
||||
|
||||
import
|
||||
std/[sequtils, sets, tables],
|
||||
std/[enumerate, sequtils, sets, tables],
|
||||
eth/common,
|
||||
results,
|
||||
./aristo_desc
|
||||
|
@ -56,40 +56,40 @@ func nLayersKey*(db: AristoDbRef): int =
|
|||
# Public functions: getter variants
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[VertexRef] =
|
||||
func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[(VertexRef, int)] =
|
||||
## Find a vertex on the cache layers. An `ok()` result might contain a
|
||||
## `nil` vertex if it is stored on the cache that way.
|
||||
##
|
||||
db.top.delta.sTab.withValue(rvid, item):
|
||||
return Opt.some(item[])
|
||||
return Opt.some((item[], 0))
|
||||
|
||||
for w in db.rstack:
|
||||
for i, w in enumerate(db.rstack):
|
||||
w.delta.sTab.withValue(rvid, item):
|
||||
return Opt.some(item[])
|
||||
return Opt.some((item[], i + 1))
|
||||
|
||||
Opt.none(VertexRef)
|
||||
Opt.none((VertexRef, int))
|
||||
|
||||
func layersGetVtxOrVoid*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
|
||||
## Simplified version of `layersGetVtx()`
|
||||
db.layersGetVtx(rvid).valueOr: VertexRef(nil)
|
||||
db.layersGetVtx(rvid).valueOr((VertexRef(nil), 0))[0]
|
||||
|
||||
|
||||
func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[HashKey] =
|
||||
func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[(HashKey, int)] =
|
||||
## Find a hash key on the cache layers. An `ok()` result might contain a void
|
||||
## hash key if it is stored on the cache that way.
|
||||
##
|
||||
db.top.delta.kMap.withValue(rvid, item):
|
||||
return Opt.some(item[])
|
||||
return Opt.some((item[], 0))
|
||||
|
||||
for w in db.rstack:
|
||||
for i, w in enumerate(db.rstack):
|
||||
w.delta.kMap.withValue(rvid, item):
|
||||
return ok(item[])
|
||||
return ok((item[], i + 1))
|
||||
|
||||
Opt.none(HashKey)
|
||||
Opt.none((HashKey, int))
|
||||
|
||||
func layersGetKeyOrVoid*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
|
||||
## Simplified version of `layersGetKey()`
|
||||
db.layersGetKey(rvid).valueOr: VOID_HASH_KEY
|
||||
(db.layersGetKey(rvid).valueOr (VOID_HASH_KEY, 0))[0]
|
||||
|
||||
func layersGetAccLeaf*(db: AristoDbRef; accPath: Hash256): Opt[VertexRef] =
|
||||
db.top.delta.accLeaves.withValue(accPath, item):
|
||||
|
|
|
@ -50,7 +50,7 @@ proc mergePayloadImpl*(
|
|||
cur = root
|
||||
touched: array[NibblesBuf.high + 1, VertexID]
|
||||
pos = 0
|
||||
vtx = db.getVtxRc((root, cur)).valueOr:
|
||||
(vtx, _) = db.getVtxRc((root, cur)).valueOr:
|
||||
if error != GetVtxNotFound:
|
||||
return err(error)
|
||||
|
||||
|
@ -120,7 +120,7 @@ proc mergePayloadImpl*(
|
|||
if next.isValid:
|
||||
cur = next
|
||||
path = path.slice(n + 1)
|
||||
vtx = ?db.getVtxRc((root, next))
|
||||
(vtx, _) = ?db.getVtxRc((root, next))
|
||||
else:
|
||||
# There's no vertex at the branch point - insert the payload as a new
|
||||
# leaf and update the existing branch
|
||||
|
|
|
@ -149,7 +149,7 @@ proc serialise*(
|
|||
## account type, otherwise pass the data as is.
|
||||
##
|
||||
proc getKey(vid: VertexID): Result[HashKey,AristoError] =
|
||||
db.getKeyRc((root, vid))
|
||||
ok (?db.getKeyRc((root, vid)))[0]
|
||||
|
||||
pyl.serialise getKey
|
||||
|
||||
|
|
|
@ -44,14 +44,14 @@ proc toNode*(
|
|||
block body:
|
||||
let key = db.layersGetKey(rvid).valueOr:
|
||||
break body
|
||||
if key.isValid:
|
||||
return key
|
||||
if key[0].isValid:
|
||||
return key[0]
|
||||
else:
|
||||
return VOID_HASH_KEY
|
||||
if beOk:
|
||||
let rc = db.getKeyBE rvid
|
||||
if rc.isOk:
|
||||
return rc.value
|
||||
return rc.value[0]
|
||||
VOID_HASH_KEY
|
||||
|
||||
case vtx.vType:
|
||||
|
|
Loading…
Reference in New Issue