From df4a21c9105535525957e6442f3b3602549428c0 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Thu, 18 Jul 2024 09:13:56 +0200 Subject: [PATCH] Store cached hash at the layer corresponding to the source data (#2492) When lazily verifying state roots, we may end up with an entire state without roots that gets computed for the whole database - in the current design, that would result in hashes for the entire trie being held in memory. Since the hash depends only on the data in the vertex, we can store it directly at the top-most level derived from the vertices it depends on - be that memory or database - this makes the memory usage broadly linear with respect to the already-existing in-memory change set stored in the layers. It also ensures that if we have multiple forks in memory, hashes get cached in the correct layer maximising reuse between forks. The same layer numbering scheme as elsewhere is reused, where -2 is the backend, -1 is the balancer, then 0+ is the top of the stack and stack. A downside of this approach is that we create many small batches - a future improvement could be to collect all such writes in a single batch, though the memory profile of this approach should be examined first (where is the batch kept, exactly?). 
--- nimbus/db/aristo/aristo_check/check_be.nim | 2 +- nimbus/db/aristo/aristo_check/check_top.nim | 2 +- nimbus/db/aristo/aristo_compute.nim | 101 +++++++++++------- nimbus/db/aristo/aristo_delete.nim | 6 +- nimbus/db/aristo/aristo_desc.nim | 19 +++- nimbus/db/aristo/aristo_fetch.nim | 5 +- nimbus/db/aristo/aristo_get.nim | 26 ++--- nimbus/db/aristo/aristo_hike.nim | 2 +- nimbus/db/aristo/aristo_layers.nim | 26 ++--- .../aristo_merge/merge_payload_helper.nim | 4 +- nimbus/db/aristo/aristo_serialise.nim | 2 +- nimbus/db/aristo/aristo_utils.nim | 6 +- 12 files changed, 122 insertions(+), 79 deletions(-) diff --git a/nimbus/db/aristo/aristo_check/check_be.nim b/nimbus/db/aristo/aristo_check/check_be.nim index e94c769c8..6bee548d4 100644 --- a/nimbus/db/aristo/aristo_check/check_be.nim +++ b/nimbus/db/aristo/aristo_check/check_be.nim @@ -81,7 +81,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( for (rvid,vtx) in db.layersWalkVtx: if vtx.isValid and topVidCache.vid < rvid.vid: topVidCache = rvid - let key = db.layersGetKey(rvid).valueOr: VOID_HASH_KEY + let (key, _) = db.layersGetKey(rvid).valueOr: (VOID_HASH_KEY, 0) if not vtx.isValid: # Some vertex is to be deleted, the key must be empty if key.isValid: diff --git a/nimbus/db/aristo/aristo_check/check_top.nim b/nimbus/db/aristo/aristo_check/check_top.nim index 10ae7207b..e9058bef1 100644 --- a/nimbus/db/aristo/aristo_check/check_top.nim +++ b/nimbus/db/aristo/aristo_check/check_top.nim @@ -110,7 +110,7 @@ proc checkTopCommon*( let rc = db.layersGetKey rvid if rc.isErr: return err((rvid.vid,CheckAnyVtxEmptyKeyMissing)) - if rc.value.isValid: + if rc.value[0].isValid: return err((rvid.vid,CheckAnyVtxEmptyKeyExpected)) if vTop.distinctBase < LEAST_FREE_VID: diff --git a/nimbus/db/aristo/aristo_compute.nim b/nimbus/db/aristo/aristo_compute.nim index 41ab71469..dbbf408b2 100644 --- a/nimbus/db/aristo/aristo_compute.nim +++ b/nimbus/db/aristo/aristo_compute.nim @@ -13,40 +13,54 @@ import eth/common, 
results, - "."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise] + "."/[aristo_desc, aristo_get, aristo_serialise] -proc computeKey*( +proc putKeyAtLevel( + db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int +): Result[void, AristoError] = + ## Store a hash key in the given layer or directly to the underlying database + ## which helps ensure that memory usage is proportional to the pending change + ## set (vertex data may have been committed to disk without computing the + ## corresponding hash!) + if level == -2: + let be = db.backend + doAssert be != nil, "source data is from the backend" + # TODO long-running batch here? + let writeBatch = ?be.putBegFn() + be.putKeyFn(writeBatch, rvid, key) + ?be.putEndFn writeBatch + ok() + else: + db.deltaAtLevel(level).kMap[rvid] = key + ok() + +func maxLevel(cur, other: int): int = + # Compare two levels and return the topmost in the stack, taking into account + # the odd reversal of order around the zero point + if cur < 0: + max(cur, other) # >= 0 is always more topmost than <0 + elif other < 0: + cur + else: + min(cur, other) # Here the order is reversed and 0 is the top layer + +proc computeKeyImpl( db: AristoDbRef; # Database, top layer rvid: RootedVertexID; # Vertex to convert - ): Result[HashKey, AristoError] = + ): Result[(HashKey, int), AristoError] = ## Compute the key for an arbitrary vertex ID. If successful, the length of ## the resulting key might be smaller than 32. If it is used as a root vertex ## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as ## in `db.computeKey(rvid).value.to(Hash256)` which always results in a ## 32 byte value. 
- ## - # This is a variation on getKeyRc which computes the key instead of returning - # an error - # TODO it should not always write the key to the persistent storage - proc getKey(db: AristoDbRef; rvid: RootedVertexID): HashKey = - block body: - let key = db.layersGetKey(rvid).valueOr: - break body - if key.isValid: - return key - else: - return VOID_HASH_KEY - let rc = db.getKeyBE rvid - if rc.isOk: - return rc.value - VOID_HASH_KEY + db.getKeyRc(rvid).isErrOr: + # Value cached either in layers or database + return ok value + let (vtx, vl) = ? db.getVtxRc rvid - let key = getKey(db, rvid) - if key.isValid(): - return ok key - - let vtx = ? db.getVtxRc rvid + # Top-most level of all the vertices this hash computation depends on + var level = vl # TODO this is the same code as when serializing NodeRef, without the NodeRef var writer = initRlpWriter() @@ -55,20 +69,23 @@ proc computeKey*( of Leaf: writer.startList(2) writer.append(vtx.lPfx.toHexPrefix(isLeaf = true)) - # Need to resolve storage root for account leaf + case vtx.lData.pType of AccountData: let stoID = vtx.lData.stoID - key = if stoID.isValid: - ?db.computeKey((stoID, stoID)) - else: - VOID_HASH_KEY + skey = + if stoID.isValid: + let (skey, sl) = ?db.computeKeyImpl((stoID, stoID)) + level = maxLevel(level, sl) + skey + else: + VOID_HASH_KEY writer.append(encode Account( nonce: vtx.lData.account.nonce, balance: vtx.lData.account.balance, - storageRoot: key.to(Hash256), + storageRoot: skey.to(Hash256), codeHash: vtx.lData.account.codeHash) ) of RawData: @@ -83,7 +100,9 @@ proc computeKey*( for n in 0..15: let vid = vtx.bVid[n] if vid.isValid: - w.append(?db.computeKey((rvid.root, vid))) + let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid)) + level = maxLevel(level, bl) + w.append(bkey) else: w.append(VOID_HASH_KEY) w.append EmptyBlob @@ -97,15 +116,23 @@ proc computeKey*( else: writeBranch(writer) - var h = writer.finish().digestTo(HashKey) + let h = writer.finish().digestTo(HashKey) - # TODO This 
shouldn't necessarily go into the database if we're just computing - a key ephemerally - it should however be cached for some tiem since - deep hash computations are expensive - db.layersPutKey(rvid, h) - ok h + # Cache the hash in the same storage layer as the top-most value that it + depends on (recursively) - this could be an ephemeral in-memory layer or the + underlying database backend - typically, values closer to the root are more + likely to live in an in-memory layer since any leaf change will lead to the + root key also changing while leaves that have never been hashed will see + their hash being saved directly to the backend. + ? db.putKeyAtLevel(rvid, h, level) + ok (h, level) +proc computeKey*( + db: AristoDbRef; # Database, top layer + rvid: RootedVertexID; # Vertex to convert + ): Result[HashKey, AristoError] = + ok (?computeKeyImpl(db, rvid))[0] # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_delete.nim b/nimbus/db/aristo/aristo_delete.nim index 116ba5516..2459de7e9 100644 --- a/nimbus/db/aristo/aristo_delete.nim +++ b/nimbus/db/aristo/aristo_delete.nim @@ -61,7 +61,7 @@ proc delSubTreeImpl( ## Implementation of *delete* sub-trie. var dispose = @[root] - rootVtx = db.getVtxRc((root, root)).valueOr: + (rootVtx, _) = db.getVtxRc((root, root)).valueOr: if error == GetVtxNotFound: return ok() return err(error) @@ -73,7 +73,7 @@ proc delSubTreeImpl( for vtx in follow: for vid in vtx.subVids: # Exiting here leaves the tree as-is - let vtx = ? db.getVtxRc((root, vid)) + let vtx = (? db.getVtxRc((root, vid)))[0] redo.add vtx dispose.add vid redo.swap follow @@ -92,7 +92,7 @@ proc delStoTreeImpl( ): Result[void,AristoError] = ## Implementation of *delete* sub-trie. 
- let vtx = db.getVtxRc(rvid).valueOr: + let (vtx, _) = db.getVtxRc(rvid).valueOr: if error == GetVtxNotFound: return ok() return err(error) diff --git a/nimbus/db/aristo/aristo_desc.nim b/nimbus/db/aristo/aristo_desc.nim index dfcb79e00..f46ca9092 100644 --- a/nimbus/db/aristo/aristo_desc.nim +++ b/nimbus/db/aristo/aristo_desc.nim @@ -34,8 +34,8 @@ from ./aristo_desc/desc_backend # Not auto-exporting backend export - aristo_constants, desc_error, desc_identifiers, desc_nibbles, desc_structural, - keyed_queue + tables, aristo_constants, desc_error, desc_identifiers, desc_nibbles, + desc_structural, keyed_queue const accLruSize* = 1024 * 1024 @@ -313,6 +313,21 @@ iterator rstack*(db: AristoDbRef): LayerRef = for i in 0.. 0: + doAssert level <= db.stack.len + db.stack[^level].delta + elif level == -1: + doAssert db.balancer != nil + db.balancer + elif level == -2: + nil + else: + raiseAssert "Unknown level " & $level + + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_fetch.nim b/nimbus/db/aristo/aristo_fetch.nim index ae3e7b33b..9f1b6db10 100644 --- a/nimbus/db/aristo/aristo_fetch.nim +++ b/nimbus/db/aristo/aristo_fetch.nim @@ -85,17 +85,18 @@ proc retrieveMerkleHash( root: VertexID; updateOk: bool; ): Result[Hash256,AristoError] = - let key = block: + let key = if updateOk: db.computeKey((root, root)).valueOr: if error == GetVtxNotFound: return ok(EMPTY_ROOT_HASH) return err(error) else: - db.getKeyRc((root, root)).valueOr: + let (key, _) = db.getKeyRc((root, root)).valueOr: if error == GetKeyNotFound: return ok(EMPTY_ROOT_HASH) # empty sub-tree return err(error) + key ok key.to(Hash256) diff --git a/nimbus/db/aristo/aristo_get.nim b/nimbus/db/aristo/aristo_get.nim index 05eaae450..4945f0f20 100644 --- a/nimbus/db/aristo/aristo_get.nim +++ b/nimbus/db/aristo/aristo_get.nim @@ -73,33 +73,33 @@ proc getTuvBE*( 
proc getVtxBE*( db: AristoDbRef; rvid: RootedVertexID; - ): Result[VertexRef,AristoError] = + ): Result[(VertexRef, int),AristoError] = ## Get the vertex from the (filtered) backened if available. if not db.balancer.isNil: db.balancer.sTab.withValue(rvid, w): if w[].isValid: - return ok(w[]) + return ok (w[], -1) return err(GetVtxNotFound) - db.getVtxUbe rvid + ok (? db.getVtxUbe rvid, -2) proc getKeyBE*( db: AristoDbRef; rvid: RootedVertexID; - ): Result[HashKey,AristoError] = + ): Result[(HashKey, int),AristoError] = ## Get the merkle hash/key from the (filtered) backend if available. if not db.balancer.isNil: db.balancer.kMap.withValue(rvid, w): if w[].isValid: - return ok(w[]) + return ok((w[], -1)) return err(GetKeyNotFound) - db.getKeyUbe rvid + ok ((?db.getKeyUbe rvid), -2) # ------------------ proc getVtxRc*( db: AristoDbRef; rvid: RootedVertexID - ): Result[VertexRef,AristoError] = + ): Result[(VertexRef, int),AristoError] = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## block body: @@ -108,7 +108,7 @@ proc getVtxRc*( # error symbol `GetVtxNotFound`. let vtx = db.layersGetVtx(rvid).valueOr: break body - if vtx.isValid: + if vtx[0].isValid: return ok vtx else: return err(GetVtxNotFound) @@ -119,10 +119,10 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## The function returns `nil` on error or failure. ## - db.getVtxRc(rvid).valueOr: VertexRef(nil) + db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0] -proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoError] = +proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] = ## Cascaded attempt to fetch a Merkle hash from the cache layers or the ## backend. This function will never return a `VOID_HASH_KEY` but rather ## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error. 
@@ -132,7 +132,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro break body # If there is a zero key value, the entry is either marked for being # updated or for deletion on the database. So check below. - if key.isValid: + if key[0].isValid: return ok key # The zero key value does not refer to an update mark if there is no @@ -141,7 +141,7 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[HashKey,AristoErro # There was no vertex on the cache. So there must be one the backend (the # reason for the key lable to exists, at all.) return err(GetKeyUpdateNeeded) - if vtx.isValid: + if vtx[0].isValid: return err(GetKeyUpdateNeeded) else: # The vertex is to be deleted. So is the value key. @@ -153,7 +153,7 @@ proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## The function returns `nil` on error or failure. ## - db.getKeyRc(rvid).valueOr: VOID_HASH_KEY + (db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0] # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_hike.nim b/nimbus/db/aristo/aristo_hike.nim index 5fef1f0a9..3befb60c6 100644 --- a/nimbus/db/aristo/aristo_hike.nim +++ b/nimbus/db/aristo/aristo_hike.nim @@ -77,7 +77,7 @@ proc step*( path: NibblesBuf, rvid: RootedVertexID, db: AristoDbRef ): Result[(VertexRef, NibblesBuf, VertexID), AristoError] = # Fetch next vertex - let vtx = db.getVtxRc(rvid).valueOr: + let (vtx, _) = db.getVtxRc(rvid).valueOr: if error != GetVtxNotFound: return err(error) diff --git a/nimbus/db/aristo/aristo_layers.nim b/nimbus/db/aristo/aristo_layers.nim index 2d978b88d..c09a5cb10 100644 --- a/nimbus/db/aristo/aristo_layers.nim +++ b/nimbus/db/aristo/aristo_layers.nim @@ -11,7 +11,7 @@ {.push raises: [].} import - std/[sequtils, sets, tables], + std/[enumerate, sequtils, sets, tables], eth/common, results, ./aristo_desc @@ -56,40 +56,40 
@@ func nLayersKey*(db: AristoDbRef): int = # Public functions: getter variants # ------------------------------------------------------------------------------ -func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[VertexRef] = +func layersGetVtx*(db: AristoDbRef; rvid: RootedVertexID): Opt[(VertexRef, int)] = ## Find a vertex on the cache layers. An `ok()` result might contain a ## `nil` vertex if it is stored on the cache that way. ## db.top.delta.sTab.withValue(rvid, item): - return Opt.some(item[]) + return Opt.some((item[], 0)) - for w in db.rstack: + for i, w in enumerate(db.rstack): w.delta.sTab.withValue(rvid, item): - return Opt.some(item[]) + return Opt.some((item[], i + 1)) - Opt.none(VertexRef) + Opt.none((VertexRef, int)) func layersGetVtxOrVoid*(db: AristoDbRef; rvid: RootedVertexID): VertexRef = ## Simplified version of `layersGetVtx()` - db.layersGetVtx(rvid).valueOr: VertexRef(nil) + db.layersGetVtx(rvid).valueOr((VertexRef(nil), 0))[0] -func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[HashKey] = +func layersGetKey*(db: AristoDbRef; rvid: RootedVertexID): Opt[(HashKey, int)] = ## Find a hash key on the cache layers. An `ok()` result might contain a void ## hash key if it is stored on the cache that way. 
## db.top.delta.kMap.withValue(rvid, item): - return Opt.some(item[]) + return Opt.some((item[], 0)) - for w in db.rstack: + for i, w in enumerate(db.rstack): w.delta.kMap.withValue(rvid, item): - return ok(item[]) + return ok((item[], i + 1)) - Opt.none(HashKey) + Opt.none((HashKey, int)) func layersGetKeyOrVoid*(db: AristoDbRef; rvid: RootedVertexID): HashKey = ## Simplified version of `layersGetKey()` - db.layersGetKey(rvid).valueOr: VOID_HASH_KEY + (db.layersGetKey(rvid).valueOr (VOID_HASH_KEY, 0))[0] func layersGetAccLeaf*(db: AristoDbRef; accPath: Hash256): Opt[VertexRef] = db.top.delta.accLeaves.withValue(accPath, item): diff --git a/nimbus/db/aristo/aristo_merge/merge_payload_helper.nim b/nimbus/db/aristo/aristo_merge/merge_payload_helper.nim index 435036763..c0e418cec 100644 --- a/nimbus/db/aristo/aristo_merge/merge_payload_helper.nim +++ b/nimbus/db/aristo/aristo_merge/merge_payload_helper.nim @@ -50,7 +50,7 @@ proc mergePayloadImpl*( cur = root touched: array[NibblesBuf.high + 1, VertexID] pos = 0 - vtx = db.getVtxRc((root, cur)).valueOr: + (vtx, _) = db.getVtxRc((root, cur)).valueOr: if error != GetVtxNotFound: return err(error) @@ -120,7 +120,7 @@ proc mergePayloadImpl*( if next.isValid: cur = next path = path.slice(n + 1) - vtx = ?db.getVtxRc((root, next)) + (vtx, _) = ?db.getVtxRc((root, next)) else: # There's no vertex at the branch point - insert the payload as a new # leaf and update the existing branch diff --git a/nimbus/db/aristo/aristo_serialise.nim b/nimbus/db/aristo/aristo_serialise.nim index 4b81e9563..4e9f63ed7 100644 --- a/nimbus/db/aristo/aristo_serialise.nim +++ b/nimbus/db/aristo/aristo_serialise.nim @@ -149,7 +149,7 @@ proc serialise*( ## account type, otherwise pass the data as is. 
## proc getKey(vid: VertexID): Result[HashKey,AristoError] = - db.getKeyRc((root, vid)) + ok (?db.getKeyRc((root, vid)))[0] pyl.serialise getKey diff --git a/nimbus/db/aristo/aristo_utils.nim b/nimbus/db/aristo/aristo_utils.nim index f351ddb12..4aa3de0a3 100644 --- a/nimbus/db/aristo/aristo_utils.nim +++ b/nimbus/db/aristo/aristo_utils.nim @@ -44,14 +44,14 @@ proc toNode*( block body: let key = db.layersGetKey(rvid).valueOr: break body - if key.isValid: - return key + if key[0].isValid: + return key[0] else: return VOID_HASH_KEY if beOk: let rc = db.getKeyBE rvid if rc.isOk: - return rc.value + return rc.value[0] VOID_HASH_KEY case vtx.vType: