From b4b4d16729df75bbe0b1fd9f8b33475b7ae93faf Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Fri, 20 Sep 2024 07:43:53 +0200 Subject: [PATCH] speed up key computation (#2642) * batch database key writes during `computeKey` calls * log progress when there are many keys to update * avoid evicting the vertex cache when traversing the trie for key computation purposes * avoid storing trivial leaf hashes that directly can be loaded from the vertex --- nimbus/db/aristo/aristo_api.nim | 4 +- nimbus/db/aristo/aristo_compute.nim | 118 ++++++++++++++---- nimbus/db/aristo/aristo_desc/desc_backend.nim | 2 +- .../db/aristo/aristo_desc/desc_structural.nim | 5 + nimbus/db/aristo/aristo_get.nim | 14 ++- nimbus/db/aristo/aristo_init/memory_db.nim | 2 +- nimbus/db/aristo/aristo_init/rocks_db.nim | 4 +- .../aristo/aristo_init/rocks_db/rdb_get.nim | 13 +- .../aristo/aristo_init/rocks_db/rdb_put.nim | 8 +- scripts/make_states.sh | 19 +-- 10 files changed, 139 insertions(+), 50 deletions(-) diff --git a/nimbus/db/aristo/aristo_api.nim b/nimbus/db/aristo/aristo_api.nim index 709fb2b98..e4e3dcd88 100644 --- a/nimbus/db/aristo/aristo_api.nim +++ b/nimbus/db/aristo/aristo_api.nim @@ -1036,9 +1036,9 @@ func init*( else: beDup.getVtxFn = - proc(a: RootedVertexID): auto = + proc(a: RootedVertexID, flags: set[GetVtxFlag]): auto = AristoApiProfBeGetVtxFn.profileRunner: - result = be.getVtxFn(a) + result = be.getVtxFn(a, flags) data.list[AristoApiProfBeGetVtxFn.ord].masked = true beDup.getKeyFn = diff --git a/nimbus/db/aristo/aristo_compute.nim b/nimbus/db/aristo/aristo_compute.nim index c5dbb3f7a..56590e635 100644 --- a/nimbus/db/aristo/aristo_compute.nim +++ b/nimbus/db/aristo/aristo_compute.nim @@ -11,24 +11,71 @@ {.push raises: [].} import + std/strformat, + chronicles, eth/common, results, - "."/[aristo_desc, aristo_get, aristo_serialise] + "."/[aristo_desc, aristo_get, aristo_serialise], + ./aristo_desc/desc_backend + +type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64] + +# Keep write batch size _around_ 1mb, give or take some overhead - this is a +# tradeoff between efficiency and memory usage with diminishing returns the +# larger it is.. +const batchSize = 1024 * 1024 div (sizeof(RootedVertexID) + sizeof(HashKey)) + +func progress(batch: WriteBatch): string = + # Return an approximation on how much of the keyspace has been covered by + # looking at the path prefix that we're currently processing + &"{(float(batch.prefix) / float(uint64.high)) * 100:02.2f}%" + +func enter(batch: var WriteBatch, nibble: int) = + batch.depth += 1 + if batch.depth <= 16: + batch.prefix += uint64(nibble) shl ((16 - batch.depth) * 4) + +func leave(batch: var WriteBatch, nibble: int) = + if batch.depth <= 16: + batch.prefix -= uint64(nibble) shl ((16 - batch.depth) * 4) + batch.depth -= 1 proc putKeyAtLevel( - db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int + db: AristoDbRef, + rvid: RootedVertexID, + key: HashKey, + level: int, + batch: var WriteBatch, ): Result[void, AristoError] = ## Store a hash key in the given layer or directly to the underlying database ## which helps ensure that memory usage is proportional to the pending change ## set (vertex data may have been committed to disk without computing the ## corresponding hash!) + + # Only put computed keys in the database which keeps churn down by focusing on + # the ones that do not change - the ones that don't require hashing might as + # well be loaded from the vertex! 
if level == -2: - let be = db.backend - doAssert be != nil, "source data is from the backend" - # TODO long-running batch here? - let writeBatch = ?be.putBegFn() - be.putKeyFn(writeBatch, rvid, key) - ?be.putEndFn writeBatch + if key.len == 32: + let be = db.backend + if batch.writer == nil: + doAssert be != nil, "source data is from the backend" + # TODO long-running batch here? + batch.writer = ?be.putBegFn() + + be.putKeyFn(batch.writer, rvid, key) + batch.count += 1 + + if batch.count mod batchSize == 0: + if batch.count mod (batchSize * 100) == 0: + info "Writing computeKey cache", + count = batch.count, accounts = batch.progress + else: + debug "Writing computeKey cache", + count = batch.count, accounts = batch.progress + ?be.putEndFn batch.writer + batch.writer = nil + ok() else: db.deltaAtLevel(level).kMap[rvid] = key @@ -45,9 +92,10 @@ func maxLevel(cur, other: int): int = min(cur, other) # Here the order is reversed and 0 is the top layer proc computeKeyImpl( - db: AristoDbRef; # Database, top layer - rvid: RootedVertexID; # Vertex to convert - ): Result[(HashKey, int), AristoError] = + db: AristoDbRef, # Database, top layer + rvid: RootedVertexID, # Vertex to convert + batch: var WriteBatch, +): Result[(HashKey, int), AristoError] = ## Compute the key for an arbitrary vertex ID. If successful, the length of ## the resulting key might be smaller than 32. If it is used as a root vertex ## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as @@ -57,7 +105,8 @@ proc computeKeyImpl( db.getKeyRc(rvid).isErrOr: # Value cached either in layers or database return ok value - let (vtx, vl) = ? db.getVtxRc rvid + + let (vtx, vl) = ?db.getVtxRc(rvid, {GetVtxFlag.PeekCache}) # Top-most level of all the verticies this hash compution depends on var level = vl @@ -65,7 +114,7 @@ proc computeKeyImpl( # TODO this is the same code as when serializing NodeRef, without the NodeRef var writer = initRlpWriter() - case vtx.vType: + case vtx.vType of Leaf: writer.startList(2) writer.append(vtx.pfx.toHexPrefix(isLeaf = true).data()) @@ -76,36 +125,41 @@ proc computeKeyImpl( stoID = vtx.lData.stoID skey = if stoID.isValid: - let (skey, sl) = ?db.computeKeyImpl((stoID.vid, stoID.vid)) + let (skey, sl) = ?db.computeKeyImpl((stoID.vid, stoID.vid), batch) level = maxLevel(level, sl) skey else: VOID_HASH_KEY - writer.append(encode Account( - nonce: vtx.lData.account.nonce, - balance: vtx.lData.account.balance, - storageRoot: skey.to(Hash256), - codeHash: vtx.lData.account.codeHash) + writer.append( + encode Account( + nonce: vtx.lData.account.nonce, + balance: vtx.lData.account.balance, + storageRoot: skey.to(Hash256), + codeHash: vtx.lData.account.codeHash, + ) ) of RawData: writer.append(vtx.lData.rawBlob) of StoData: # TODO avoid memory allocation when encoding storage data writer.append(rlp.encode(vtx.lData.stoData)) - of Branch: template writeBranch(w: var RlpWriter) = w.startList(17) - for n in 0..15: + for n in 0 .. 
15: let vid = vtx.bVid[n] if vid.isValid: - let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid)) + batch.enter(n) + let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid), batch) + batch.leave(n) + level = maxLevel(level, bl) w.append(bkey) else: w.append(VOID_HASH_KEY) w.append EmptyBlob + if vtx.pfx.len > 0: # Extension node var bwriter = initRlpWriter() writeBranch(bwriter) @@ -124,15 +178,25 @@ proc computeKeyImpl( # likely to live in an in-memory layer since any leaf change will lead to the # root key also changing while leaves that have never been hashed will see # their hash being saved directly to the backend. - ? db.putKeyAtLevel(rvid, h, level) + ?db.putKeyAtLevel(rvid, h, level, batch) ok (h, level) proc computeKey*( - db: AristoDbRef; # Database, top layer - rvid: RootedVertexID; # Vertex to convert - ): Result[HashKey, AristoError] = - ok (?computeKeyImpl(db, rvid))[0] + db: AristoDbRef, # Database, top layer + rvid: RootedVertexID, # Vertex to convert +): Result[HashKey, AristoError] = + var batch: WriteBatch + let res = computeKeyImpl(db, rvid, batch) + if res.isOk: + if batch.writer != nil: + if batch.count >= batchSize * 100: + info "Writing computeKey cache", count = batch.count, progress = "100.00%" + else: + debug "Writing computeKey cache", count = batch.count, progress = "100.00%" + ?db.backend.putEndFn batch.writer + batch.writer = nil + ok (?res)[0] # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_desc/desc_backend.nim b/nimbus/db/aristo/aristo_desc/desc_backend.nim index 57bd10a76..e59623e77 100644 --- a/nimbus/db/aristo/aristo_desc/desc_backend.nim +++ b/nimbus/db/aristo/aristo_desc/desc_backend.nim @@ -20,7 +20,7 @@ import type GetVtxFn* = - proc(rvid: RootedVertexID): Result[VertexRef,AristoError] {.gcsafe, raises: [].} + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] {.gcsafe, raises: [].} ## Generic backend database retrieval function for a single structural ## `Aristo DB` data record. diff --git a/nimbus/db/aristo/aristo_desc/desc_structural.nim b/nimbus/db/aristo/aristo_desc/desc_structural.nim index 4d7136a43..354083caa 100644 --- a/nimbus/db/aristo/aristo_desc/desc_structural.nim +++ b/nimbus/db/aristo/aristo_desc/desc_structural.nim @@ -130,6 +130,11 @@ type txUid*: uint ## Transaction identifier if positive + GetVtxFlag* = enum + PeekCache + ## Peek into, but don't update cache - useful on work loads that are + ## unfriendly to caches + # ------------------------------------------------------------------------------ # Public helpers (misc) # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_get.nim b/nimbus/db/aristo/aristo_get.nim index 18459165c..387f7c054 100644 --- a/nimbus/db/aristo/aristo_get.nim +++ b/nimbus/db/aristo/aristo_get.nim @@ -43,11 +43,12 @@ proc getLstUbe*( proc getVtxUbe*( db: AristoDbRef; rvid: RootedVertexID; + flags: set[GetVtxFlag] = {}; ): Result[VertexRef,AristoError] = ## Get the vertex from the unfiltered backened if available. let be = db.backend if not be.isNil: - return be.getVtxFn rvid + return be.getVtxFn(rvid, flags) err GetVtxNotFound proc getKeyUbe*( @@ -73,6 +74,7 @@ proc getTuvBE*( proc getVtxBE*( db: AristoDbRef; rvid: RootedVertexID; + flags: set[GetVtxFlag] = {}; ): Result[(VertexRef, int),AristoError] = ## Get the vertex from the (filtered) backened if available. 
if not db.balancer.isNil: @@ -80,7 +82,7 @@ proc getVtxBE*( if w[].isValid: return ok (w[], -1) return err(GetVtxNotFound) - ok (? db.getVtxUbe rvid, -2) + ok (? db.getVtxUbe(rvid, flags), -2) proc getKeyBE*( db: AristoDbRef; @@ -98,7 +100,8 @@ proc getKeyBE*( proc getVtxRc*( db: AristoDbRef; - rvid: RootedVertexID + rvid: RootedVertexID; + flags: set[GetVtxFlag] = {}; ): Result[(VertexRef, int),AristoError] = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## @@ -113,15 +116,14 @@ proc getVtxRc*( else: return err(GetVtxNotFound) - db.getVtxBE rvid + db.getVtxBE(rvid, flags) -proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef = +proc getVtx*(db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag] = {}): VertexRef = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## The function returns `nil` on error or failure. ## db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0] - proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] = ## Cascaded attempt to fetch a Merkle hash from the cache layers or the ## backend. This function will never return a `VOID_HASH_KEY` but rather diff --git a/nimbus/db/aristo/aristo_init/memory_db.nim b/nimbus/db/aristo/aristo_init/memory_db.nim index 2e7c95a7a..3d51c085c 100644 --- a/nimbus/db/aristo/aristo_init/memory_db.nim +++ b/nimbus/db/aristo/aristo_init/memory_db.nim @@ -86,7 +86,7 @@ proc endSession(hdl: PutHdlRef; db: MemBackendRef): MemPutHdlRef = proc getVtxFn(db: MemBackendRef): GetVtxFn = result = - proc(rvid: RootedVertexID): Result[VertexRef,AristoError] = + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] = # Fetch serialised data record let data = db.mdb.sTab.getOrDefault(rvid, EmptyBlob) if 0 < data.len: diff --git a/nimbus/db/aristo/aristo_init/rocks_db.nim b/nimbus/db/aristo/aristo_init/rocks_db.nim index 2730e11cb..712f135bd 100644 --- a/nimbus/db/aristo/aristo_init/rocks_db.nim +++ b/nimbus/db/aristo/aristo_init/rocks_db.nim @@ -75,10 +75,10 @@ proc endSession(hdl: PutHdlRef; db: RdbBackendRef): RdbPutHdlRef = proc getVtxFn(db: RdbBackendRef): GetVtxFn = result = - proc(rvid: RootedVertexID): Result[VertexRef,AristoError] = + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] = # Fetch serialised data record - let vtx = db.rdb.getVtx(rvid).valueOr: + let vtx = db.rdb.getVtx(rvid, flags).valueOr: when extraTraceMessages: trace logTxt "getVtxFn() failed", rvid, error=error[0], info=error[1] return err(error[0]) diff --git a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim index 190e03546..6cfc1915d 100644 --- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim +++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim @@ -135,9 +135,15 @@ proc getKey*( proc getVtx*( rdb: var RdbInst; rvid: RootedVertexID; + flags: set[GetVtxFlag]; ): Result[VertexRef,(AristoError,string)] = # Try LRU cache first - var rc = rdb.rdVtxLru.get(rvid.vid) + var rc = + if GetVtxFlag.PeekCache in flags: + rdb.rdVtxLru.peek(rvid.vid) + else: + rdb.rdVtxLru.get(rvid.vid) + if rc.isOK: rdbVtxLruStats[rvid.to(RdbStateType)][rc.value().vType].inc(true) return ok(move(rc.value)) @@ -164,8 +170,9 @@ proc getVtx*( rdbVtxLruStats[rvid.to(RdbStateType)][res.value().vType].inc(false) - # Update cache and return - rdb.rdVtxLru.put(rvid.vid, res.value()) + # Update cache and return - in peek mode, avoid evicting cache items + if GetVtxFlag.PeekCache notin flags or 
rdb.rdVtxLru.len < rdb.rdVtxLru.capacity: + rdb.rdVtxLru.put(rvid.vid, res.value()) ok res.value() diff --git a/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim b/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim index 1bcef46e2..890c1a75f 100644 --- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim +++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim @@ -89,7 +89,13 @@ proc putKey*( rvid: RootedVertexID, key: HashKey; ): Result[void,(VertexID,AristoError,string)] = let dsc = rdb.session - if key.isValid: + # We only write keys whose value has to be hashed - the trivial ones can be + # loaded from the corresponding vertex directly! + # TODO move this logic to a higher layer + # TODO skip the delete for trivial keys - it's here to support databases that + # were written at a time when trivial keys were also cached - it should + # be cleaned up when revising the key cache in general. + if key.isValid and key.len == 32: dsc.put(rvid.blobify().data(), key.data, rdb.keyCol.handle()).isOkOr: # Caller must `rollback()` which will flush the `rdKeyLru` cache const errSym = RdbBeDriverPutKeyError diff --git a/scripts/make_states.sh b/scripts/make_states.sh index 22452a066..b05006346 100755 --- a/scripts/make_states.sh +++ b/scripts/make_states.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Create a set of states, each advanced by 100k blocks +# Create a set of states, each advanced by 1M blocks set -e @@ -18,18 +18,23 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) DATE="$(date -u +%Y%m%d_%H%M)" REV=$(git rev-parse --short=8 HEAD) DATA_DIR="$1/${DATE}-${REV}" +ERA_DIR="$2" +ERA1_DIR="$3" +STATS_DIR="$4" + +shift 4 mkdir -p "$DATA_DIR" -[ "$5" ] && cp -ar "$5"/* "$DATA_DIR" +[ -d "$1" ] && { cp -ar "$1"/* "$DATA_DIR" ; shift ; } while true; do "$SCRIPT_DIR/../build/nimbus" import \ --data-dir:"${DATA_DIR}" \ - --era1-dir:"$2" \ - --era-dir:"$3" \ - --debug-csv-stats:"$4/stats-${DATE}-${REV}.csv" \ - --max-blocks:1000000 - cp -ar "$1/${DATE}-${REV}" "$1/${DATE}-${REV}"-$(printf "%04d" $counter) + --era1-dir:"${ERA_DIR}" \ + --era-dir:"${ERA1_DIR}" \ + --debug-csv-stats:"${STATS_DIR}/stats-${DATE}-${REV}.csv" \ + --max-blocks:1000000 "$@" + cp -ar "${DATA_DIR}" "${DATA_DIR}-$(printf "%04d" $counter)" counter=$((counter+1)) done
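# ------------------------------------------------------------------------------
# Editor's note -- not part of the patch above
# ------------------------------------------------------------------------------
#
# The `aristo_compute.nim` changes batch key writes during `computeKey`,
# derive a progress estimate from the nibble path currently being traversed,
# and flush the batch to the backend roughly once per megabyte of accumulated
# writes. The standalone Nim sketch below illustrates that batching and
# progress-reporting idea under simplified assumptions: `FakeBatch`, `putKey`
# and `flush` are hypothetical stand-ins for illustration only, not the
# Aristo backend API.

import std/strformat

const
  # Number of (id, key) writes that add up to roughly 1 MiB per batch,
  # mirroring the size-based heuristic used in the patch.
  batchSize = 1024 * 1024 div (sizeof(uint64) + 32)

type
  FakeBatch = object
    pending: seq[(uint64, array[32, byte])]  # queued (id, key) writes
    count: int                               # total keys written so far
    depth: int                               # current trie depth, in nibbles
    prefix: uint64                           # first 16 nibbles of the current path

func progress(b: FakeBatch): string =
  ## Approximate share of the keyspace covered, derived from the path prefix.
  &"{(float(b.prefix) / float(uint64.high)) * 100:02.2f}%"

func enter(b: var FakeBatch, nibble: int) =
  ## Descend into branch child `nibble`; only the first 16 nibbles contribute
  ## to the progress prefix.
  b.depth += 1
  if b.depth <= 16:
    b.prefix += uint64(nibble) shl ((16 - b.depth) * 4)

func leave(b: var FakeBatch, nibble: int) =
  ## Undo `enter` when backing out of the child.
  if b.depth <= 16:
    b.prefix -= uint64(nibble) shl ((16 - b.depth) * 4)
  b.depth -= 1

proc flush(b: var FakeBatch) =
  ## Stand-in for committing a backend write batch (`putEndFn` in the patch).
  echo "flushed ", b.pending.len, " keys at ", b.progress
  b.pending.setLen(0)

proc putKey(b: var FakeBatch, id: uint64, key: array[32, byte]) =
  ## Queue one key write and flush whenever a whole batch has accumulated.
  b.pending.add((id, key))
  b.count += 1
  if b.count mod batchSize == 0:
    b.flush()

when isMainModule:
  var batch: FakeBatch
  var key: array[32, byte]
  # Simulate walking the first level of a branch node.
  for nibble in 0 .. 15:
    batch.enter(nibble)
    batch.putKey(uint64(nibble), key)
    batch.leave(nibble)
  batch.flush()
  echo "total keys: ", batch.count

# The progress estimate works because the first 16 nibbles of the traversal
# path are packed into a `uint64` (most significant nibble first); account
# paths are keccak hashes and therefore roughly uniformly distributed, so
# `prefix / uint64.high` approximates the fraction of the keyspace already
# visited when the log line is emitted.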