Small cleanups (#2435)

* avoid costly hike memory allocations for operations that don't need to
re-traverse it
* avoid unnecessary state checks (which might trigger unwanted state
root computations)
* disable optimize-for-hits due to the MPT no longer being complete at
all times
This commit is contained in:
Jacek Sieka 2024-07-01 14:07:39 +02:00 committed by GitHub
parent 2c87fd1636
commit 3d3831dde8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 131 additions and 119 deletions

View File

@ -134,10 +134,6 @@ proc persistBlocksImpl(
if not updated:
debug "Cannot update VmState", blockNumber = header.number
return err("Cannot update VmState to block " & $header.number)
else:
# TODO weirdly, some tests depend on this reinit being called, even though
# in theory it's a fresh instance that should not need it (?)
doAssert vmState.reinit(header = header)
# TODO even if we're skipping validation, we should perform basic sanity
# checks on the block and header - that fields are sanely set for the
@ -178,7 +174,8 @@ proc persistBlocksImpl(
if NoPersistWithdrawals notin flags and blk.withdrawals.isSome:
c.db.persistWithdrawals(
header.withdrawalsRoot.expect("WithdrawalsRoot should be verified before"),
blk.withdrawals.get)
blk.withdrawals.get
)
# update currentBlock *after* we persist it
# so the rpc return consistent result
@ -189,6 +186,7 @@ proc persistBlocksImpl(
txs += blk.transactions.len
gas += blk.header.gasUsed
parentHash = blockHash
dbTx.commit()
# Save and record the block number before the last saved block state.

View File

@ -44,13 +44,16 @@ proc retrievePayload(
if path.len == 0:
return err(FetchPathInvalid)
let hike = NibblesBuf.fromBytes(path).hikeUp(root, db).valueOr:
if error[1] in HikeAcceptableStopsNotFound:
return err(FetchPathNotFound)
return err(error[1])
for step in stepUp(NibblesBuf.fromBytes(path), root, db):
let vtx = step.valueOr:
if error in HikeAcceptableStopsNotFound:
return err(FetchPathNotFound)
return err(error)
ok hike.legs[^1].wp.vtx.lData
if vtx.vType == Leaf:
return ok vtx.lData
return err(FetchPathNotFound)
proc retrieveMerkleHash(
db: AristoDbRef;
@ -79,11 +82,12 @@ proc hasPayload(
if path.len == 0:
return err(FetchPathInvalid)
let hike = path.hikeUp(VertexID(1), db).valueOr:
if error[1] in HikeAcceptableStopsNotFound:
return ok(false)
return err(error[1])
ok(true)
let error = db.retrievePayload(root, path).errorOr:
return ok(true)
if error == FetchPathNotFound:
return ok(false)
err(error)
# ------------------------------------------------------------------------------
# Public helpers
@ -120,11 +124,12 @@ proc fetchStorageID*(
## Public helper function for retrieving a storage (vertex) ID for a
## given account.
let
accHike = db.fetchAccountHike(accPath).valueOr:
payload = db.retrievePayload(VertexID(1), accPath).valueOr:
if error == FetchAccInaccessible:
return err(FetchPathNotFound)
return err(error)
stoID = accHike.legs[^1].wp.vtx.lData.stoID
stoID = payload.stoID
if not stoID.isValid:
return err(FetchPathNotFound)

View File

@ -77,6 +77,77 @@ func legsTo*(hike: Hike; numLegs: int; T: type NibblesBuf): T =
# --------
proc step*(
path: NibblesBuf, vid: VertexID, db: AristoDbRef
): Result[(VertexRef, NibblesBuf, VertexID), AristoError] =
## Take a single step along `path` starting at vertex `vid`: fetch the
## vertex from `db`, validate it against the remaining path, and return
## the vertex together with the unconsumed path tail and the next vertex
## ID to visit (`VertexID(0)` when a `Leaf` terminates the walk).
# Fetch next vertex
let vtx = db.getVtxRc(vid).valueOr:
if error != GetVtxNotFound:
return err(error)
# The vertex ID `vid` was a follow up from a parent vertex, but there is
# no child vertex on the database. So `vid` is a dangling link which is
# allowed only if there is a partial trie (e.g. with `snap` sync.)
return err(HikeDanglingEdge)
case vtx.vType:
of Leaf:
# This must be the last vertex, so there cannot be any `tail` left.
if path.len != path.sharedPrefixLen(vtx.lPfx):
return err(HikeLeafUnexpected)
# Leaf consumes the whole remaining path; no further vertex to visit.
ok (vtx, NibblesBuf(), VertexID(0))
of Branch:
# There must be some more data (aka `tail`) after a `Branch` vertex.
if path.len == 0:
return err(HikeBranchTailEmpty)
let
nibble = path[0].int8
nextVid = vtx.bVid[nibble]
if not nextVid.isValid:
return err(HikeBranchMissingEdge)
# Consume one nibble and follow the selected branch edge.
ok (vtx, path.slice(1), nextVid)
of Extension:
# There must be some more data (aka `tail`) after an `Extension` vertex.
if path.len == 0:
return err(HikeBranchTailEmpty)
if vtx.ePfx.len != path.sharedPrefixLen(vtx.ePfx):
return err(HikeExtTailMismatch) # Need to branch from here
let nextVid = vtx.eVid
if not nextVid.isValid:
return err(HikeExtMissingEdge)
# Consume the extension prefix and follow its single edge.
ok (vtx, path.slice(vtx.ePfx.len), nextVid)
iterator stepUp*(
path: NibblesBuf; # Partial path
root: VertexID; # Start vertex
db: AristoDbRef; # Database
): Result[VertexRef, AristoError] =
## For the argument `path`, iterate over the longest possible path in the
## argument database `db`, yielding each visited vertex in turn. When a
## `step` fails, the error is yielded once and iteration stops.
var
path = path
next = root
vtx: VertexRef
block iter:
while true:
(vtx, path, next) = step(path, next, db).valueOr:
# Surface the step error to the caller, then end the iteration.
yield Result[VertexRef, AristoError].err(error)
break iter
yield Result[VertexRef, AristoError].ok(vtx)
if path.len == 0:
# Full path consumed -- the walk is complete.
break
proc hikeUp*(
path: NibblesBuf; # Partial path
root: VertexID; # Start vertex
@ -95,66 +166,26 @@ proc hikeUp*(
var vid = root
while true:
var leg = Leg(wp: VidVtxPair(vid: vid), nibble: -1)
let (vtx, path, next) = step(hike.tail, vid, db).valueOr:
return err((vid,error,hike))
# Fetch next vertex
leg.wp.vtx = db.getVtxRc(vid).valueOr:
if error != GetVtxNotFound:
return err((vid,error,hike))
if hike.legs.len == 0:
return err((vid,HikeNoLegs,hike))
# The vertex ID `vid` was a follow up from a parent vertex, but there is
# no child vertex on the database. So `vid` is a dangling link which is
# allowed only if there is a partial trie (e.g. with `snap` sync.)
return err((vid,HikeDanglingEdge,hike))
let wp = VidVtxPair(vid:vid, vtx:vtx)
case leg.wp.vtx.vType:
case vtx.vType
of Leaf:
# This must be the last vertex, so there cannot be any `tail` left.
if hike.tail.len == hike.tail.sharedPrefixLen(leg.wp.vtx.lPfx):
# Bingo, got full path
hike.legs.add leg
hike.tail = NibblesBuf()
# This is the only loop exit
break
hike.legs.add Leg(wp: wp, nibble: -1)
hike.tail = path
return err((vid,HikeLeafUnexpected,hike))
of Branch:
# There must be some more data (aka `tail`) after a `Branch` vertex.
if hike.tail.len == 0:
hike.legs.add leg
return err((vid,HikeBranchTailEmpty,hike))
let
nibble = hike.tail[0].int8
nextVid = leg.wp.vtx.bVid[nibble]
if not nextVid.isValid:
return err((vid,HikeBranchMissingEdge,hike))
leg.nibble = nibble
hike.legs.add leg
hike.tail = hike.tail.slice(1)
vid = nextVid
break
of Extension:
# There must be some more data (aka `tail`) after an `Extension` vertex.
if hike.tail.len == 0:
hike.legs.add leg
hike.tail = NibblesBuf()
return err((vid,HikeExtTailEmpty,hike)) # Well, somehow odd
hike.legs.add Leg(wp: wp, nibble: -1)
if leg.wp.vtx.ePfx.len != hike.tail.sharedPrefixLen(leg.wp.vtx.ePfx):
return err((vid,HikeExtTailMismatch,hike)) # Need to branch from here
of Branch:
hike.legs.add Leg(wp: wp, nibble: int8 hike.tail[0])
let nextVid = leg.wp.vtx.eVid
if not nextVid.isValid:
return err((vid,HikeExtMissingEdge,hike))
hike.legs.add leg
hike.tail = hike.tail.slice(leg.wp.vtx.ePfx.len)
vid = nextVid
hike.tail = path
vid = next
ok hike

View File

@ -11,7 +11,7 @@
{.push raises: [].}
import
std/[sequtils, sets, typetraits],
std/[sets, typetraits],
eth/common,
results,
".."/[aristo_desc, aristo_get, aristo_hike, aristo_layers, aristo_vid]
@ -38,8 +38,9 @@ proc clearMerkleKeys(
hike: Hike; # Implied vertex IDs to clear hashes for
vid: VertexID; # Additionall vertex IDs to clear
) =
for w in hike.legs.mapIt(it.wp.vid) & @[vid]:
db.layersResKey(hike.root, w)
for w in hike.legs:
db.layersResKey(hike.root, w.wp.vid)
db.layersResKey(hike.root, vid)
proc setVtxAndKey*(
db: AristoDbRef; # Database, top layer

View File

@ -14,7 +14,7 @@
{.push raises: [].}
import
std/[sequtils, sets, typetraits],
std/[sequtils, typetraits],
eth/common,
results,
"."/[aristo_constants, aristo_desc, aristo_get, aristo_hike, aristo_layers]
@ -95,20 +95,20 @@ proc toNode*(
return ok node
proc subVids*(vtx: VertexRef): seq[VertexID] =
iterator subVids*(vtx: VertexRef): VertexID =
## Returns the list of all sub-vertex IDs for the argument `vtx`.
case vtx.vType:
of Leaf:
if vtx.lData.pType == AccountData:
let vid = vtx.lData.stoID
if vid.isValid:
result.add vid
yield vid
of Branch:
for vid in vtx.bVid:
if vid.isValid:
result.add vid
yield vid
of Extension:
result.add vtx.eVid
yield vtx.eVid
# ---------------------

View File

@ -102,14 +102,20 @@ proc toRocksDb*(
# https://github.com/facebook/rocksdb/wiki/Compression
cfOpts.bottommostCompression = Compression.lz4Compression
# We mostly look up data we know is there, so we don't need filters at the
# last level of the database - this option saves 90% bloom filter memory usage
# TODO verify this point
# TODO In the AriVtx table, we don't do lookups that are expected to result
# in misses thus we could avoid the filter cost - this does not apply to
# other tables since their API admit queries that might result in
# not-found - specially the KVT which is exposed to external queries and
# the `HashKey` cache (AriKey)
# https://github.com/EighteenZi/rocksdb_wiki/blob/master/Memory-usage-in-RocksDB.md#indexes-and-filter-blocks
# https://github.com/facebook/rocksdb/blob/af50823069818fc127438e39fef91d2486d6e76c/include/rocksdb/advanced_options.h#L696
cfOpts.optimizeFiltersForHits = true
# cfOpts.optimizeFiltersForHits = true
cfOpts.maxBytesForLevelBase = opts.writeBufferSize
cfOpts.maxBytesForLevelBase = cfOpts.writeBufferSize
# Reduce number of files when the database grows
cfOpts.targetFileSizeBase = cfOpts.writeBufferSize div 4
cfOpts.targetFileSizeMultiplier = 4
let dbOpts = defaultDbOptions()
dbOpts.maxOpenFiles = opts.maxOpenFiles

View File

@ -627,21 +627,6 @@ proc level*(db: CoreDbRef): int =
result = db.methods.levelFn()
db.ifTrackNewApi: debug newApiTxt, api, elapsed, result
proc persistent*(
db: CoreDbRef;
): CoreDbRc[void] =
## For the legacy database, this function has no effect and succeeds always.
## It will nevertheless return a discardable error if there is a pending
## transaction (i.e. `db.level() == 0`.)
##
## Otherwise, cached data from the `Kvt`, `Mpt`, and `Acc` descriptors are
## stored on the persistent database (if any). This requires that there
## is no transaction pending.
##
db.setTrackNewApi BasePersistentFn
result = db.methods.persistentFn Opt.none(BlockNumber)
db.ifTrackNewApi: debug newApiTxt, api, elapsed, result
proc persistent*(
db: CoreDbRef;
blockNumber: BlockNumber;

View File

@ -306,7 +306,6 @@ proc exists*(db: CoreDbRef, hash: Hash256): bool =
proc getSavedStateBlockNumber*(
db: CoreDbRef;
relax = false;
): BlockNumber =
## Returns the block number registered when the database was last time
## updated, or `BlockNumber(0)` if there was no update found.
@ -317,20 +316,9 @@ proc getSavedStateBlockNumber*(
## can be set `true` so this function also returns the block number if the
## state consistency check fails.
##
const info = "getSavedStateBlockNumber(): "
# FIXME: This construct following will be replaced by a proper
# `CoreDb` method.
let bn = db.ctx.getColumn(CtGeneric).backend.toAristoSavedStateBlockNumber()
if relax:
return bn
else:
var header: BlockHeader
if db.getBlockHeader(bn, header):
let state = db.ctx.getAccounts.state(updateOk=true).valueOr:
raiseAssert info & $$error
if state != header.stateRoot:
raiseAssert info & ": state mismatch at " & "#" & $result
return bn
db.ctx.getColumn(CtGeneric).backend.toAristoSavedStateBlockNumber()
proc getBlockHeader*(
db: CoreDbRef;

View File

@ -154,12 +154,6 @@ proc init*(x: typedesc[AccountsLedgerRef], db: CoreDbRef,
const info = "AccountsLedgerRef.init(): "
new result
result.ledger = db.ctx.getAccounts()
if root != EMPTY_ROOT_HASH:
let rc = result.ledger.state(updateOk=true)
if rc.isErr:
raiseAssert info & $$rc.error
if rc.value != root:
raiseAssert info & ": wrong account state"
result.kvt = db.newKvt() # save manually in `persist()`
result.witnessCache = Table[EthAddress, WitnessData]()
discard result.beginSavepoint

View File

@ -6,9 +6,9 @@ set -e
trap "exit" INT
if [ -z "$3" ]
if [ -z "$3" ]
then
echo "Syntax: make_states.sh datadir era1dir statsdir"
echo "Syntax: make_states.sh datadir era1dir statsdir [startdir]"
exit 1;
fi
@ -17,11 +17,15 @@ counter=0
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
DATE="$(date -u +%Y%m%d_%H%M)"
REV=$(git rev-parse --short=8 HEAD)
DATA_DIR="$1/${DATE}-${REV}"
mkdir -p "$DATA_DIR"
[ "$4" ] && cp -ar "$4"/* "$DATA_DIR"
while true;
do
"$SCRIPT_DIR/../build/nimbus" import \
--data-dir:"$1/${DATE}-${REV}" \
--data-dir:"${DATA_DIR}" \
--era1-dir:"$2" \
--debug-csv-stats:"$3/stats-${DATE}-${REV}.csv" \
--max-blocks:100000