Beacon sync maintenance updates (#2958)

* Fix name after API change

why:
  Slipped through (debugging mode)

* Fine tuning error counters

why:
  Previous operating mode was quite blunt and considered some unnecessary
  condition. Error handling was invoked and the peer zombified where one
  could have continued working with that peer.

* Provide `kvt` table API bypassing `FC`

details:
  Not a full bypass yet

why:
  As discussed on Discord:
    Ideally, those would pass through fc as well, as thin wrappers around
    the db calls, for now - later, we probably see some policy involved
    here and at that point, fc will be responsible for arbitrage between
    sources (ie if a rpc source sends the block the syncer is syncing
    while the syncer is working, fc is there to referee)


* Apply `kvt` API from `FC` to beacon sync

* No need to use extra table for persistent header cache state record

why:
  Slot zero can do. This allows deleting that table wholesale when needed
  once that feature is available.

* Logger updates

details:
  + Lifting main header/block op logs from `trace` to `debug`
  + Set metrics update before nano-sleep (for task switch)
This commit is contained in:
Jordan Hrycaj 2024-12-19 12:02:36 +00:00 committed by GitHub
parent 55fd25758a
commit c801a11190
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 121 additions and 60 deletions

View File

@ -19,11 +19,12 @@ import
../../evm/state, ../../evm/state,
../validate, ../validate,
../executor/process_block, ../executor/process_block,
./forked_chain/chain_desc ./forked_chain/[chain_desc, chain_kvt]
export export
BlockDesc, BlockDesc,
ForkedChainRef, ForkedChainRef,
chain_kvt,
common, common,
core_db core_db

View File

@ -0,0 +1,72 @@
# Nimbus
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
## Persistent kvt that ideally bypasses the `FC` logic. This can be used as a
## cache where in-memory storage would be much of a burden (e.g. all `mainnet`
## headers.)
##
## Currently, it is not always possible to store. But fortunately (for the
## syncer application) it works for some time at the beginning when the `FC`
## module is initialised and no block operation has been performed, yet.
{.push raises: [].}
import
pkg/results,
../../../common,
../../../db/core_db,
./chain_desc
proc fcKvtAvailable*(c: ForkedChainRef): bool =
  ## Returns `true` if `kvt` data can be saved persistently, which is the
  ## case exactly while no transaction frame is open on the database.
  result = c.db.txFrameLevel() == 0
proc fcKvtPersistent*(c: ForkedChainRef): bool =
  ## Save cached `kvt` data if possible and return `true` on success.
  ##
  ## Side effect: this flushes *all* cached db data, including `Aristo`
  ## data (although there should not be any.)
  ##
  if not c.fcKvtAvailable():
    return false
  let stateNum = c.db.getSavedStateBlockNumber()
  c.db.persistent(stateNum).isOkOr:
    raiseAssert "fcKvtPersistent: persistent() failed: " & $$error
  true
proc fcKvtHasKey*(c: ForkedChainRef, key: openArray[byte]): bool =
  ## Check whether the argument `key` exists on the `kvt` table (i.e. a
  ## `get()` for that key would succeed.)
  ##
  let kvt = c.db.ctx.getKvt()
  result = kvt.hasKey(key)
proc fcKvtGet*(c: ForkedChainRef, key: openArray[byte]): Opt[seq[byte]] =
  ## Fetch a data entry from the `kvt` table, or `err()` when the key
  ## is absent.
  ##
  let kvt = c.db.ctx.getKvt()
  var data = kvt.get(key).valueOr:
    return err()
  ok(move data)
proc fcKvtPut*(c: ForkedChainRef, key, data: openArray[byte]): bool =
  ## Cache data on the `kvt` table marked for saving persistently. If the
  ## `kvt` table is unavailable, nothing is stored and `false` is returned.
  ##
  if not c.fcKvtAvailable():
    return false
  c.db.ctx.getKvt().put(key, data).isOkOr:
    raiseAssert "fcKvtPut: put() failed: " & $$error
  true
proc fcKvtDel*(c: ForkedChainRef, key: openArray[byte]): bool =
  ## Mark the argument key for deletion on the `kvt` table. If the `kvt`
  ## table is unavailable, nothing happens and `false` is returned.
  ##
  if not c.fcKvtAvailable():
    return false
  c.db.ctx.getKvt().del(key).isOkOr:
    raiseAssert "fcKvtDel: del() failed: " & $$error
  true
# End

View File

@ -450,7 +450,7 @@ when AutoValidateApiHooks:
doAssert not api.hasStorageData.isNil doAssert not api.hasStorageData.isNil
doAssert not api.isTop.isNil doAssert not api.isTop.isNil
doAssert not api.level.isNil doAssert not api.txFrameLevel.isNil
doAssert not api.mergeAccountRecord.isNil doAssert not api.mergeAccountRecord.isNil
doAssert not api.mergeStorageData.isNil doAssert not api.mergeStorageData.isNil

View File

@ -24,8 +24,7 @@ type
dataDirId = 7 dataDirId = 7
safeHash = 8 safeHash = 8
finalizedHash = 9 finalizedHash = 9
beaconState = 10 beaconHeader = 10
beaconHeader = 11
DbKey* = object DbKey* = object
# The first byte stores the key type. The rest are key-specific values # The first byte stores the key type. The rest are key-specific values
@ -87,11 +86,6 @@ func hashIndexKey*(hash: Hash32, index: uint16): HashIndexKey =
result[32] = byte(index and 0xFF) result[32] = byte(index and 0xFF)
result[33] = byte((index shl 8) and 0xFF) result[33] = byte((index shl 8) and 0xFF)
func beaconStateKey*(u: uint8): DbKey =
result.data[0] = byte ord(beaconState)
result.data[1] = u
result.dataEndPos = 1
func beaconHeaderKey*(u: BlockNumber): DbKey = func beaconHeaderKey*(u: BlockNumber): DbKey =
result.data[0] = byte ord(beaconHeader) result.data[0] = byte ord(beaconHeader)
doAssert sizeof(u) <= 32 doAssert sizeof(u) <= 32

View File

@ -104,7 +104,11 @@ proc fetchAndCheck(
blk.blocks[offset + n].uncles = bodies[n].uncles blk.blocks[offset + n].uncles = bodies[n].uncles
blk.blocks[offset + n].withdrawals = bodies[n].withdrawals blk.blocks[offset + n].withdrawals = bodies[n].withdrawals
return offset < blk.blocks.len.uint64 if offset < blk.blocks.len.uint64:
return true
buddy.only.nBdyProcErrors.inc
return false
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Public functions # Public functions
@ -195,13 +199,11 @@ proc blocksStagedCollect*(
# Fetch and extend staging record # Fetch and extend staging record
if not await buddy.fetchAndCheck(ivReq, blk, info): if not await buddy.fetchAndCheck(ivReq, blk, info):
haveError = true
# Throw away first time block fetch data. Keep other data for a # Throw away first time block fetch data. Keep other data for a
# partially assembled list. # partially assembled list.
if nBlkBlocks == 0: if nBlkBlocks == 0:
buddy.only.nBdyProcErrors.inc
haveError = true
if ((0 < buddy.only.nBdyRespErrors or if ((0 < buddy.only.nBdyRespErrors or
0 < buddy.only.nBdyProcErrors) and buddy.ctrl.stopped) or 0 < buddy.only.nBdyProcErrors) and buddy.ctrl.stopped) or
fetchBodiesReqErrThresholdCount < buddy.only.nBdyRespErrors or fetchBodiesReqErrThresholdCount < buddy.only.nBdyRespErrors or
@ -288,7 +290,7 @@ proc blocksStagedImport*(
nBlocks = qItem.data.blocks.len nBlocks = qItem.data.blocks.len
iv = BnRange.new(qItem.key, qItem.key + nBlocks.uint64 - 1) iv = BnRange.new(qItem.key, qItem.key + nBlocks.uint64 - 1)
trace info & ": import blocks ..", iv, nBlocks, debug info & ": import blocks ..", iv, nBlocks,
B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr
var maxImport = iv.maxPt var maxImport = iv.maxPt
@ -317,6 +319,9 @@ proc blocksStagedImport*(
maxImport = ctx.chain.latestNumber() maxImport = ctx.chain.latestNumber()
break importLoop break importLoop
# Update, so it can be followed nicely
ctx.updateMetrics()
# Allow pseudo/async thread switch. # Allow pseudo/async thread switch.
try: await sleepAsync asyncThreadSwitchTimeSlot try: await sleepAsync asyncThreadSwitchTimeSlot
except CancelledError: discard except CancelledError: discard
@ -325,9 +330,6 @@ proc blocksStagedImport*(
maxImport = ctx.chain.latestNumber() maxImport = ctx.chain.latestNumber()
break importLoop break importLoop
# Update, so it can be followed nicely
ctx.updateMetrics()
# Occasionally mark the chain finalized # Occasionally mark the chain finalized
if (n + 1) mod finaliserChainLengthMax == 0 or (n + 1) == nBlocks: if (n + 1) mod finaliserChainLengthMax == 0 or (n + 1) == nBlocks:
let let
@ -363,7 +365,7 @@ proc blocksStagedImport*(
# Update, so it can be followed nicely # Update, so it can be followed nicely
ctx.updateMetrics() ctx.updateMetrics()
trace info & ": import done", iv, nBlocks, B=ctx.chain.baseNumber.bnStr, debug info & ": import done", iv, nBlocks, B=ctx.chain.baseNumber.bnStr,
L=ctx.chain.latestNumber.bnStr, F=ctx.layout.final.bnStr L=ctx.chain.latestNumber.bnStr, F=ctx.layout.final.bnStr
return true return true

View File

@ -19,15 +19,19 @@ import
../worker_desc, ../worker_desc,
./headers_unproc ./headers_unproc
const let
LhcStateKey = 1.beaconStateKey LhcStateKey = 0.beaconHeaderKey
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Private helpers # Private helpers
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
template kvtNotAvailable(info: static[string]): string =
info & ": kvt table not available (locked by FC module)"
proc fetchSyncStateLayout(ctx: BeaconCtxRef): Opt[SyncStateLayout] = proc fetchSyncStateLayout(ctx: BeaconCtxRef): Opt[SyncStateLayout] =
let data = ctx.db.ctx.getKvt().get(LhcStateKey.toOpenArray).valueOr: let data = ctx.pool.chain.fcKvtGet(LhcStateKey.toOpenArray).valueOr:
return err() return err()
try: try:
return ok(rlp.decode(data, SyncStateLayout)) return ok(rlp.decode(data, SyncStateLayout))
@ -42,29 +46,25 @@ proc deleteStaleHeadersAndState(
info: static[string]; info: static[string];
) = ) =
## Delete stale headers and state ## Delete stale headers and state
let let c = ctx.pool.chain
kvt = ctx.db.ctx.getKvt() if not c.fcKvtAvailable():
stateNum = ctx.db.getSavedStateBlockNumber() # for persisting trace kvtNotAvailable(info)
return
var bn = upTo var bn = upTo
while 0 < bn and kvt.hasKey(beaconHeaderKey(bn).toOpenArray): while 0 < bn and c.fcKvtHasKey(beaconHeaderKey(bn).toOpenArray):
discard kvt.del(beaconHeaderKey(bn).toOpenArray) discard c.fcKvtDel(beaconHeaderKey(bn).toOpenArray)
bn.dec bn.dec
# Occasionally persist the deleted headers. This will succeed if # Occasionally persist the deleted headers (so that the internal DB cache
# this function is called early enough after restart when there is # does not grow extra large.) This will succeed if this function is called
# no database transaction pending. # early enough after restart when there is no database transaction pending.
if (upTo - bn) mod 8192 == 0: if (upTo - bn) mod 8192 == 0:
ctx.db.persistent(stateNum).isOkOr: discard c.fcKvtPersistent()
debug info & ": cannot persist deleted sync headers", error=($$error)
# So be it, stop here.
return
# Delete persistent state, there will be no use of it anymore # Delete persistent state record, there will be no use of it anymore
discard kvt.del(LhcStateKey.toOpenArray) discard c.fcKvtDel(LhcStateKey.toOpenArray)
ctx.db.persistent(stateNum).isOkOr: discard c.fcKvtPersistent()
debug info & ": cannot persist deleted sync headers", error=($$error)
return
if bn < upTo: if bn < upTo:
debug info & ": deleted stale sync headers", iv=BnRange.new(bn+1,upTo) debug info & ": deleted stale sync headers", iv=BnRange.new(bn+1,upTo)
@ -75,17 +75,12 @@ proc deleteStaleHeadersAndState(
proc dbStoreSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]) = proc dbStoreSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]) =
## Save chain layout to persistent db ## Save chain layout to persistent db
let data = rlp.encode(ctx.layout) let c = ctx.pool.chain
ctx.db.ctx.getKvt().put(LhcStateKey.toOpenArray, data).isOkOr: if c.fcKvtAvailable():
raiseAssert info & " put() failed: " & $$error discard c.fcKvtPut(LhcStateKey.toOpenArray, rlp.encode(ctx.layout))
discard c.fcKvtPersistent()
# While executing blocks there are frequent save cycles. Otherwise, an else:
# extra save request might help to pick up an interrupted sync session. trace kvtNotAvailable(info)
if ctx.db.txFrameLevel() == 0 and ctx.stash.len == 0:
let number = ctx.db.getSavedStateBlockNumber()
ctx.db.persistent(number).isOkOr:
raiseAssert info & " persistent() failed: " & $$error
proc dbLoadSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]): bool = proc dbLoadSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]): bool =
## Restore chain layout from persistent db. It returns `true` if a previous ## Restore chain layout from persistent db. It returns `true` if a previous
@ -165,18 +160,16 @@ proc dbHeadersStash*(
## .. ## ..
## ##
let let
txFrameLevel = ctx.db.txFrameLevel() c = ctx.pool.chain
last = first + revBlobs.len.uint64 - 1 last = first + revBlobs.len.uint64 - 1
if 0 < txFrameLevel: if not c.fcKvtAvailable():
# Need to cache it because FCU has blocked writing through to disk. # Need to cache it because FCU has blocked writing through to disk.
for n,data in revBlobs: for n,data in revBlobs:
ctx.stash[last - n.uint64] = data ctx.stash[last - n.uint64] = data
else: else:
let kvt = ctx.db.ctx.getKvt()
for n,data in revBlobs: for n,data in revBlobs:
let key = beaconHeaderKey(last - n.uint64) let key = beaconHeaderKey(last - n.uint64)
kvt.put(key.toOpenArray, data).isOkOr: discard c.fcKvtPut(key.toOpenArray, data)
raiseAssert info & ": put() failed: " & $$error
proc dbHeaderPeek*(ctx: BeaconCtxRef; num: BlockNumber): Opt[Header] = proc dbHeaderPeek*(ctx: BeaconCtxRef; num: BlockNumber): Opt[Header] =
## Retrieve some stashed header. ## Retrieve some stashed header.
@ -189,7 +182,7 @@ proc dbHeaderPeek*(ctx: BeaconCtxRef; num: BlockNumber): Opt[Header] =
# Use persistent storage next # Use persistent storage next
let let
key = beaconHeaderKey(num) key = beaconHeaderKey(num)
rc = ctx.db.ctx.getKvt().get(key.toOpenArray) rc = ctx.pool.chain.fcKvtGet(key.toOpenArray)
if rc.isOk: if rc.isOk:
try: try:
return ok(rlp.decode(rc.value, Header)) return ok(rlp.decode(rc.value, Header))
@ -206,7 +199,7 @@ proc dbHeaderUnstash*(ctx: BeaconCtxRef; bn: BlockNumber) =
ctx.stash.withValue(bn, _): ctx.stash.withValue(bn, _):
ctx.stash.del bn ctx.stash.del bn
return return
discard ctx.db.ctx.getKvt().del(beaconHeaderKey(bn).toOpenArray) discard ctx.pool.chain.fcKvtDel(beaconHeaderKey(bn).toOpenArray)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -44,6 +44,7 @@ proc fetchAndCheck(
# While assembling a `LinkedHChainRef`, verify that the `revHeaders` list # While assembling a `LinkedHChainRef`, verify that the `revHeaders` list
# was sound, i.e. contiguous, linked, etc. # was sound, i.e. contiguous, linked, etc.
if not revHeaders.extendLinkedHChain(buddy, ivReq.maxPt, lhc): if not revHeaders.extendLinkedHChain(buddy, ivReq.maxPt, lhc):
buddy.only.nHdrProcErrors.inc
return false return false
return true return true
@ -150,13 +151,11 @@ proc headersStagedCollect*(
# Fetch and extend chain record # Fetch and extend chain record
if not await buddy.fetchAndCheck(ivReq, lhc, info): if not await buddy.fetchAndCheck(ivReq, lhc, info):
haveError = true
# Throw away opportunistic data (or first time header fetch.) Keep # Throw away opportunistic data (or first time header fetch.) Keep
# other data for a partially assembled list. # other data for a partially assembled list.
if isOpportunistic or nLhcHeaders == 0: if isOpportunistic or nLhcHeaders == 0:
buddy.only.nHdrProcErrors.inc
haveError = true
if ((0 < buddy.only.nHdrRespErrors or if ((0 < buddy.only.nHdrRespErrors or
0 < buddy.only.nHdrProcErrors) and buddy.ctrl.stopped) or 0 < buddy.only.nHdrProcErrors) and buddy.ctrl.stopped) or
fetchHeadersReqErrThresholdCount < buddy.only.nHdrRespErrors or fetchHeadersReqErrThresholdCount < buddy.only.nHdrRespErrors or
@ -252,7 +251,7 @@ proc headersStagedProcess*(ctx: BeaconCtxRef; info: static[string]): int =
result += qItem.data.revHdrs.len # count headers result += qItem.data.revHdrs.len # count headers
trace info & ": stashed consecutive headers", debug info & ": stashed consecutive headers",
nListsLeft=ctx.hdr.staged.len, nStashed=result nListsLeft=ctx.hdr.staged.len, nStashed=result
if headersStagedQueueLengthLwm < ctx.hdr.staged.len: if headersStagedQueueLengthLwm < ctx.hdr.staged.len: