Beacon sync fix overlapping block list import (#2866)

* Ignore `FC` overlapping blocks and the ones <= `base`

why:
  Due to concurrently running `importBlock()` by `newPayload` RPC
  requests the `FC` module layout might differ when re-visiting for
  importing blocks.

* Update logging and docu

details:
 Reduce some logging noise
 Clarify activating/suspending syncer in log messages
This commit is contained in:
Jordan Hrycaj 2024-11-22 13:23:53 +00:00 committed by GitHub
parent 652539e628
commit 81690e0446
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 27 additions and 13 deletions

View File

@ -66,8 +66,8 @@ Implementation, The Gory Details
The following diagram depicts a most general state view of the sync and the
*FC* modules and at a given point of time
0 L (5)
o--------------------o
0 B L (5)
o---------o----------o
| <--- imported ---> |
C D H
o---------------------o----------------o
@ -75,8 +75,12 @@ The following diagram depicts a most general state view of the sync and the
where
* *B* -- **base**, current value of this entity (with the same name) of the
**FC** module (i.e. the current value when looked up.)
* *C* -- coupler, parent of the left endpoint of the chain of headers or blocks
to be fetched and imported.
to be fetched and imported. The block number of *C* is somewhere
between the ones of *B* and *C* inclusive.
* *L* -- **latest**, current value of this entity (with the same name) of the
**FC** module (i.e. the current value when looked up.) *L* need not

View File

@ -255,11 +255,10 @@ proc blocksStagedImport*(
# Fetch least record, accept only if it matches the global ledger state
block:
let imported = ctx.chain.latestNumber()
if qItem.key != imported + 1:
if imported + 1 < qItem.key:
trace info & ": there is a gap L vs. staged",
B=ctx.chain.baseNumber.bnStr, L=imported.bnStr, staged=qItem.key.bnStr,
C=ctx.layout.coupler.bnStr
doAssert imported < qItem.key
return false
# Remove from queue
@ -275,7 +274,21 @@ proc blocksStagedImport*(
var maxImport = iv.maxPt
block importLoop:
for n in 0 ..< nBlocks:
# It is known that `key <= imported + 1`. This means that some blocks
# potentally overlap with what is already known by `FC` (e.g. due to
# concurrently running `importBlock()` by a `newPayload` RPC requests.)
#
# It is not left to `FC` to ignore this record. Passing a block before
# the `base` (which also might have changed) is responded by `FC` with
# an error. This would cause throwing away all `nBlocks` rather than
# ignoring the first some.
#
let nBn = qItem.data.blocks[n].header.number
if nBn <= ctx.chain.baseNumber:
trace info & ": ignoring block <= base", n, iv,
B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr,
nthBn=nBn.bnStr, nthHash=qItem.data.getNthHash(n)
continue
ctx.pool.chain.importBlock(qItem.data.blocks[n]).isOkOr:
warn info & ": import block error", n, iv,
B=ctx.chain.baseNumber.bnStr, L=ctx.chain.latestNumber.bnStr,

View File

@ -114,9 +114,9 @@ proc dbLoadSyncStateLayout*(ctx: BeaconCtxRef; info: static[string]): bool =
# Add interval of unprocessed header range `(C,D)` from `README.md`
ctx.headersUnprocSet(ctx.layout.coupler+1, ctx.layout.dangling-1)
trace info & ": restored sync state", L=latest.bnStr,
trace info & ": restored syncer state", L=latest.bnStr,
C=ctx.layout.coupler.bnStr, D=ctx.layout.dangling.bnStr,
F=ctx.layout.final.bnStr, H=ctx.layout.head.bnStr
H=ctx.layout.head.bnStr
true

View File

@ -122,8 +122,6 @@ proc setupDatabase*(ctx: BeaconCtxRef; info: static[string]) =
# system becomes fully active. Otherwise there is some polling only waiting
# for a new target so there is reduced service (aka `hibernate`.).
ctx.hibernate = not ctx.dbLoadSyncStateLayout info
if ctx.hibernate:
trace info & ": hibernating", latest=ctx.chain.latestNumber.bnStr
# Set blocks batch import value for block import
if ctx.pool.nBodiesBatch < nFetchBodiesRequest:

View File

@ -121,8 +121,7 @@ proc startHibernating(ctx: BeaconCtxRef; info: static[string]) =
ctx.hibernate = true
trace info & ": hibernating, awaiting sync target",
L=ctx.chain.latestNumber.bnStr
trace info & ": suspending syncer", L=ctx.chain.latestNumber.bnStr
# Update, so it can be followed nicely
ctx.updateMetrics()
@ -327,7 +326,7 @@ proc updateFinalBlockHeader*(
b = ctx.chain.baseNumber()
f = finHdr.number
if f < b:
trace info & ": finalised number too low",
trace info & ": finalised block # too low",
B=b.bnStr, finalised=f.bnStr, delta=(b - f)
ctx.target.reset
@ -339,7 +338,7 @@ proc updateFinalBlockHeader*(
# Activate running (unless done yet)
if ctx.hibernate:
ctx.hibernate = false
trace info & ": activated syncer",
trace info & ": activating syncer", B=b.bnStr,
finalised=f.bnStr, head=ctx.target.consHead.bnStr
# Update, so it can be followed nicely