fix startup after BN exited between head and finalized blocks updates (#5617)

When the BN exits after writing new `head` to database, but before
completing the `updateFinalizedBlocks` call, the database is slightly
inconsistent due to the partial write. We currently fail to start up
after that. Fix that by catching up on partial `updateFinalizedBlocks`
tasks on start up, and add a test for this edge case.
This commit is contained in:
Etan Kissling 2023-11-23 00:44:20 +01:00 committed by GitHub
parent d5fbbd9b88
commit 8cea8af620
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 11 deletions

View File

@ -638,8 +638,9 @@ OK: 3/3 Fail: 0/3 Skip: 0/3
+ init with gaps [Preset: mainnet] OK
+ orphaned epoch block [Preset: mainnet] OK
+ prune heads on finalization [Preset: mainnet] OK
+ shutdown during finalization [Preset: mainnet] OK
```
OK: 3/3 Fail: 0/3 Skip: 0/3
OK: 4/4 Fail: 0/4 Skip: 0/4
## createValidatorFiles()
```diff
+ Add keystore files [LOCAL] OK
@ -720,4 +721,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9
---TOTAL---
OK: 409/414 Fail: 0/414 Skip: 5/414
OK: 410/415 Fail: 0/415 Skip: 5/415

View File

@ -1008,7 +1008,6 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
# state - the tail is implicitly finalized, and if we have a finalized block
# table, that provides another hint
finalizedSlot = db.finalizedBlocks.high.get(tail.slot)
newFinalized: seq[BlockId]
cache: StateCache
foundHeadState = false
headBlocks: seq[BlockRef]
@ -1128,28 +1127,37 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
doAssert dag.finalizedHead.blck != nil,
"The finalized head should exist at the slot"
doAssert dag.finalizedHead.blck.parent == nil,
"...but that's the last BlockRef with a parent"
block: # Top up finalized blocks
if db.finalizedBlocks.high.isNone or
db.finalizedBlocks.high.get() < dag.finalizedHead.blck.slot:
# Versions prior to 1.7.0 did not store finalized blocks in the
# database, and / or the application might have crashed between the head
# and finalized blocks updates.
info "Loading finalized blocks",
finHigh = db.finalizedBlocks.high,
finalizedHead = shortLog(dag.finalizedHead)
for blck in db.getAncestorSummaries(dag.finalizedHead.blck.root):
var
newFinalized: seq[BlockId]
tmp = dag.finalizedHead.blck
while tmp.parent != nil:
newFinalized.add(tmp.bid)
let p = tmp.parent
tmp.parent = nil
tmp = p
for blck in db.getAncestorSummaries(tmp.root):
if db.finalizedBlocks.high.isSome and
blck.summary.slot <= db.finalizedBlocks.high.get:
break
# Versions prior to 1.7.0 did not store finalized blocks in the
# database, and / or the application might have crashed between the head
# and finalized blocks updates.
newFinalized.add(BlockId(slot: blck.summary.slot, root: blck.root))
let finalizedBlocksTick = Moment.now()
db.updateFinalizedBlocks(newFinalized)
db.updateFinalizedBlocks(newFinalized)
doAssert dag.finalizedHead.blck.parent == nil,
"The finalized head is the last BlockRef with a parent"
block:
let finalized = db.finalizedBlocks.get(db.finalizedBlocks.high.get()).expect(

View File

@ -640,6 +640,43 @@ suite "chain DAG finalization tests" & preset():
dag2.finalizedHead.slot == dag.finalizedHead.slot
getStateRoot(dag2.headState) == getStateRoot(dag.headState)
test "shutdown during finalization" & preset():
var testPassed: bool
# Configure a hook that is called during finalization while the
# database has been partially written, to test behaviour if the
# beacon node is exited while the database is inconsistent.
proc onHeadChanged(data: HeadChangeInfoObject) =
if data.epoch_transition:
# Check test assumption: Head block was written before this callback
let headBlock = dag.db.getHeadBlock().expect("Valid DB")
doAssert headBlock == data.block_root, "Head was written before CB"
# Check test assumption: New finalized blocks were not written yet
let
stateFinalizedSlot =
dag.headState.getStateField(finalized_checkpoint).epoch.start_slot
dbFinalizedSlot =
dag.db.finalizedBlocks.high.expect("Valid DB")
doAssert stateFinalizedSlot > dbFinalizedSlot, "Finalized not written"
# If the beacon node were to exit _now_, this is what the DB looks like.
# Validate that we can initialize a new DAG from this database.
let validatorMonitor2 = newClone(ValidatorMonitor.init())
discard ChainDAGRef.init(
defaultRuntimeConfig, db, validatorMonitor2, {})
testPassed = true
dag.setHeadCb(onHeadChanged)
for blck in makeTestBlocks(
dag.headState, cache, int(SLOTS_PER_EPOCH * 4), attested = true):
let added = dag.addHeadBlock(verifier, blck.phase0Data, nilPhase0Callback)
check: added.isOk
dag.updateHead(added[], quarantine, [])
dag.pruneAtFinalization()
check testPassed
suite "Old database versions" & preset():
setup:
let