fix startup after BN exited between head and finalized blocks updates (#5617)
When the beacon node (BN) exits after writing the new `head` to the database, but before completing the `updateFinalizedBlocks` call, the partial write leaves the database slightly inconsistent, and the node currently fails to start up afterwards. Fix this by catching up on any incomplete `updateFinalizedBlocks` work at startup, and add a test for this edge case.
This commit is contained in:
parent
d5fbbd9b88
commit
8cea8af620
|
@ -638,8 +638,9 @@ OK: 3/3 Fail: 0/3 Skip: 0/3
|
||||||
+ init with gaps [Preset: mainnet] OK
|
+ init with gaps [Preset: mainnet] OK
|
||||||
+ orphaned epoch block [Preset: mainnet] OK
|
+ orphaned epoch block [Preset: mainnet] OK
|
||||||
+ prune heads on finalization [Preset: mainnet] OK
|
+ prune heads on finalization [Preset: mainnet] OK
|
||||||
|
+ shutdown during finalization [Preset: mainnet] OK
|
||||||
```
|
```
|
||||||
OK: 3/3 Fail: 0/3 Skip: 0/3
|
OK: 4/4 Fail: 0/4 Skip: 0/4
|
||||||
## createValidatorFiles()
|
## createValidatorFiles()
|
||||||
```diff
|
```diff
|
||||||
+ Add keystore files [LOCAL] OK
|
+ Add keystore files [LOCAL] OK
|
||||||
|
@ -720,4 +721,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
|
||||||
OK: 9/9 Fail: 0/9 Skip: 0/9
|
OK: 9/9 Fail: 0/9 Skip: 0/9
|
||||||
|
|
||||||
---TOTAL---
|
---TOTAL---
|
||||||
OK: 409/414 Fail: 0/414 Skip: 5/414
|
OK: 410/415 Fail: 0/415 Skip: 5/415
|
||||||
|
|
|
@ -1008,7 +1008,6 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
|
||||||
# state - the tail is implicitly finalized, and if we have a finalized block
|
# state - the tail is implicitly finalized, and if we have a finalized block
|
||||||
# table, that provides another hint
|
# table, that provides another hint
|
||||||
finalizedSlot = db.finalizedBlocks.high.get(tail.slot)
|
finalizedSlot = db.finalizedBlocks.high.get(tail.slot)
|
||||||
newFinalized: seq[BlockId]
|
|
||||||
cache: StateCache
|
cache: StateCache
|
||||||
foundHeadState = false
|
foundHeadState = false
|
||||||
headBlocks: seq[BlockRef]
|
headBlocks: seq[BlockRef]
|
||||||
|
@ -1128,29 +1127,38 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
|
||||||
|
|
||||||
doAssert dag.finalizedHead.blck != nil,
|
doAssert dag.finalizedHead.blck != nil,
|
||||||
"The finalized head should exist at the slot"
|
"The finalized head should exist at the slot"
|
||||||
doAssert dag.finalizedHead.blck.parent == nil,
|
|
||||||
"...but that's the last BlockRef with a parent"
|
|
||||||
|
|
||||||
block: # Top up finalized blocks
|
block: # Top up finalized blocks
|
||||||
if db.finalizedBlocks.high.isNone or
|
if db.finalizedBlocks.high.isNone or
|
||||||
db.finalizedBlocks.high.get() < dag.finalizedHead.blck.slot:
|
db.finalizedBlocks.high.get() < dag.finalizedHead.blck.slot:
|
||||||
|
# Versions prior to 1.7.0 did not store finalized blocks in the
|
||||||
|
# database, and / or the application might have crashed between the head
|
||||||
|
# and finalized blocks updates.
|
||||||
info "Loading finalized blocks",
|
info "Loading finalized blocks",
|
||||||
finHigh = db.finalizedBlocks.high,
|
finHigh = db.finalizedBlocks.high,
|
||||||
finalizedHead = shortLog(dag.finalizedHead)
|
finalizedHead = shortLog(dag.finalizedHead)
|
||||||
|
|
||||||
for blck in db.getAncestorSummaries(dag.finalizedHead.blck.root):
|
var
|
||||||
|
newFinalized: seq[BlockId]
|
||||||
|
tmp = dag.finalizedHead.blck
|
||||||
|
while tmp.parent != nil:
|
||||||
|
newFinalized.add(tmp.bid)
|
||||||
|
let p = tmp.parent
|
||||||
|
tmp.parent = nil
|
||||||
|
tmp = p
|
||||||
|
|
||||||
|
for blck in db.getAncestorSummaries(tmp.root):
|
||||||
if db.finalizedBlocks.high.isSome and
|
if db.finalizedBlocks.high.isSome and
|
||||||
blck.summary.slot <= db.finalizedBlocks.high.get:
|
blck.summary.slot <= db.finalizedBlocks.high.get:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Versions prior to 1.7.0 did not store finalized blocks in the
|
|
||||||
# database, and / or the application might have crashed between the head
|
|
||||||
# and finalized blocks updates.
|
|
||||||
newFinalized.add(BlockId(slot: blck.summary.slot, root: blck.root))
|
newFinalized.add(BlockId(slot: blck.summary.slot, root: blck.root))
|
||||||
|
|
||||||
let finalizedBlocksTick = Moment.now()
|
|
||||||
db.updateFinalizedBlocks(newFinalized)
|
db.updateFinalizedBlocks(newFinalized)
|
||||||
|
|
||||||
|
doAssert dag.finalizedHead.blck.parent == nil,
|
||||||
|
"The finalized head is the last BlockRef with a parent"
|
||||||
|
|
||||||
block:
|
block:
|
||||||
let finalized = db.finalizedBlocks.get(db.finalizedBlocks.high.get()).expect(
|
let finalized = db.finalizedBlocks.get(db.finalizedBlocks.high.get()).expect(
|
||||||
"tail at least")
|
"tail at least")
|
||||||
|
|
|
@ -640,6 +640,43 @@ suite "chain DAG finalization tests" & preset():
|
||||||
dag2.finalizedHead.slot == dag.finalizedHead.slot
|
dag2.finalizedHead.slot == dag.finalizedHead.slot
|
||||||
getStateRoot(dag2.headState) == getStateRoot(dag.headState)
|
getStateRoot(dag2.headState) == getStateRoot(dag.headState)
|
||||||
|
|
||||||
|
test "shutdown during finalization" & preset():
|
||||||
|
var testPassed: bool
|
||||||
|
|
||||||
|
# Configure a hook that is called during finalization while the
|
||||||
|
# database has been partially written, to test behaviour if the
|
||||||
|
# beacon node is exited while the database is inconsistent.
|
||||||
|
proc onHeadChanged(data: HeadChangeInfoObject) =
|
||||||
|
if data.epoch_transition:
|
||||||
|
# Check test assumption: Head block was written before this callback
|
||||||
|
let headBlock = dag.db.getHeadBlock().expect("Valid DB")
|
||||||
|
doAssert headBlock == data.block_root, "Head was written before CB"
|
||||||
|
|
||||||
|
# Check test assumption: New finalized blocks were not written yet
|
||||||
|
let
|
||||||
|
stateFinalizedSlot =
|
||||||
|
dag.headState.getStateField(finalized_checkpoint).epoch.start_slot
|
||||||
|
dbFinalizedSlot =
|
||||||
|
dag.db.finalizedBlocks.high.expect("Valid DB")
|
||||||
|
doAssert stateFinalizedSlot > dbFinalizedSlot, "Finalized not written"
|
||||||
|
|
||||||
|
# If the beacon node were to exit _now_, this is what the DB looks like.
|
||||||
|
# Validate that we can initialize a new DAG from this database.
|
||||||
|
let validatorMonitor2 = newClone(ValidatorMonitor.init())
|
||||||
|
discard ChainDAGRef.init(
|
||||||
|
defaultRuntimeConfig, db, validatorMonitor2, {})
|
||||||
|
testPassed = true
|
||||||
|
dag.setHeadCb(onHeadChanged)
|
||||||
|
|
||||||
|
for blck in makeTestBlocks(
|
||||||
|
dag.headState, cache, int(SLOTS_PER_EPOCH * 4), attested = true):
|
||||||
|
let added = dag.addHeadBlock(verifier, blck.phase0Data, nilPhase0Callback)
|
||||||
|
check: added.isOk
|
||||||
|
dag.updateHead(added[], quarantine, [])
|
||||||
|
dag.pruneAtFinalization()
|
||||||
|
|
||||||
|
check testPassed
|
||||||
|
|
||||||
suite "Old database versions" & preset():
|
suite "Old database versions" & preset():
|
||||||
setup:
|
setup:
|
||||||
let
|
let
|
||||||
|
|
Loading…
Reference in New Issue