Fix missing checkpoint states` (#3225)

With the right sequence of events (for example a REST request or a
validation), it can happen that the first traversal across a state
checkpoint boundary is done without storing that state on disk - this
causes problens when replaying states, because now states may be missing
from the database.

Here, we simply avoid using the caches when advancing a state that will
go into the database, ensuring that the information lost during caching
always is permanently stored.

* fix recursion bug in `isProposed`
This commit is contained in:
Jacek Sieka 2021-12-30 12:33:03 +01:00 committed by GitHub
parent 6b60a774e0
commit 7ec97a6b35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 23 deletions

View File

@ -222,6 +222,12 @@ proc addHeadBlock*(
# blocks we add to the database are clean for the given state
let startTick = Moment.now()
# The clearance state works as the canonical
# "let's make things permanent" point and saves things to the database -
# storing things is slow, so we don't want to do so before there's a
# reasonable chance that the information will become more permanently useful -
# by the time a new block reaches this point, the parent block will already
# have "established" itself in the network to some degree at least.
var cache = StateCache()
updateStateData(
dag, dag.clearanceState, parent.atSlot(signedBlock.message.slot), true, cache)

View File

@ -201,7 +201,7 @@ func isProposed*(bid: BlockId, slot: Slot): bool =
func isProposed*(blck: BlockRef, slot: Slot): bool =
## Return true if `blck` was proposed in the given slot
not isNil(blck) and blck.isProposed(slot)
not isNil(blck) and blck.bid.isProposed(slot)
func isProposed*(bs: BlockSlot): bool =
## Return true if `bs` represents the proposed block (as opposed to an empty

View File

@ -792,6 +792,7 @@ proc advanceSlots(
# processing - the state must be positions at a slot before or equal to the
# target
doAssert getStateField(state.data, slot) <= slot
while getStateField(state.data, slot) < slot:
let preEpoch = getStateField(state.data, slot).epoch
loadStateCache(dag, cache, state.blck, getStateField(state.data, slot).epoch)
@ -874,15 +875,19 @@ proc updateStateData*(
# it also avoids `hash_tree_root` for slot processing
if exactMatch(state, cur):
found = true
elif exactMatch(dag.headState, cur):
assign(state, dag.headState)
found = true
elif exactMatch(dag.clearanceState, cur):
assign(state, dag.clearanceState)
found = true
elif exactMatch(dag.epochRefState, cur):
assign(state, dag.epochRefState)
found = true
elif not save:
# When required to save states, we cannot rely on the caches because that
# would skip the extra processing that save does - not all information that
# goes into the database is cached
if exactMatch(dag.headState, cur):
assign(state, dag.headState)
found = true
elif exactMatch(dag.clearanceState, cur):
assign(state, dag.clearanceState)
found = true
elif exactMatch(dag.epochRefState, cur):
assign(state, dag.epochRefState)
found = true
# First, run a quick check if we can simply apply a few blocks to an in-memory
# state - any in-memory state will be faster than loading from database.
@ -893,24 +898,25 @@ proc updateStateData*(
# sequentially to grab their votes.
const RewindBlockThreshold = 64
while not found and ancestors.len < RewindBlockThreshold:
if canAdvance(state, cur):
if canAdvance(state, cur): # Typical case / fast path when there's no reorg
found = true
break
if canAdvance(dag.headState, cur):
assign(state, dag.headState)
found = true
break
if not save: # See above
if canAdvance(dag.headState, cur):
assign(state, dag.headState)
found = true
break
if canAdvance(dag.clearanceState, cur):
assign(state, dag.clearanceState)
found = true
break
if canAdvance(dag.clearanceState, cur):
assign(state, dag.clearanceState)
found = true
break
if canAdvance(dag.epochRefState, cur):
assign(state, dag.epochRefState)
found = true
break
if canAdvance(dag.epochRefState, cur):
assign(state, dag.epochRefState)
found = true
break
if cur.slot == cur.blck.slot:
# This is not an empty slot, so the block will need to be applied to

View File

@ -87,6 +87,9 @@ suite "BlockSlot and helpers":
s24.parent == BlockSlot(blck: s2, slot: Slot(3))
s24.parent.parent == s22
s22.isProposed()
not s24.isProposed()
suite "BlockId and helpers":
test "atSlot sanity":
let

View File

@ -160,6 +160,30 @@ suite "Block pool processing" & preset():
dag.getBlockRange(Slot(3), 2, blocks.toOpenArray(0, 1)) == 2
blocks[2..<2].len == 0
# A fork forces the clearance state to a point where it cannot be advanced
let
nextEpoch = dag.head.slot.epoch + 1
nextEpochSlot = nextEpoch.compute_start_slot_at_epoch()
stateCheckpoint = dag.head.parent.atSlot(nextEpochSlot).stateCheckpoint
check:
not dag.getEpochRef(dag.head.parent, nextEpoch).isNil
# Getting an EpochRef should not result in states being stored
db.getStateRoot(stateCheckpoint.blck.root, stateCheckpoint.slot).isErr()
# this is required for the test to work - it's not a "public"
# post-condition of getEpochRef
getStateField(dag.epochRefState.data, slot) == nextEpochSlot
assign(state[], dag.epochRefState.data)
let
bnext = addTestBlock(state[], cache).phase0Data
bnextAdd = dag.addHeadBlock(verifier, bnext, nilPhase0Callback)
check:
# Getting an EpochRef should not result in states being stored
db.getStateRoot(stateCheckpoint.blck.root, stateCheckpoint.slot).isOk()
test "Adding the same block twice returns a Duplicate error" & preset():
let
b10 = dag.addHeadBlock(verifier, b1, nilPhase0Callback)