avoid reading legacy db on write

* don't consider legacy database when writing state - this read is slow
on kvstore
* avoid epoch transition when there's an exact match in cache already
* simplify init to only consider checkpoint states
This commit is contained in:
Jacek Sieka 2021-05-30 10:14:17 +02:00 committed by zah
parent df7bc87af5
commit 60df17786e
2 changed files with 52 additions and 35 deletions

View File

@ -683,10 +683,11 @@ proc containsState*(db: BeaconChainDBV0, key: Eth2Digest): bool =
let sk = subkey(BeaconStateNoImmutableValidators, key) let sk = subkey(BeaconStateNoImmutableValidators, key)
db.stateStore.contains(sk).expectDb() or db.stateStore.contains(sk).expectDb() or
db.backend.contains(sk).expectDb() or db.backend.contains(sk).expectDb() or
db.backend.contains(subkey(BeaconState, key)).expectDb db.backend.contains(subkey(BeaconState, key)).expectDb()
proc containsState*(db: BeaconChainDB, key: Eth2Digest): bool = proc containsState*(db: BeaconChainDB, key: Eth2Digest, legacy: bool = true): bool =
db.statesNoVal.contains(key.data).expectDb or db.v0.containsState(key) db.statesNoVal.contains(key.data).expectDb or
(legacy and db.v0.containsState(key))
iterator getAncestors*(db: BeaconChainDB, root: Eth2Digest): iterator getAncestors*(db: BeaconChainDB, root: Eth2Digest):
TrustedSignedBeaconBlock = TrustedSignedBeaconBlock =

View File

@ -321,6 +321,21 @@ func init*(T: type BlockRef, root: Eth2Digest, blck: SomeBeaconBlock): BlockRef
func contains*(dag: ChainDAGRef, root: Eth2Digest): bool = func contains*(dag: ChainDAGRef, root: Eth2Digest): bool =
KeyedBlockRef.asLookupKey(root) in dag.blocks KeyedBlockRef.asLookupKey(root) in dag.blocks
func isStateCheckpoint(bs: BlockSlot): bool =
  ## Tell whether `bs` is a state checkpoint, i.e. a point in time for which a
  ## full state snapshot is stored. These snapshots later serve as rewind
  ## starting points when state transitions are replayed from the database,
  ## for example during reorgs.
  ##
  # Policy: only epoch boundary states are stored, and they are stored without
  # the epoch block (if it exists) applied - everything else can be rebuilt by
  # loading an epoch boundary state and applying the missing blocks.
  # States produced with empty slots only are skipped too, so the only
  # checkpoint after a block is the one for the first epoch following it.
  if bs.slot == bs.blck.slot and bs.blck.parent == nil:
    # The tail block also counts as a state checkpoint!
    return true

  # Start of the epoch right after the one the block itself belongs to
  bs.slot.isEpoch and bs.slot.epoch == bs.blck.slot.epoch + 1
proc init*(T: type ChainDAGRef, proc init*(T: type ChainDAGRef,
preset: RuntimePreset, preset: RuntimePreset,
db: BeaconChainDB, db: BeaconChainDB,
@ -392,21 +407,15 @@ proc init*(T: type ChainDAGRef,
# Now that we have a head block, we need to find the most recent state that # Now that we have a head block, we need to find the most recent state that
# we have saved in the database # we have saved in the database
while cur.blck != nil: while cur.blck != nil:
let root = db.getStateRoot(cur.blck.root, cur.slot) if cur.isStateCheckpoint():
if root.isSome(): let root = db.getStateRoot(cur.blck.root, cur.slot)
if db.getState(root.get(), tmpState.data.data, noRollback): if root.isSome():
tmpState.data.root = root.get() if db.getState(root.get(), tmpState.data.data, noRollback):
tmpState.blck = cur.blck tmpState.data.root = root.get()
tmpState.blck = cur.blck
break break
if cur.blck.parent != nil and cur = cur.parentOrSlot()
cur.blck.slot.epoch != epoch(cur.blck.parent.slot):
# We store the state of the parent block with the epoch processing applied
# in the database!
cur = cur.blck.parent.atEpochStart(cur.blck.slot.epoch)
else:
# Moves back slot by slot, in case a state for an empty slot was saved
cur = cur.parent
if tmpState.blck == nil: if tmpState.blck == nil:
warn "No state found in head history, database corrupt?" warn "No state found in head history, database corrupt?"
@ -546,21 +555,6 @@ proc getState(
true true
func isStateCheckpoint(bs: BlockSlot): bool =
  ## Tell whether `bs` is a state checkpoint, i.e. a point in time for which a
  ## full state snapshot is stored. These snapshots later serve as rewind
  ## starting points when state transitions are replayed from the database,
  ## for example during reorgs.
  ##
  # Policy: only epoch boundary states are stored, and they are stored without
  # the epoch block (if it exists) applied - everything else can be rebuilt by
  # loading an epoch boundary state and applying the missing blocks.
  # States produced with empty slots only are skipped too, so the only
  # checkpoint after a block is the one for the first epoch following it.
  if bs.slot == bs.blck.slot and bs.blck.parent == nil:
    # The tail block also counts as a state checkpoint!
    return true

  # Start of the epoch right after the one the block itself belongs to
  bs.slot.isEpoch and bs.slot.epoch == bs.blck.slot.epoch + 1
func stateCheckpoint*(bs: BlockSlot): BlockSlot = func stateCheckpoint*(bs: BlockSlot): BlockSlot =
## The first ancestor BlockSlot that is a state checkpoint ## The first ancestor BlockSlot that is a state checkpoint
var bs = bs var bs = bs
@ -591,7 +585,9 @@ proc putState*(dag: ChainDAGRef, state: var StateData) =
if not isStateCheckpoint(state.blck.atSlot(getStateField(state, slot))): if not isStateCheckpoint(state.blck.atSlot(getStateField(state, slot))):
return return
if dag.db.containsState(state.data.root): # Don't consider legacy tables here, they are slow to read so we'll want to
# rewrite things in the new database anyway.
if dag.db.containsState(state.data.root, legacy = false):
return return
let startTick = Moment.now() let startTick = Moment.now()
@ -759,11 +755,31 @@ proc updateStateData*(
cur = bs cur = bs
found = false found = false
template exactMatch(state: StateData, bs: BlockSlot): bool =
  # The block is the same and the state sits at exactly the requested slot -
  # the state already is the desired blockslot
  if state.blck == bs.blck:
    getStateField(state, slot) == bs.slot
  else:
    false
template canAdvance(state: StateData, bs: BlockSlot): bool = template canAdvance(state: StateData, bs: BlockSlot): bool =
# The block is the same and we're at an early enough slot - the state can # The block is the same and we're at an early enough slot - the state can
# be used to arrive at the desired blockslot # be used to arrive at the desired blockslot
state.blck == bs.blck and getStateField(state, slot) <= bs.slot state.blck == bs.blck and getStateField(state, slot) <= bs.slot
# Fast path: check all caches for an exact match - this is faster than
# advancing a state where there's epoch processing to do, by a wide margin -
# it also avoids `hash_tree_root` for slot processing
if exactMatch(state, cur):
found = true
elif exactMatch(dag.headState, cur):
assign(state, dag.headState)
found = true
elif exactMatch(dag.clearanceState, cur):
assign(state, dag.clearanceState)
found = true
elif exactMatch(dag.epochRefState, cur):
assign(state, dag.epochRefState)
found = true
# First, run a quick check if we can simply apply a few blocks to an in-memory # First, run a quick check if we can simply apply a few blocks to an in-memory
# state - any in-memory state will be faster than loading from database. # state - any in-memory state will be faster than loading from database.
# The limit here how many blocks we apply is somewhat arbitrary but two full # The limit here how many blocks we apply is somewhat arbitrary but two full
@ -772,7 +788,7 @@ proc updateStateData*(
# This happens in particular during startup where we replay blocks # This happens in particular during startup where we replay blocks
# sequentially to grab their votes. # sequentially to grab their votes.
const RewindBlockThreshold = 64 const RewindBlockThreshold = 64
while ancestors.len < RewindBlockThreshold: while not found and ancestors.len < RewindBlockThreshold:
if canAdvance(state, cur): if canAdvance(state, cur):
found = true found = true
break break
@ -883,7 +899,7 @@ proc updateStateData*(
elif ancestors.len > 0: elif ancestors.len > 0:
debug "State replayed" debug "State replayed"
else: else:
trace "State advanced" # Normal case! debug "State advanced" # Normal case!
proc delState(dag: ChainDAGRef, bs: BlockSlot) = proc delState(dag: ChainDAGRef, bs: BlockSlot) =
# Delete state and mapping for a particular block+slot # Delete state and mapping for a particular block+slot