cache empty slot state root (#961)
When replaying state transitions, the state root for slots that have a block is taken from the block. For slots that lack a block, it is currently calculated using `hash_tree_root`, which is expensive. Caching the state roots of empty slots lets us avoid recalculating this hash, meaning that during replay these hashes are never calculated. This turns blocks into fairly lightweight "state-diffs"!

* avoid re-saving state when replaying blocks
* advance empty slots slot-by-slot and save root
* fix sim randomness
* fix sim genesis filename
* introduce `isEpoch` to check if a slot is the first slot of an epoch
This commit is contained in:
parent
c3cdb399c0
commit
2449d4b479
|
@ -24,6 +24,9 @@ build/
|
|||
|
||||
# State sim # TODO - move in another folder
|
||||
0000-*.json
|
||||
*.ssz
|
||||
*.log
|
||||
*.sqlite3
|
||||
|
||||
/local_testnet_data
|
||||
|
||||
|
|
|
@ -247,7 +247,7 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async
|
|||
# time to do so?
|
||||
network.initBeaconSync(blockPool, enrForkId.forkDigest,
|
||||
proc(signedBlock: SignedBeaconBlock) =
|
||||
if signedBlock.message.slot mod SLOTS_PER_EPOCH == 0:
|
||||
if signedBlock.message.slot.isEpoch:
|
||||
# TODO this is a hack to make sure that lmd ghost is run regularly
|
||||
# while syncing blocks - it's poor form to keep it here though -
|
||||
# the logic should be moved elsewhere
|
||||
|
|
|
@ -195,19 +195,13 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
|
|||
let root = db.getStateRoot(bs.blck.root, bs.slot)
|
||||
if root.isSome():
|
||||
# TODO load StateData from BeaconChainDB
|
||||
let loaded = db.getState(root.get(), tmpState.data.data, noRollback)
|
||||
if not loaded:
|
||||
# TODO We don't write state root and state atomically, so we need to be
|
||||
# lenient here in case of dirty shutdown - transactions would be
|
||||
# nice!
|
||||
warn "State root, but no state - database corrupt?",
|
||||
stateRoot = root.get(), blockRoot = bs.blck.root, blockSlot = bs.slot
|
||||
continue
|
||||
# We save state root separately for empty slots which means we might
|
||||
# sometimes not find a state even though we saved its state root
|
||||
if db.getState(root.get(), tmpState.data.data, noRollback):
|
||||
tmpState.data.root = root.get()
|
||||
tmpState.blck = bs.blck
|
||||
|
||||
tmpState.data.root = root.get()
|
||||
tmpState.blck = bs.blck
|
||||
|
||||
break
|
||||
break
|
||||
|
||||
bs = bs.parent() # Iterate slot by slot in case there's a gap!
|
||||
|
||||
|
@ -355,8 +349,17 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
|
|||
# we could easily see a state explosion
|
||||
logScope: pcs = "save_state_at_epoch_start"
|
||||
|
||||
var rootWritten = false
|
||||
if state.data.slot != blck.slot:
|
||||
# This is a state that was produced by a skip slot for which there is no
|
||||
# block - we'll save the state root in the database in case we need to
|
||||
# replay the skip
|
||||
pool.db.putStateRoot(blck.root, state.data.slot, state.root)
|
||||
rootWritten = true
|
||||
|
||||
let epochParity = state.data.slot.compute_epoch_at_slot.uint64 mod 2
|
||||
if state.data.slot mod SLOTS_PER_EPOCH == 0:
|
||||
|
||||
if state.data.slot.isEpoch:
|
||||
if not pool.db.containsState(state.root):
|
||||
info "Storing state",
|
||||
blck = shortLog(blck),
|
||||
|
@ -364,8 +367,8 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
|
|||
stateRoot = shortLog(state.root),
|
||||
cat = "caching"
|
||||
pool.db.putState(state.root, state.data)
|
||||
# TODO this should be atomic with the above write..
|
||||
pool.db.putStateRoot(blck.root, state.data.slot, state.root)
|
||||
if not rootWritten:
|
||||
pool.db.putStateRoot(blck.root, state.data.slot, state.root)
|
||||
|
||||
# Because state.data.slot mod SLOTS_PER_EPOCH == 0, wrap back to last
|
||||
# time this was the case i.e. last currentCache. The opposite parity,
|
||||
|
@ -462,7 +465,8 @@ proc add*(
|
|||
|
||||
# TODO if the block is from the future, we should not be resolving it (yet),
|
||||
# but maybe we should use it as a hint that our clock is wrong?
|
||||
updateStateData(pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1))
|
||||
updateStateData(
|
||||
pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1))
|
||||
|
||||
let
|
||||
poolPtr = unsafeAddr pool # safe because restore is short-lived
|
||||
|
@ -639,17 +643,24 @@ func checkMissing*(pool: var BlockPool): seq[FetchRecord] =
|
|||
|
||||
proc skipAndUpdateState(
|
||||
pool: BlockPool,
|
||||
state: var HashedBeaconState, blck: BlockRef, slot: Slot) =
|
||||
state: var HashedBeaconState, blck: BlockRef, slot: Slot, save: bool) =
|
||||
while state.data.slot < slot:
|
||||
# Process slots one at a time in case afterUpdate needs to see empty states
|
||||
process_slots(state, state.data.slot + 1)
|
||||
pool.putState(state, blck)
|
||||
# TODO when replaying, we already do this query when loading the ancestors -
|
||||
# save and reuse
|
||||
# TODO possibly we should keep this in memory for the hot blocks
|
||||
let nextStateRoot = pool.db.getStateRoot(blck.root, state.data.slot + 1)
|
||||
advance_slot(state, nextStateRoot)
|
||||
|
||||
if save:
|
||||
pool.putState(state, blck)
|
||||
|
||||
proc skipAndUpdateState(
|
||||
pool: BlockPool,
|
||||
state: var StateData, blck: BlockData, flags: UpdateFlags): bool =
|
||||
state: var StateData, blck: BlockData, flags: UpdateFlags, save: bool): bool =
|
||||
|
||||
pool.skipAndUpdateState(state.data, blck.refs, blck.data.message.slot - 1)
|
||||
pool.skipAndUpdateState(
|
||||
state.data, blck.refs, blck.data.message.slot - 1, save)
|
||||
|
||||
var statePtr = unsafeAddr state # safe because `rollback` is locally scoped
|
||||
proc rollback(v: var HashedBeaconState) =
|
||||
|
@ -657,7 +668,7 @@ proc skipAndUpdateState(
|
|||
statePtr[] = pool.headState
|
||||
|
||||
let ok = state_transition(state.data, blck.data, flags, rollback)
|
||||
if ok:
|
||||
if ok and save:
|
||||
pool.putState(state.data, blck.refs)
|
||||
|
||||
ok
|
||||
|
@ -678,16 +689,15 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot):
|
|||
# successive parent block and checking if we can find the corresponding state
|
||||
# in the database.
|
||||
var
|
||||
stateRoot = pool.db.getStateRoot(bs.blck.root, bs.slot)
|
||||
stateRoot = block:
|
||||
let tmp = pool.db.getStateRoot(bs.blck.root, bs.slot)
|
||||
if tmp.isSome() and pool.db.containsState(tmp.get()):
|
||||
tmp
|
||||
else:
|
||||
# State roots are sometimes kept in database even though state is not
|
||||
err(Opt[Eth2Digest])
|
||||
curBs = bs
|
||||
|
||||
# TODO this can happen when state root is saved but state is gone - this would
|
||||
# indicate a corrupt database, but since we're not atomically
|
||||
# writing and deleting state+root mappings in a single transaction, it's
|
||||
# likely to happen and we guard against it here.
|
||||
if stateRoot.isSome() and not pool.db.containsState(stateRoot.get()):
|
||||
stateRoot.err()
|
||||
|
||||
while stateRoot.isNone():
|
||||
let parBs = curBs.parent()
|
||||
if parBs.blck.isNil:
|
||||
|
@ -783,8 +793,7 @@ proc getStateDataCached(pool: BlockPool, state: var StateData, bs: BlockSlot): b
|
|||
# In-memory caches didn't hit. Try main blockpool database. This is slower
|
||||
# than the caches due to SSZ (de)serializing and disk I/O, so prefer them.
|
||||
if (let tmp = pool.db.getStateRoot(bs.blck.root, bs.slot); tmp.isSome()):
|
||||
doAssert pool.getState(pool.db, tmp.get(), bs.blck, state)
|
||||
return true
|
||||
return pool.getState(pool.db, tmp.get(), bs.blck, state)
|
||||
|
||||
false
|
||||
|
||||
|
@ -800,7 +809,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
|
|||
if state.blck.root == bs.blck.root and state.data.data.slot <= bs.slot:
|
||||
if state.data.data.slot != bs.slot:
|
||||
# Might be that we're moving to the same block but later slot
|
||||
pool.skipAndUpdateState(state.data, bs.blck, bs.slot)
|
||||
pool.skipAndUpdateState(state.data, bs.blck, bs.slot, true)
|
||||
|
||||
return # State already at the right spot
|
||||
|
||||
|
@ -818,13 +827,22 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
|
|||
# it's the one that we found the state with, and it has already been
|
||||
# applied. Pathologically quadratic in slot number, naïvely.
|
||||
for i in countdown(ancestors.len - 1, 0):
|
||||
# Because the ancestors are in the database, there's no need to persist them
|
||||
# again. Also, because we're applying blocks that were loaded from the
|
||||
# database, we can skip certain checks that have already been performed
|
||||
# before adding the block to the database. In particular, this means that
|
||||
# no state root calculation will take place here, because we can load
|
||||
# the final state root from the block itself.
|
||||
let ok =
|
||||
pool.skipAndUpdateState(
|
||||
state, ancestors[i],
|
||||
{skipBlsValidation, skipMerkleValidation, skipStateRootValidation})
|
||||
{skipBlsValidation, skipMerkleValidation, skipStateRootValidation},
|
||||
false)
|
||||
doAssert ok, "Blocks in database should never fail to apply.."
|
||||
|
||||
pool.skipAndUpdateState(state.data, bs.blck, bs.slot)
|
||||
# We save states here - blocks were guaranteed to have passed through the save
|
||||
# function once at least, but not so for empty slots!
|
||||
pool.skipAndUpdateState(state.data, bs.blck, bs.slot, true)
|
||||
|
||||
state.blck = bs.blck
|
||||
|
||||
|
@ -839,7 +857,6 @@ proc delState(pool: BlockPool, bs: BlockSlot) =
|
|||
# Delete state and mapping for a particular block+slot
|
||||
if (let root = pool.db.getStateRoot(bs.blck.root, bs.slot); root.isSome()):
|
||||
pool.db.delState(root.get())
|
||||
pool.db.delStateRoot(bs.blck.root, bs.slot)
|
||||
|
||||
proc updateHead*(pool: BlockPool, newHead: BlockRef) =
|
||||
## Update what we consider to be the current head, as given by the fork
|
||||
|
|
|
@ -38,6 +38,9 @@ func compute_epoch_at_slot*(slot: Slot|uint64): Epoch =
|
|||
template epoch*(slot: Slot): Epoch =
|
||||
compute_epoch_at_slot(slot)
|
||||
|
||||
template isEpoch*(slot: Slot): bool =
|
||||
(slot mod SLOTS_PER_EPOCH) == 0
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#compute_start_slot_at_epoch
|
||||
func compute_start_slot_at_epoch*(epoch: Epoch): Slot =
|
||||
# Return the start slot of ``epoch``.
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
import
|
||||
chronicles,
|
||||
stew/results,
|
||||
./extras, ./ssz, metrics,
|
||||
./spec/[datatypes, crypto, digest, helpers, validator],
|
||||
./spec/[state_transition_block, state_transition_epoch],
|
||||
|
@ -121,6 +122,32 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} =
|
|||
state.data.block_roots[state.data.slot mod SLOTS_PER_HISTORICAL_ROOT] =
|
||||
hash_tree_root(state.data.latest_block_header)
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
|
||||
proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest]) =
|
||||
# Special case version of process_slots that moves one slot at a time - can
|
||||
# run faster if the state root is known already (for example when replaying
|
||||
# existing slots)
|
||||
process_slot(state)
|
||||
let is_epoch_transition = (state.data.slot + 1).isEpoch
|
||||
if is_epoch_transition:
|
||||
# Note: Genesis epoch = 0, no need to test if before Genesis
|
||||
try:
|
||||
beacon_previous_validators.set(get_epoch_validator_count(state.data))
|
||||
except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22
|
||||
trace "Couldn't update metrics", msg = e.msg
|
||||
process_epoch(state.data)
|
||||
state.data.slot += 1
|
||||
if is_epoch_transition:
|
||||
try:
|
||||
beacon_current_validators.set(get_epoch_validator_count(state.data))
|
||||
except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22
|
||||
trace "Couldn't update metrics", msg = e.msg
|
||||
|
||||
if nextStateRoot.isSome:
|
||||
state.root = nextStateRoot.get()
|
||||
else:
|
||||
state.root = hash_tree_root(state.data)
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
|
||||
proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
|
||||
# TODO: Eth specs strongly assert that state.data.slot <= slot
|
||||
|
@ -129,6 +156,11 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
|
|||
# but it may be an artifact of the test case
|
||||
# as this was not triggered in the testnet1
|
||||
# after an hour
|
||||
# TODO this function is not _really_ necessary: when replaying states, we
|
||||
# advance slots one by one before calling `state_transition` - this way,
|
||||
# we avoid the state root calculation - as such, instead of advancing
|
||||
# slots "automatically" in `state_transition`, perhaps it would be better
|
||||
# to keep a pre-condition that state must be at the right slot already?
|
||||
if state.data.slot > slot:
|
||||
notice(
|
||||
"Unusual request for a slot in the past",
|
||||
|
@ -139,22 +171,7 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
|
|||
|
||||
# Catch up to the target slot
|
||||
while state.data.slot < slot:
|
||||
process_slot(state)
|
||||
let is_epoch_transition = (state.data.slot + 1) mod SLOTS_PER_EPOCH == 0
|
||||
if is_epoch_transition:
|
||||
# Note: Genesis epoch = 0, no need to test if before Genesis
|
||||
try:
|
||||
beacon_previous_validators.set(get_epoch_validator_count(state.data))
|
||||
except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22
|
||||
trace "Couldn't update metrics", msg = e.msg
|
||||
process_epoch(state.data)
|
||||
state.data.slot += 1
|
||||
if is_epoch_transition:
|
||||
try:
|
||||
beacon_current_validators.set(get_epoch_validator_count(state.data))
|
||||
except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22
|
||||
trace "Couldn't update metrics", msg = e.msg
|
||||
state.root = hash_tree_root(state.data)
|
||||
advance_slot(state, err(Opt[Eth2Digest]))
|
||||
|
||||
# TODO remove this once callers gone
|
||||
proc process_slots*(state: var BeaconState, slot: Slot) {.deprecated: "Use HashedBeaconState version".} =
|
||||
|
|
|
@ -33,13 +33,15 @@ type Timers = enum
|
|||
tEpoch = "Process epoch slot with block"
|
||||
tHashBlock = "Tree-hash block"
|
||||
tSignBlock = "Sign block"
|
||||
tShuffle = "Retrieve committee once using get_beacon_committee"
|
||||
tAttest = "Combine committee attestations"
|
||||
tAttest = "Have committee attest to block"
|
||||
tReplay = "Replay all produced blocks"
|
||||
|
||||
# TODO confutils is an impenetrable black box. how can a help text be added here?
|
||||
cli do(slots = SLOTS_PER_EPOCH * 6,
|
||||
validators = SLOTS_PER_EPOCH * 100, # One per shard is minimum
|
||||
attesterRatio {.desc: "ratio of validators that attest in each round"} = 0.73):
|
||||
attesterRatio {.desc: "ratio of validators that attest in each round"} = 0.73,
|
||||
blockRatio {.desc: "ratio of slots with blocks"} = 1.0,
|
||||
replay = true):
|
||||
let
|
||||
state = loadGenesis(validators, true)
|
||||
genesisBlock = get_initial_beacon_block(state[])
|
||||
|
@ -56,11 +58,12 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
attPool = AttestationPool.init(blockPool)
|
||||
timers: array[Timers, RunningStat]
|
||||
attesters: RunningStat
|
||||
r: Rand
|
||||
r = initRand(1)
|
||||
|
||||
proc handleAttestations() =
|
||||
let replayState = newClone(blockPool.headState)
|
||||
|
||||
proc handleAttestations(slot: Slot) =
|
||||
let
|
||||
slot = blockPool.head.blck.slot
|
||||
attestationHead = blockPool.head.blck.atSlot(slot)
|
||||
|
||||
blockPool.withState(blockPool.tmpState, attestationHead):
|
||||
|
@ -72,7 +75,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
state, slot, committee_index.CommitteeIndex, cache)
|
||||
|
||||
for index_in_committee, validatorIdx in committee:
|
||||
if (rand(r, high(int)).float * attesterRatio).int <= high(int):
|
||||
if rand(r, 1.0) <= attesterRatio:
|
||||
let
|
||||
data = makeAttestationData(state, slot, committee_index, blck.root)
|
||||
sig =
|
||||
|
@ -89,10 +92,12 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
signature: sig
|
||||
))
|
||||
|
||||
proc proposeBlock() =
|
||||
proc proposeBlock(slot: Slot) =
|
||||
if rand(r, 1.0) > blockRatio:
|
||||
return
|
||||
|
||||
let
|
||||
head = blockPool.head.blck
|
||||
slot = blockPool.head.blck.slot + 1
|
||||
|
||||
blockPool.withState(blockPool.tmpState, head.atSlot(slot)):
|
||||
var cache = get_empty_per_epoch_cache()
|
||||
|
@ -130,18 +135,21 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
|
||||
for i in 0..<slots:
|
||||
let
|
||||
slot = blockPool.headState.data.data.slot + 1
|
||||
slot = Slot(i + 1)
|
||||
t =
|
||||
if slot mod SLOTS_PER_EPOCH == 0: tEpoch
|
||||
if slot.isEpoch: tEpoch
|
||||
else: tBlock
|
||||
|
||||
withTimer(timers[t]):
|
||||
proposeBlock()
|
||||
if blockRatio > 0.0:
|
||||
withTimer(timers[t]):
|
||||
proposeBlock(slot)
|
||||
if attesterRatio > 0.0:
|
||||
withTimer(timers[tAttest]):
|
||||
handleAttestations()
|
||||
handleAttestations(slot)
|
||||
|
||||
verifyConsensus(blockPool.headState.data.data, attesterRatio)
|
||||
# TODO if attestation pool was smarter, it would include older attestations
|
||||
# too!
|
||||
verifyConsensus(blockPool.headState.data.data, attesterRatio * blockRatio)
|
||||
|
||||
if t == tEpoch:
|
||||
echo &". slot: {shortLog(slot)} ",
|
||||
|
@ -150,6 +158,11 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
write(stdout, ".")
|
||||
flushFile(stdout)
|
||||
|
||||
if replay:
|
||||
withTimer(timers[tReplay]):
|
||||
blockPool.updateStateData(
|
||||
replayState[], blockPool.head.blck.atSlot(Slot(slots)))
|
||||
|
||||
echo "Done!"
|
||||
|
||||
printTimers(blockPool.headState.data.data, attesters, true, timers)
|
||||
|
|
|
@ -40,7 +40,7 @@ func verifyConsensus*(state: BeaconState, attesterRatio: auto) =
|
|||
doAssert state.finalized_checkpoint.epoch + 2 >= current_epoch
|
||||
|
||||
proc loadGenesis*(validators: int, validate: bool): ref BeaconState =
|
||||
let fn = &"genesim_{const_preset}_{validators}"
|
||||
let fn = &"genesim_{const_preset}_{validators}.ssz"
|
||||
if fileExists(fn):
|
||||
let res = newClone(SSZ.loadFile(fn, BeaconState))
|
||||
if res.slot != GENESIS_SLOT:
|
||||
|
|
|
@ -51,7 +51,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
latest_block_root = hash_tree_root(genesisBlock.message)
|
||||
timers: array[Timers, RunningStat]
|
||||
attesters: RunningStat
|
||||
r: Rand
|
||||
r = initRand(1)
|
||||
signedBlock: SignedBeaconBlock
|
||||
cache = get_empty_per_epoch_cache()
|
||||
|
||||
|
@ -89,7 +89,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6,
|
|||
|
||||
let t =
|
||||
if (state.slot > GENESIS_SLOT and
|
||||
(state.slot + 1) mod SLOTS_PER_EPOCH == 0): tEpoch
|
||||
(state.slot + 1).isEpoch): tEpoch
|
||||
else: tBlock
|
||||
|
||||
withTimer(timers[t]):
|
||||
|
|
Loading…
Reference in New Issue