diff --git a/.gitignore b/.gitignore index 4162c4ca0..055116e13 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,9 @@ build/ # State sim # TODO - move in another folder 0000-*.json +*.ssz +*.log +*.sqlite3 /local_testnet_data diff --git a/beacon_chain/beacon_node.nim b/beacon_chain/beacon_node.nim index 58ca11708..7278d29c4 100644 --- a/beacon_chain/beacon_node.nim +++ b/beacon_chain/beacon_node.nim @@ -247,7 +247,7 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async # time to do so? network.initBeaconSync(blockPool, enrForkId.forkDigest, proc(signedBlock: SignedBeaconBlock) = - if signedBlock.message.slot mod SLOTS_PER_EPOCH == 0: + if signedBlock.message.slot.isEpoch: # TODO this is a hack to make sure that lmd ghost is run regularly # while syncing blocks - it's poor form to keep it here though - # the logic should be moved elsewhere diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim index e498dc3b1..86b5857fb 100644 --- a/beacon_chain/block_pool.nim +++ b/beacon_chain/block_pool.nim @@ -195,19 +195,13 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool = let root = db.getStateRoot(bs.blck.root, bs.slot) if root.isSome(): # TODO load StateData from BeaconChainDB - let loaded = db.getState(root.get(), tmpState.data.data, noRollback) - if not loaded: - # TODO We don't write state root and state atomically, so we need to be - # lenient here in case of dirty shutdown - transactions would be - # nice! - warn "State root, but no state - database corrupt?", - stateRoot = root.get(), blockRoot = bs.blck.root, blockSlot = bs.slot - continue + # We save state root separately for empty slots which means we might + # sometimes not find a state even though we saved its state root + if db.getState(root.get(), tmpState.data.data, noRollback): + tmpState.data.root = root.get() + tmpState.blck = bs.blck - tmpState.data.root = root.get() - tmpState.blck = bs.blck - - break + break bs = bs.parent() # Iterate slot by slot in case there's a gap! @@ -355,8 +349,17 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = # we could easily see a state explosion logScope: pcs = "save_state_at_epoch_start" + var rootWritten = false + if state.data.slot != blck.slot: + # This is a state that was produced by a skip slot for which there is no + # block - we'll save the state root in the database in case we need to + # replay the skip + pool.db.putStateRoot(blck.root, state.data.slot, state.root) + rootWritten = true + let epochParity = state.data.slot.compute_epoch_at_slot.uint64 mod 2 - if state.data.slot mod SLOTS_PER_EPOCH == 0: + + if state.data.slot.isEpoch: if not pool.db.containsState(state.root): info "Storing state", blck = shortLog(blck), @@ -364,8 +367,8 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = stateRoot = shortLog(state.root), cat = "caching" pool.db.putState(state.root, state.data) - # TODO this should be atomic with the above write.. - pool.db.putStateRoot(blck.root, state.data.slot, state.root) + if not rootWritten: + pool.db.putStateRoot(blck.root, state.data.slot, state.root) # Because state.data.slot mod SLOTS_PER_EPOCH == 0, wrap back to last # time this was the case i.e. last currentCache. The opposite parity, @@ -462,7 +465,8 @@ proc add*( # TODO if the block is from the future, we should not be resolving it (yet), # but maybe we should use it as a hint that our clock is wrong? - updateStateData(pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1)) + updateStateData( + pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1)) let poolPtr = unsafeAddr pool # safe because restore is short-lived @@ -639,17 +643,24 @@ func checkMissing*(pool: var BlockPool): seq[FetchRecord] = proc skipAndUpdateState( pool: BlockPool, - state: var HashedBeaconState, blck: BlockRef, slot: Slot) = + state: var HashedBeaconState, blck: BlockRef, slot: Slot, save: bool) = while state.data.slot < slot: # Process slots one at a time in case afterUpdate needs to see empty states - process_slots(state, state.data.slot + 1) - pool.putState(state, blck) + # TODO when replaying, we already do this query when loading the ancestors - + # save and reuse + # TODO possibly we should keep this in memory for the hot blocks + let nextStateRoot = pool.db.getStateRoot(blck.root, state.data.slot + 1) + advance_slot(state, nextStateRoot) + + if save: + pool.putState(state, blck) proc skipAndUpdateState( pool: BlockPool, - state: var StateData, blck: BlockData, flags: UpdateFlags): bool = + state: var StateData, blck: BlockData, flags: UpdateFlags, save: bool): bool = - pool.skipAndUpdateState(state.data, blck.refs, blck.data.message.slot - 1) + pool.skipAndUpdateState( + state.data, blck.refs, blck.data.message.slot - 1, save) var statePtr = unsafeAddr state # safe because `rollback` is locally scoped proc rollback(v: var HashedBeaconState) = @@ -657,7 +668,7 @@ proc skipAndUpdateState( statePtr[] = pool.headState let ok = state_transition(state.data, blck.data, flags, rollback) - if ok: + if ok and save: pool.putState(state.data, blck.refs) ok @@ -678,16 +689,15 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): # successive parent block and checking if we can find the corresponding state # in the database. var - stateRoot = pool.db.getStateRoot(bs.blck.root, bs.slot) + stateRoot = block: + let tmp = pool.db.getStateRoot(bs.blck.root, bs.slot) + if tmp.isSome() and pool.db.containsState(tmp.get()): + tmp + else: + # State roots are sometimes kept in database even though state is not + err(Opt[Eth2Digest]) curBs = bs - # TODO this can happen when state root is saved but state is gone - this would - # indicate a corrupt database, but since we're not atomically - # writing and deleting state+root mappings in a single transaction, it's - # likely to happen and we guard against it here. - if stateRoot.isSome() and not pool.db.containsState(stateRoot.get()): - stateRoot.err() - while stateRoot.isNone(): let parBs = curBs.parent() if parBs.blck.isNil: @@ -783,8 +793,7 @@ proc getStateDataCached(pool: BlockPool, state: var StateData, bs: BlockSlot): b # In-memory caches didn't hit. Try main blockpool database. This is slower # than the caches due to SSZ (de)serializing and disk I/O, so prefer them. if (let tmp = pool.db.getStateRoot(bs.blck.root, bs.slot); tmp.isSome()): - doAssert pool.getState(pool.db, tmp.get(), bs.blck, state) - return true + return pool.getState(pool.db, tmp.get(), bs.blck, state) false @@ -800,7 +809,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = if state.blck.root == bs.blck.root and state.data.data.slot <= bs.slot: if state.data.data.slot != bs.slot: # Might be that we're moving to the same block but later slot - pool.skipAndUpdateState(state.data, bs.blck, bs.slot) + pool.skipAndUpdateState(state.data, bs.blck, bs.slot, true) return # State already at the right spot @@ -818,13 +827,22 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = # it's the one that we found the state with, and it has already been # applied. Pathologically quadratic in slot number, naïvely. for i in countdown(ancestors.len - 1, 0): + # Because the ancestors are in the database, there's no need to persist them + # again. Also, because we're applying blocks that were loaded from the + # database, we can skip certain checks that have already been performed + # before adding the block to the database. In particular, this means that + # no state root calculation will take place here, because we can load + # the final state root from the block itself. let ok = pool.skipAndUpdateState( state, ancestors[i], - {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) + {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}, + false) doAssert ok, "Blocks in database should never fail to apply.." - pool.skipAndUpdateState(state.data, bs.blck, bs.slot) + # We save states here - blocks were guaranteed to have passed through the save + # function once at least, but not so for empty slots! + pool.skipAndUpdateState(state.data, bs.blck, bs.slot, true) state.blck = bs.blck @@ -839,7 +857,6 @@ proc delState(pool: BlockPool, bs: BlockSlot) = # Delete state state and mapping for a particular block+slot if (let root = pool.db.getStateRoot(bs.blck.root, bs.slot); root.isSome()): pool.db.delState(root.get()) - pool.db.delStateRoot(bs.blck.root, bs.slot) proc updateHead*(pool: BlockPool, newHead: BlockRef) = ## Update what we consider to be the current head, as given by the fork diff --git a/beacon_chain/spec/helpers.nim b/beacon_chain/spec/helpers.nim index f8b1ab4d5..e91ac584c 100644 --- a/beacon_chain/spec/helpers.nim +++ b/beacon_chain/spec/helpers.nim @@ -38,6 +38,9 @@ func compute_epoch_at_slot*(slot: Slot|uint64): Epoch = template epoch*(slot: Slot): Epoch = compute_epoch_at_slot(slot) +template isEpoch*(slot: Slot): bool = + (slot mod SLOTS_PER_EPOCH) == 0 + # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#compute_start_slot_at_epoch func compute_start_slot_at_epoch*(epoch: Epoch): Slot = # Return the start slot of ``epoch``. diff --git a/beacon_chain/state_transition.nim b/beacon_chain/state_transition.nim index a46147336..bc2226da6 100644 --- a/beacon_chain/state_transition.nim +++ b/beacon_chain/state_transition.nim @@ -29,6 +29,7 @@ import chronicles, + stew/results, ./extras, ./ssz, metrics, ./spec/[datatypes, crypto, digest, helpers, validator], ./spec/[state_transition_block, state_transition_epoch], @@ -121,6 +122,32 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} = state.data.block_roots[state.data.slot mod SLOTS_PER_HISTORICAL_ROOT] = hash_tree_root(state.data.latest_block_header) +# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function +proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest]) = + # Special case version of process_slots that moves one slot at a time - can + # run faster if the state root is known already (for example when replaying + # existing slots) + process_slot(state) + let is_epoch_transition = (state.data.slot + 1).isEpoch + if is_epoch_transition: + # Note: Genesis epoch = 0, no need to test if before Genesis + try: + beacon_previous_validators.set(get_epoch_validator_count(state.data)) + except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22 + trace "Couldn't update metrics", msg = e.msg + process_epoch(state.data) + state.data.slot += 1 + if is_epoch_transition: + try: + beacon_current_validators.set(get_epoch_validator_count(state.data)) + except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22 + trace "Couldn't update metrics", msg = e.msg + + if nextStateRoot.isSome: + state.root = nextStateRoot.get() + else: + state.root = hash_tree_root(state.data) + # https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} = # TODO: Eth specs strongly assert that state.data.slot <= slot @@ -129,6 +156,11 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} = # but it maybe an artifact of the test case # as this was not triggered in the testnet1 # after a hour + # TODO this function is not _really_ necessary: when replaying states, we + # advance slots one by one before calling `state_transition` - this way, + # we avoid the state root calculation - as such, instead of advancing + # slots "automatically" in `state_transition`, perhaps it would be better + # to keep a pre-condition that state must be at the right slot already? if state.data.slot > slot: notice( "Unusual request for a slot in the past", @@ -139,22 +171,7 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} = # Catch up to the target slot while state.data.slot < slot: - process_slot(state) - let is_epoch_transition = (state.data.slot + 1) mod SLOTS_PER_EPOCH == 0 - if is_epoch_transition: - # Note: Genesis epoch = 0, no need to test if before Genesis - try: - beacon_previous_validators.set(get_epoch_validator_count(state.data)) - except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22 - trace "Couldn't update metrics", msg = e.msg - process_epoch(state.data) - state.data.slot += 1 - if is_epoch_transition: - try: - beacon_current_validators.set(get_epoch_validator_count(state.data)) - except Exception as e: # TODO https://github.com/status-im/nim-metrics/pull/22 - trace "Couldn't update metrics", msg = e.msg - state.root = hash_tree_root(state.data) + advance_slot(state, err(Opt[Eth2Digest])) # TODO remove this once callers gone proc process_slots*(state: var BeaconState, slot: Slot) {.deprecated: "Use HashedBeaconState version".} = diff --git a/research/block_sim.nim b/research/block_sim.nim index 9c02f6df0..7adb4a08d 100644 --- a/research/block_sim.nim +++ b/research/block_sim.nim @@ -33,13 +33,15 @@ type Timers = enum tEpoch = "Process epoch slot with block" tHashBlock = "Tree-hash block" tSignBlock = "Sign block" - tShuffle = "Retrieve committee once using get_beacon_committee" - tAttest = "Combine committee attestations" + tAttest = "Have committee attest to block" + tReplay = "Replay all produced blocks" # TODO confutils is an impenetrable black box. how can a help text be added here? cli do(slots = SLOTS_PER_EPOCH * 6, validators = SLOTS_PER_EPOCH * 100, # One per shard is minimum - attesterRatio {.desc: "ratio of validators that attest in each round"} = 0.73): + attesterRatio {.desc: "ratio of validators that attest in each round"} = 0.73, + blockRatio {.desc: "ratio of slots with blocks"} = 1.0, + replay = true): let state = loadGenesis(validators, true) genesisBlock = get_initial_beacon_block(state[]) @@ -56,11 +58,12 @@ cli do(slots = SLOTS_PER_EPOCH * 6, attPool = AttestationPool.init(blockPool) timers: array[Timers, RunningStat] attesters: RunningStat - r: Rand + r = initRand(1) - proc handleAttestations() = + let replayState = newClone(blockPool.headState) + + proc handleAttestations(slot: Slot) = let - slot = blockPool.head.blck.slot attestationHead = blockPool.head.blck.atSlot(slot) blockPool.withState(blockPool.tmpState, attestationHead): @@ -72,7 +75,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6, state, slot, committee_index.CommitteeIndex, cache) for index_in_committee, validatorIdx in committee: - if (rand(r, high(int)).float * attesterRatio).int <= high(int): + if rand(r, 1.0) <= attesterRatio: let data = makeAttestationData(state, slot, committee_index, blck.root) sig = @@ -89,10 +92,12 @@ cli do(slots = SLOTS_PER_EPOCH * 6, signature: sig )) - proc proposeBlock() = + proc proposeBlock(slot: Slot) = + if rand(r, 1.0) > blockRatio: + return + let head = blockPool.head.blck - slot = blockPool.head.blck.slot + 1 blockPool.withState(blockPool.tmpState, head.atSlot(slot)): var cache = get_empty_per_epoch_cache() @@ -130,18 +135,21 @@ cli do(slots = SLOTS_PER_EPOCH * 6, for i in 0.. 0.0: + withTimer(timers[t]): + proposeBlock(slot) if attesterRatio > 0.0: withTimer(timers[tAttest]): - handleAttestations() + handleAttestations(slot) - verifyConsensus(blockPool.headState.data.data, attesterRatio) + # TODO if attestation pool was smarter, it would include older attestations + # too! + verifyConsensus(blockPool.headState.data.data, attesterRatio * blockRatio) if t == tEpoch: echo &". slot: {shortLog(slot)} ", @@ -150,6 +158,11 @@ cli do(slots = SLOTS_PER_EPOCH * 6, write(stdout, ".") flushFile(stdout) + if replay: + withTimer(timers[tReplay]): + blockPool.updateStateData( + replayState[], blockPool.head.blck.atSlot(Slot(slots))) + echo "Done!" printTimers(blockPool.headState.data.data, attesters, true, timers) diff --git a/research/simutils.nim b/research/simutils.nim index f519b0322..be652afa2 100644 --- a/research/simutils.nim +++ b/research/simutils.nim @@ -40,7 +40,7 @@ func verifyConsensus*(state: BeaconState, attesterRatio: auto) = doAssert state.finalized_checkpoint.epoch + 2 >= current_epoch proc loadGenesis*(validators: int, validate: bool): ref BeaconState = - let fn = &"genesim_{const_preset}_{validators}" + let fn = &"genesim_{const_preset}_{validators}.ssz" if fileExists(fn): let res = newClone(SSZ.loadFile(fn, BeaconState)) if res.slot != GENESIS_SLOT: diff --git a/research/state_sim.nim b/research/state_sim.nim index 0b9e0ff08..6fe624c42 100644 --- a/research/state_sim.nim +++ b/research/state_sim.nim @@ -51,7 +51,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6, latest_block_root = hash_tree_root(genesisBlock.message) timers: array[Timers, RunningStat] attesters: RunningStat - r: Rand + r = initRand(1) signedBlock: SignedBeaconBlock cache = get_empty_per_epoch_cache() @@ -89,7 +89,7 @@ cli do(slots = SLOTS_PER_EPOCH * 6, let t = if (state.slot > GENESIS_SLOT and - (state.slot + 1) mod SLOTS_PER_EPOCH == 0): tEpoch + (state.slot + 1).isEpoch): tEpoch else: tBlock withTimer(timers[t]):