diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim index 1bbd6d9dd..dfee9cd4e 100644 --- a/beacon_chain/block_pool.nim +++ b/beacon_chain/block_pool.nim @@ -398,7 +398,17 @@ proc add*( # TODO if the block is from the future, we should not be resolving it (yet), # but maybe we should use it as a hint that our clock is wrong? updateStateData(pool, pool.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1)) - if not state_transition(pool.tmpState.data, signedBlock, {}): + + let + poolPtr = unsafeAddr pool # safe because restore is short-lived + proc restore(v: var HashedBeaconState) = + # TODO address this ugly workaround - there should probably be a + # `state_transition` that takes a `StateData` instead and updates + # the block as well + doAssert v.addr == addr poolPtr.tmpState.data + poolPtr.tmpState = poolPtr.headState + + if not state_transition(pool.tmpState.data, signedBlock, {}, restore): # TODO find a better way to log all this block data notice "Invalid block", blck = shortLog(blck), @@ -561,26 +571,6 @@ func checkMissing*(pool: var BlockPool): seq[FetchRecord] = if v.tries.popcount() == 1: result.add(FetchRecord(root: k, historySlots: v.slots)) -proc skipAndUpdateState( - state: var HashedBeaconState, slot: Slot, - afterUpdate: proc (state: HashedBeaconState) {.gcsafe.}) = - while state.data.slot < slot: - # Process slots one at a time in case afterUpdate needs to see empty states - process_slots(state, state.data.slot + 1) - afterUpdate(state) - -proc skipAndUpdateState( - state: var HashedBeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags, - afterUpdate: proc (state: HashedBeaconState) {.gcsafe.}): bool = - - skipAndUpdateState(state, signedBlock.message.slot - 1, afterUpdate) - - let ok = state_transition(state, signedBlock, flags) - - afterUpdate(state) - - ok - proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = # TODO we save state at every epoch start but never remove them - we also # potentially save multiple states per slot if reorgs happen, meaning @@ -636,6 +626,31 @@ proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = # TODO this should be atomic with the above write.. currentCache.putStateRoot(blck.root, state.data.slot, state.root) +proc skipAndUpdateState( + pool: BlockPool, + state: var HashedBeaconState, blck: BlockRef, slot: Slot) = + while state.data.slot < slot: + # Process slots one at a time in case afterUpdate needs to see empty states + process_slots(state, state.data.slot + 1) + pool.putState(state, blck) + +proc skipAndUpdateState( + pool: BlockPool, + state: var StateData, blck: BlockData, flags: UpdateFlags): bool = + + pool.skipAndUpdateState(state.data, blck.refs, blck.data.message.slot - 1) + + var statePtr = unsafeAddr state # safe because `restore` is locally scoped + proc restore(v: var HashedBeaconState) = + doAssert (addr(statePtr.data) == addr v) + statePtr[] = pool.headState + + let ok = state_transition(state.data, blck.data, flags, restore) + if ok: + pool.putState(state.data, blck.refs) + + ok + proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): seq[BlockData] = logScope: pcs = "replay_state" @@ -765,8 +780,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = if state.blck.root == bs.blck.root and state.data.data.slot <= bs.slot: if state.data.data.slot != bs.slot: # Might be that we're moving to the same block but later slot - skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): - pool.putState(state, bs.blck) + pool.skipAndUpdateState(state.data, bs.blck, bs.slot) return # State already at the right spot @@ -785,14 +799,12 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = # applied. Pathologically quadratic in slot number, naïvely. for i in countdown(ancestors.len - 1, 0): let ok = - skipAndUpdateState(state.data, - ancestors[i].data, - {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) do (state: HashedBeaconState): - pool.putState(state, ancestors[i].refs) + pool.skipAndUpdateState( + state, ancestors[i], + {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) doAssert ok, "Blocks in database should never fail to apply.." - skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): - pool.putState(state, bs.blck) + pool.skipAndUpdateState(state.data, bs.blck, bs.slot) state.blck = bs.blck diff --git a/beacon_chain/state_transition.nim b/beacon_chain/state_transition.nim index 2133f6aba..08dfac061 100644 --- a/beacon_chain/state_transition.nim +++ b/beacon_chain/state_transition.nim @@ -138,8 +138,15 @@ proc verifyStateRoot(state: BeaconState, blck: BeaconBlock): bool = else: true +type + RollbackProc* = proc(v: var BeaconState) {.gcsafe.} + +proc noRollback*(state: var BeaconState) = + trace "Skipping rollback of broken state" + proc state_transition*( - state: var BeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags): bool {.nbench.}= + state: var BeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags, + rollback: RollbackProc): bool {.nbench.} = ## Time in the beacon chain moves by slots. Every time (haha.) that happens, ## we will update the beacon state. Normally, the state updates will be driven ## by the contents of a new block, but it may happen that the block goes @@ -151,6 +158,12 @@ proc state_transition*( ## The flags are used to specify that certain validations should be skipped ## for the new block. This is done during block proposal, to create a state ## whose hash can be included in the new block. + ## + ## `rollback` is called if the transition fails and the given state has been + ## partially changed. If a temporary state was given to `state_transition`, + ## it is safe to use `noRollback` and leave it broken, else the state + ## object should be rolled back to a consistent state. If the transition fails + ## before the state has been updated, `rollback` will not be called. # # TODO this function can be written with a loop inside to handle all empty # slots up to the slot of the new_block - but then again, why not eagerly @@ -167,11 +180,7 @@ proc state_transition*( # many functions will mutate `state` partially without rolling back # the changes in case of failure (look out for `var BeaconState` and # bool return values...) - - ## TODO, of cacheState/processEpoch/processSlot/processBloc, only the last - ## might fail, so should this bother capturing here, or? - var old_state = newClone(state) - + doAssert not rollback.isNil, "use noRollback if it's ok to mess up state" # These should never fail. process_slots(state, signedBlock.message.slot) @@ -194,7 +203,7 @@ proc state_transition*( return true # Block processing failed, roll back changes - state = old_state[] + rollback(state) false # Hashed-state transition functions @@ -250,11 +259,16 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) = trace "Couldn't update metrics", msg = e.msg state.root = hash_tree_root(state.data) -proc state_transition*( - state: var HashedBeaconState, signedBlock: SignedBeaconBlock, flags: UpdateFlags): bool = - # Save for rollback - var old_state = clone(state) +type + RollbackHashedProc* = proc(state: var HashedBeaconState) {.gcsafe.} +proc noRollback*(state: var HashedBeaconState) = + trace "Skipping rollback of broken state" + +proc state_transition*( + state: var HashedBeaconState, signedBlock: SignedBeaconBlock, + flags: UpdateFlags, rollback: RollbackHashedProc): bool = + doAssert not rollback.isNil, "use noRollback if it's ok to mess up state" process_slots(state, signedBlock.message.slot) if skipBLSValidation in flags or @@ -275,6 +289,6 @@ proc state_transition*( return true # Block processing failed, roll back changes - state.data[] = old_state.data[] - state.root = old_state.root + rollback(state) + false diff --git a/nbench/scenarios.nim b/nbench/scenarios.nim index 3a1a18261..d50fe25a3 100644 --- a/nbench/scenarios.nim +++ b/nbench/scenarios.nim @@ -149,7 +149,8 @@ proc runFullTransition*(dir, preState, blocksPrefix: string, blocksQty: int, ski let signedBlock = parseSSZ(blockPath, SignedBeaconBlock) let flags = if skipBLS: {skipBlsValidation} else: {} - let success = state_transition(state[], signedBlock.message, flags) + let success = state_transition( + state[], signedBlock, flags, noRollback) echo "State transition status: ", if success: "SUCCESS ✓" else: "FAILURE ⚠️" proc runProcessSlots*(dir, preState: string, numSlots: uint64) = diff --git a/ncli/ncli_transition.nim b/ncli/ncli_transition.nim index 4351bb125..3574c2b88 100644 --- a/ncli/ncli_transition.nim +++ b/ncli/ncli_transition.nim @@ -10,7 +10,7 @@ cli do(pre: string, blck: string, post: string, verifyStateRoot = false): flags = if verifyStateRoot: {skipStateRootValidation} else: {} var stateY = HashedBeaconState(data: stateX, root: hash_tree_root(stateX)) - if not state_transition(stateY, blckX, flags): + if not state_transition(stateY, blckX, flags, noRollback): error "State transition failed" - - SSZ.saveFile(post, stateY.data) + else: + SSZ.saveFile(post, stateY.data) diff --git a/nfuzz/libnfuzz.nim b/nfuzz/libnfuzz.nim index 20082996f..56a13a5a1 100644 --- a/nfuzz/libnfuzz.nim +++ b/nfuzz/libnfuzz.nim @@ -106,7 +106,7 @@ proc nfuzz_attester_slashing(input: openArray[byte], output: ptr byte, proc nfuzz_block(input: openArray[byte], output: ptr byte, output_size: ptr uint, disable_bls: bool): bool {.exportc, raises: [FuzzCrashError, Defect].} = decodeAndProcess(BlockInput): - state_transition(data.state[], data.beaconBlock, flags) + state_transition(data.state[], data.beaconBlock, flags, noRollback) proc nfuzz_block_header(input: openArray[byte], output: ptr byte, output_size: ptr uint, disable_bls: bool): bool {.exportc, raises: [FuzzCrashError, Defect].} = diff --git a/research/state_sim.nim b/research/state_sim.nim index 4beb20fa1..607b8314e 100644 --- a/research/state_sim.nim +++ b/research/state_sim.nim @@ -78,15 +78,13 @@ cli do(slots = SLOTS_PER_EPOCH * 6, echo "Generating Genesis..." let - genesisState = - initialize_beacon_state_from_eth1(Eth2Digest(), 0, deposits, flags) - genesisBlock = get_initial_beacon_block(genesisState[]) + state = initialize_beacon_state_from_eth1(Eth2Digest(), 0, deposits, flags) + let genesisBlock = get_initial_beacon_block(state[]) echo "Starting simulation..." var attestations = initTable[Slot, seq[Attestation]]() - state = genesisState latest_block_root = hash_tree_root(genesisBlock.message) timers: array[Timers, RunningStat] attesters: RunningStat @@ -102,10 +100,10 @@ cli do(slots = SLOTS_PER_EPOCH * 6, write(stdout, ".") if last: - writeJson("state.json", state) + writeJson("state.json", state[]) else: - if state.slot mod json_interval.uint64 == 0: - writeJson(jsonName(prefix, state.slot), state) + if state[].slot mod json_interval.uint64 == 0: + writeJson(jsonName(prefix, state.slot), state[]) write(stdout, ":") else: write(stdout, ".") diff --git a/tests/mocking/mock_attestations.nim b/tests/mocking/mock_attestations.nim index 5281fcaee..fa51dbe94 100644 --- a/tests/mocking/mock_attestations.nim +++ b/tests/mocking/mock_attestations.nim @@ -131,4 +131,4 @@ proc add*(state: var BeaconState, attestation: Attestation, slot: Slot) = signMockBlock(state, signedBlock) doAssert state_transition( - state, signedBlock, flags = {skipStateRootValidation}) + state, signedBlock, flags = {skipStateRootValidation}, noRollback) diff --git a/tests/mocking/mock_blocks.nim b/tests/mocking/mock_blocks.nim index df73f5284..830637b19 100644 --- a/tests/mocking/mock_blocks.nim +++ b/tests/mocking/mock_blocks.nim @@ -65,4 +65,5 @@ proc applyEmptyBlock*(state: var BeaconState) = ## Do a state transition with an empty signed block ## on the current slot let signedBlock = mockBlock(state, state.slot, flags = {}) - doAssert state_transition(state, signedBlock, {skipStateRootValidation}) + doAssert state_transition( + state, signedBlock, {skipStateRootValidation}, noRollback) diff --git a/tests/official/test_fixture_sanity_blocks.nim b/tests/official/test_fixture_sanity_blocks.nim index ff82442f7..80492c7fa 100644 --- a/tests/official/test_fixture_sanity_blocks.nim +++ b/tests/official/test_fixture_sanity_blocks.nim @@ -44,10 +44,12 @@ proc runTest(identifier: string) = if hasPostState: # TODO: The EF is using invalid BLS keys so we can't verify them - let success = state_transition(preState[], blck, flags = {skipBlsValidation}) + let success = state_transition( + preState[], blck, flags = {skipBlsValidation}, noRollback) doAssert success, "Failure when applying block " & $i else: - let success = state_transition(preState[], blck, flags = {}) + let success = state_transition( + preState[], blck, flags = {}, noRollback) doAssert not success, "We didn't expect this invalid block to be processed" # check: preState.hash_tree_root() == postState.hash_tree_root() diff --git a/tests/test_state_transition.nim b/tests/test_state_transition.nim index 1a988eba6..63d987fd0 100644 --- a/tests/test_state_transition.nim +++ b/tests/test_state_transition.nim @@ -38,7 +38,7 @@ suiteReport "Block processing" & preset(): previous_block_root = hash_tree_root(genesisBlock.message) new_block = makeTestBlock(state[], previous_block_root) - let block_ok = state_transition(state[], new_block, {}) + let block_ok = state_transition(state[], new_block, {}, noRollback) check: block_ok @@ -58,7 +58,7 @@ suiteReport "Block processing" & preset(): for i in 1..SLOTS_PER_EPOCH.int: let new_block = makeTestBlock(state[], previous_block_root) - let block_ok = state_transition(state[], new_block, {}) + let block_ok = state_transition(state[], new_block, {}, noRollback) check: block_ok @@ -91,7 +91,7 @@ suiteReport "Block processing" & preset(): new_block = makeTestBlock(state[], previous_block_root, attestations = @[attestation] ) - discard state_transition(state[], new_block, {}) + check state_transition(state[], new_block, {}, noRollback) check: # TODO epoch attestations can get multiplied now; clean up paths to