From 8a72ae89b9837720f64538ea11a3272c4ba709aa Mon Sep 17 00:00:00 2001 From: tersec Date: Mon, 20 Apr 2020 17:27:52 +0000 Subject: [PATCH] fix mainnet finalization (#906) * fix mainnet finalization and switch eth2_network_simulation to a kind of small-mainnet profile * Fix slot reference in trace logging * bump a couple of spec refs from v0.11.0 to v0.11.1 * bump another spec ref to v0.11.1, one more try at Jenkins test vector download CI issue * fix other slot reference in trace logging and skip past single-block/multi-slot gaps to re-approach from ancestry side by state_transitioning, by requiring exact match on both root hash and slot for fast path * make more precise the fast path condition * redo logic to make uniform with BeaconChainDB; fix chronos deprecation warning * revert not-working replacement of deprecated chronos futures `or` * switch testnet1 to mainnet --- beacon_chain/beacon_node_types.nim | 5 ++ beacon_chain/block_pool.nim | 79 ++++++++++++++++--- beacon_chain/spec/datatypes.nim | 6 +- scripts/testnet1.env | 2 +- tests/official/test_fixture_sanity_blocks.nim | 3 +- tests/simulation/start.sh | 2 +- 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/beacon_chain/beacon_node_types.nim b/beacon_chain/beacon_node_types.nim index 60cf94735..2b4d3f285 100644 --- a/beacon_chain/beacon_node_types.nim +++ b/beacon_chain/beacon_node_types.nim @@ -131,6 +131,11 @@ type db*: BeaconChainDB + cachedStates*: array[2, BeaconChainDB] ##\ + ## Dual BeaconChainDBs operate as a pool allocator which handles epoch + ## boundaries which don't align with an ongoing latency of availability + ## of precalculated BeaconStates from the recent past. 
+ heads*: seq[Head] inAdd*: bool diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim index 67c64b46b..9e1f6463c 100644 --- a/beacon_chain/block_pool.nim +++ b/beacon_chain/block_pool.nim @@ -1,6 +1,6 @@ import bitops, chronicles, options, tables, - ssz, beacon_chain_db, state_transition, extras, + ssz, beacon_chain_db, state_transition, extras, kvstore, beacon_node_types, metrics, spec/[crypto, datatypes, digest, helpers, validator] @@ -225,6 +225,15 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool = let res = BlockPool( pending: initTable[Eth2Digest, SignedBeaconBlock](), missing: initTable[Eth2Digest, MissingBlock](), + + # Usually one or the other of these will get re-initialized if the pool's + # initialized on an epoch boundary, but that is a reasonable readability, + # simplicity, and non-special-casing tradeoff for the inefficiency. + cachedStates: [ + init(BeaconChainDB, kvStore MemoryStoreRef.init()), + init(BeaconChainDB, kvStore MemoryStoreRef.init()) + ], + blocks: blocks, tail: tailRef, head: head, @@ -590,12 +599,14 @@ proc skipAndUpdateState( ok -proc maybePutState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = +proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = # TODO we save state at every epoch start but never remove them - we also # potentially save multiple states per slot if reorgs happen, meaning # we could easily see a state explosion logScope: pcs = "save_state_at_epoch_start" + var currentCache = + pool.cachedStates[state.data.slot.compute_epoch_at_slot.uint64 mod 2] if state.data.slot mod SLOTS_PER_EPOCH == 0: if not pool.db.containsState(state.root): info "Storing state", @@ -608,6 +619,41 @@ proc maybePutState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = # TODO this should be atomic with the above write.. 
pool.db.putStateRoot(blck.root, state.data.slot, state.root) + # Because state.data.slot mod SLOTS_PER_EPOCH == 0, wrap back to last + # time this was the case i.e. last currentCache. The opposite parity, + # by contrast, has just finished filling from the previous epoch. The + # resulting lookback window is thus >= SLOTS_PER_EPOCH in size, while + # bounded from above by 2*SLOTS_PER_EPOCH. + currentCache = init(BeaconChainDB, kvStore MemoryStoreRef.init()) + else: + # Need to be able to efficiently access states for both attestation + # aggregation and to process block proposals going back to the last + # finalized slot. Ideally to avoid potential combinatorial forking + # storage and/or memory constraints could CoW, up to and including, + # in particular, hash_tree_root() which is expensive to do 30 times + # since the previous epoch, to efficiently state_transition back to + # desired slot. However, none of that's in place, so there are both + # expensive, repeated BeaconState copies as well as computationally + # time-consuming-near-end-of-epoch hash tree roots. The latter are, + # effectively, naïvely O(n^2) in slot number otherwise, so when the + # slots become in the mid-to-high-20s it's spending all its time in + # pointlessly repeated calculations of prefix-state-transitions. An + # intermediate time/memory workaround involves storing only mapping + # between BlockRefs, or BlockSlots, and the BeaconState tree roots, + # but that still involves tens of megabytes worth of copying, along + # with the concomitant memory allocator and GC load. Instead, use a + # more memory-intensive (but more conceptually straightforward, and + # faster) strategy to just store, for the most recent slots. Keep a + # block's StateData of odd-numbered epoch in bucket 1, whilst evens + # land in bucket 0 (which is handed back to GC in if branch). There + # still is a possibility of combinatorial explosion, but this only, + # by a constant-factor, worsens things. 
TODO the actual solution's, + # eventually, to switch to CoW and/or ref objects for state and the + # hash_tree_root processing. + currentCache.putState(state.root, state.data) + # TODO this should be atomic with the above write.. + currentCache.putStateRoot(blck.root, state.data.slot, state.root) + proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): seq[BlockData] = logScope: pcs = "replay_state" @@ -642,10 +688,14 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): if parBs.blck != curBs.blck: ancestors.add(pool.get(parBs.blck)) - if (let tmp = pool.db.getStateRoot(parBs.blck.root, parBs.slot); tmp.isSome()): - if pool.db.containsState(tmp.get): - stateRoot = tmp - break + for db in [pool.db, pool.cachedStates[0], pool.cachedStates[1]]: + if (let tmp = db.getStateRoot(parBs.blck.root, parBs.slot); tmp.isSome()): + if db.containsState(tmp.get): + stateRoot = tmp + break + + if stateRoot.isSome: + break curBs = parBs @@ -663,7 +713,14 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): let ancestor = ancestors.pop() - ancestorState = pool.db.getState(stateRoot.get()) + root = stateRoot.get() + ancestorState = + if pool.db.containsState(root): + pool.db.getState(root) + elif pool.cachedStates[0].containsState(root): + pool.cachedStates[0].getState(root) + else: + pool.cachedStates[1].getState(root) if ancestorState.isNone(): # TODO this should only happen if the database is corrupt - we walked the @@ -705,7 +762,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = if state.data.data.slot != bs.slot: # Might be that we're moving to the same block but later slot skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): - pool.maybePutState(state, bs.blck) + pool.putState(state, bs.blck) return # State already at the right spot @@ -718,17 +775,17 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) = # Time to replay all the blocks 
between then and now. We skip one because # it's the one that we found the state with, and it has already been - # applied + # applied. Pathologically quadratic in slot number, naïvely. for i in countdown(ancestors.len - 1, 0): let ok = skipAndUpdateState(state.data, ancestors[i].data, {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) do (state: HashedBeaconState): - pool.maybePutState(state, ancestors[i].refs) + pool.putState(state, ancestors[i].refs) doAssert ok, "Blocks in database should never fail to apply.." skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): - pool.maybePutState(state, bs.blck) + pool.putState(state, bs.blck) state.blck = bs.blck diff --git a/beacon_chain/spec/datatypes.nim b/beacon_chain/spec/datatypes.nim index 224974f28..08107ca89 100644 --- a/beacon_chain/spec/datatypes.nim +++ b/beacon_chain/spec/datatypes.nim @@ -92,12 +92,12 @@ type DOMAIN_SELECTION_PROOF = 5 DOMAIN_AGGREGATE_AND_PROOF = 6 # Phase 1 - Sharding - # https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase1/beacon-chain.md#misc + # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase1/beacon-chain.md#misc DOMAIN_SHARD_PROPOSAL = 128 DOMAIN_SHARD_COMMITTEE = 129 DOMAIN_LIGHT_CLIENT = 130 # Phase 1 - Custody game - # https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase1/custody-game.md#signature-domain-types + # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase1/custody-game.md#signature-domain-types DOMAIN_CUSTODY_BIT_SLASHING = 0x83 # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#custom-types @@ -368,7 +368,7 @@ type aggregate*: Attestation selection_proof*: ValidatorSig - # https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase0/validator.md#signedaggregateandproof + # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/validator.md#signedaggregateandproof SignedAggregateAndProof* = object message*: AggregateAndProof 
signature*: ValidatorSig diff --git a/scripts/testnet1.env b/scripts/testnet1.env index 56009f64b..68b8162e1 100644 --- a/scripts/testnet1.env +++ b/scripts/testnet1.env @@ -1,4 +1,4 @@ -CONST_PRESET=minimal +CONST_PRESET=mainnet QUICKSTART_VALIDATORS=8 RANDOM_VALIDATORS=120 BOOTSTRAP_PORT=9100 diff --git a/tests/official/test_fixture_sanity_blocks.nim b/tests/official/test_fixture_sanity_blocks.nim index 692507063..e0f772492 100644 --- a/tests/official/test_fixture_sanity_blocks.nim +++ b/tests/official/test_fixture_sanity_blocks.nim @@ -15,8 +15,7 @@ import ../../beacon_chain/[ssz, state_transition, extras], # Test utilities ../testutil, - ./fixtures_utils, - ../helpers/debug_state + ./fixtures_utils const SanityBlocksDir = SszTestsDir/const_preset/"phase0"/"sanity"/"blocks"/"pyspec_tests" diff --git a/tests/simulation/start.sh b/tests/simulation/start.sh index 307d32e68..da47e9825 100755 --- a/tests/simulation/start.sh +++ b/tests/simulation/start.sh @@ -12,7 +12,7 @@ mkdir -p "$VALIDATORS_DIR" cd "$GIT_ROOT" -CUSTOM_NIMFLAGS="${NIMFLAGS} -d:useSysAsserts -d:chronicles_sinks:textlines,json[file]" +CUSTOM_NIMFLAGS="${NIMFLAGS} -d:useSysAsserts -d:chronicles_sinks:textlines,json[file] -d:const_preset=mainnet" # Run with "SLOTS_PER_EPOCH=8 ./start.sh" to change these DEFS=""