fix mainnet finalization (#906)

* fix mainnet finalization and switch eth2_network_simulation to a kind of small-mainnet profile

* Fix slot reference in trace logging

* bump a couple of spec refs from v0.11.0 to v0.11.1

* bump another spec ref to v0.11.1, one more try at Jenkins test vector download CI issue

* fix other slot reference in trace logging and skip past single-block/multi-slot gaps to re-approach from ancestry side by state_transitioning, by requiring exact match on both root hash and slot for fast path

* make more precise the fast path condition

* redo logic to make uniform with BeaconChainDB; fix chronos deprecation warning

* revert not-working replacement of deprecated chronos futures `or`

* switch testnet1 to mainnet
This commit is contained in:
tersec 2020-04-20 17:27:52 +00:00 committed by GitHub
parent 3d42da90a8
commit 8a72ae89b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 79 additions and 18 deletions

View File

@ -131,6 +131,11 @@ type
db*: BeaconChainDB db*: BeaconChainDB
cachedStates*: array[2, BeaconChainDB] ##\
## Dual BeaconChainDBs operates as a pool allocator which handles epoch
## boundaries which don't align with an ongoing latency of availability
## of precalculated BeaconStates from the recent past.
heads*: seq[Head] heads*: seq[Head]
inAdd*: bool inAdd*: bool

View File

@ -1,6 +1,6 @@
import import
bitops, chronicles, options, tables, bitops, chronicles, options, tables,
ssz, beacon_chain_db, state_transition, extras, ssz, beacon_chain_db, state_transition, extras, kvstore,
beacon_node_types, metrics, beacon_node_types, metrics,
spec/[crypto, datatypes, digest, helpers, validator] spec/[crypto, datatypes, digest, helpers, validator]
@ -225,6 +225,15 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
let res = BlockPool( let res = BlockPool(
pending: initTable[Eth2Digest, SignedBeaconBlock](), pending: initTable[Eth2Digest, SignedBeaconBlock](),
missing: initTable[Eth2Digest, MissingBlock](), missing: initTable[Eth2Digest, MissingBlock](),
# Usually one or the other of these will get re-initialized if the pool's
# initialized on an epoch boundary, but that is a reasonable readability,
# simplicity, and non-special-casing tradeoff for the inefficiency.
cachedStates: [
init(BeaconChainDB, kvStore MemoryStoreRef.init()),
init(BeaconChainDB, kvStore MemoryStoreRef.init())
],
blocks: blocks, blocks: blocks,
tail: tailRef, tail: tailRef,
head: head, head: head,
@ -590,12 +599,14 @@ proc skipAndUpdateState(
ok ok
proc maybePutState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) = proc putState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
# TODO we save state at every epoch start but never remove them - we also # TODO we save state at every epoch start but never remove them - we also
# potentially save multiple states per slot if reorgs happen, meaning # potentially save multiple states per slot if reorgs happen, meaning
# we could easily see a state explosion # we could easily see a state explosion
logScope: pcs = "save_state_at_epoch_start" logScope: pcs = "save_state_at_epoch_start"
var currentCache =
pool.cachedStates[state.data.slot.compute_epoch_at_slot.uint64 mod 2]
if state.data.slot mod SLOTS_PER_EPOCH == 0: if state.data.slot mod SLOTS_PER_EPOCH == 0:
if not pool.db.containsState(state.root): if not pool.db.containsState(state.root):
info "Storing state", info "Storing state",
@ -608,6 +619,41 @@ proc maybePutState(pool: BlockPool, state: HashedBeaconState, blck: BlockRef) =
# TODO this should be atomic with the above write.. # TODO this should be atomic with the above write..
pool.db.putStateRoot(blck.root, state.data.slot, state.root) pool.db.putStateRoot(blck.root, state.data.slot, state.root)
# Because state.data.slot mod SLOTS_PER_EPOCH == 0, wrap back to last
# time this was the case i.e. last currentCache. The opposite parity,
# by contrast, has just finished filling from the previous epoch. The
# resulting lookback window is thus >= SLOTS_PER_EPOCH in size, while
# bounded from above by 2*SLOTS_PER_EPOCH.
currentCache = init(BeaconChainDB, kvStore MemoryStoreRef.init())
else:
# Need to be able to efficiently access states for both attestation
# aggregation and to process block proposals going back to the last
# finalized slot. Ideally to avoid potential combinatorial forking
# storage and/or memory constraints could CoW, up to and including,
# in particular, hash_tree_root() which is expensive to do 30 times
# since the previous epoch, to efficiently state_transition back to
# desired slot. However, none of that's in place, so there are both
# expensive, repeated BeaconState copies as well as computationally
# time-consuming-near-end-of-epoch hash tree roots. The latter are,
# effectively, naïvely O(n^2) in slot number otherwise, so when the
# slots become in the mid-to-high-20s it's spending all its time in
# pointlessly repeated calculations of prefix-state-transitions. An
# intermediate time/memory workaround involves storing only mapping
# between BlockRefs, or BlockSlots, and the BeaconState tree roots,
# but that still involves tens of megabytes worth of copying, along
# with the concomitant memory allocator and GC load. Instead, use a
# more memory-intensive (but more conceptually straightforward, and
# faster) strategy to just store, for the most recent slots. Keep a
# block's StateData of odd-numbered epoch in bucket 1, whilst evens
# land in bucket 0 (which is handed back to GC in if branch). There
# still is a possibility of combinatorial explosion, but this only,
# by a constant-factor, worsens things. TODO the actual solution's,
# eventually, to switch to CoW and/or ref objects for state and the
# hash_tree_root processing.
currentCache.putState(state.root, state.data)
# TODO this should be atomic with the above write..
currentCache.putStateRoot(blck.root, state.data.slot, state.root)
proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot):
seq[BlockData] = seq[BlockData] =
logScope: pcs = "replay_state" logScope: pcs = "replay_state"
@ -642,11 +688,15 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot):
if parBs.blck != curBs.blck: if parBs.blck != curBs.blck:
ancestors.add(pool.get(parBs.blck)) ancestors.add(pool.get(parBs.blck))
if (let tmp = pool.db.getStateRoot(parBs.blck.root, parBs.slot); tmp.isSome()): for db in [pool.db, pool.cachedStates[0], pool.cachedStates[1]]:
if pool.db.containsState(tmp.get): if (let tmp = db.getStateRoot(parBs.blck.root, parBs.slot); tmp.isSome()):
if db.containsState(tmp.get):
stateRoot = tmp stateRoot = tmp
break break
if stateRoot.isSome:
break
curBs = parBs curBs = parBs
if stateRoot.isNone(): if stateRoot.isNone():
@ -663,7 +713,14 @@ proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot):
let let
ancestor = ancestors.pop() ancestor = ancestors.pop()
ancestorState = pool.db.getState(stateRoot.get()) root = stateRoot.get()
ancestorState =
if pool.db.containsState(root):
pool.db.getState(root)
elif pool.cachedStates[0].containsState(root):
pool.cachedStates[0].getState(root)
else:
pool.cachedStates[1].getState(root)
if ancestorState.isNone(): if ancestorState.isNone():
# TODO this should only happen if the database is corrupt - we walked the # TODO this should only happen if the database is corrupt - we walked the
@ -705,7 +762,7 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
if state.data.data.slot != bs.slot: if state.data.data.slot != bs.slot:
# Might be that we're moving to the same block but later slot # Might be that we're moving to the same block but later slot
skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState):
pool.maybePutState(state, bs.blck) pool.putState(state, bs.blck)
return # State already at the right spot return # State already at the right spot
@ -718,17 +775,17 @@ proc updateStateData*(pool: BlockPool, state: var StateData, bs: BlockSlot) =
# Time to replay all the blocks between then and now. We skip one because # Time to replay all the blocks between then and now. We skip one because
# it's the one that we found the state with, and it has already been # it's the one that we found the state with, and it has already been
# applied # applied. Pathologically quadratic in slot number, naïvely.
for i in countdown(ancestors.len - 1, 0): for i in countdown(ancestors.len - 1, 0):
let ok = let ok =
skipAndUpdateState(state.data, skipAndUpdateState(state.data,
ancestors[i].data, ancestors[i].data,
{skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) do (state: HashedBeaconState): {skipBlsValidation, skipMerkleValidation, skipStateRootValidation}) do (state: HashedBeaconState):
pool.maybePutState(state, ancestors[i].refs) pool.putState(state, ancestors[i].refs)
doAssert ok, "Blocks in database should never fail to apply.." doAssert ok, "Blocks in database should never fail to apply.."
skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState): skipAndUpdateState(state.data, bs.slot) do(state: HashedBeaconState):
pool.maybePutState(state, bs.blck) pool.putState(state, bs.blck)
state.blck = bs.blck state.blck = bs.blck

View File

@ -92,12 +92,12 @@ type
DOMAIN_SELECTION_PROOF = 5 DOMAIN_SELECTION_PROOF = 5
DOMAIN_AGGREGATE_AND_PROOF = 6 DOMAIN_AGGREGATE_AND_PROOF = 6
# Phase 1 - Sharding # Phase 1 - Sharding
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase1/beacon-chain.md#misc # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase1/beacon-chain.md#misc
DOMAIN_SHARD_PROPOSAL = 128 DOMAIN_SHARD_PROPOSAL = 128
DOMAIN_SHARD_COMMITTEE = 129 DOMAIN_SHARD_COMMITTEE = 129
DOMAIN_LIGHT_CLIENT = 130 DOMAIN_LIGHT_CLIENT = 130
# Phase 1 - Custody game # Phase 1 - Custody game
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase1/custody-game.md#signature-domain-types # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase1/custody-game.md#signature-domain-types
DOMAIN_CUSTODY_BIT_SLASHING = 0x83 DOMAIN_CUSTODY_BIT_SLASHING = 0x83
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#custom-types # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#custom-types
@ -368,7 +368,7 @@ type
aggregate*: Attestation aggregate*: Attestation
selection_proof*: ValidatorSig selection_proof*: ValidatorSig
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.0/specs/phase0/validator.md#signedaggregateandproof # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/validator.md#signedaggregateandproof
SignedAggregateAndProof* = object SignedAggregateAndProof* = object
message*: AggregateAndProof message*: AggregateAndProof
signature*: ValidatorSig signature*: ValidatorSig

View File

@ -1,4 +1,4 @@
CONST_PRESET=minimal CONST_PRESET=mainnet
QUICKSTART_VALIDATORS=8 QUICKSTART_VALIDATORS=8
RANDOM_VALIDATORS=120 RANDOM_VALIDATORS=120
BOOTSTRAP_PORT=9100 BOOTSTRAP_PORT=9100

View File

@ -15,8 +15,7 @@ import
../../beacon_chain/[ssz, state_transition, extras], ../../beacon_chain/[ssz, state_transition, extras],
# Test utilities # Test utilities
../testutil, ../testutil,
./fixtures_utils, ./fixtures_utils
../helpers/debug_state
const SanityBlocksDir = SszTestsDir/const_preset/"phase0"/"sanity"/"blocks"/"pyspec_tests" const SanityBlocksDir = SszTestsDir/const_preset/"phase0"/"sanity"/"blocks"/"pyspec_tests"

View File

@ -12,7 +12,7 @@ mkdir -p "$VALIDATORS_DIR"
cd "$GIT_ROOT" cd "$GIT_ROOT"
CUSTOM_NIMFLAGS="${NIMFLAGS} -d:useSysAsserts -d:chronicles_sinks:textlines,json[file]" CUSTOM_NIMFLAGS="${NIMFLAGS} -d:useSysAsserts -d:chronicles_sinks:textlines,json[file] -d:const_preset=mainnet"
# Run with "SLOTS_PER_EPOCH=8 ./start.sh" to change these # Run with "SLOTS_PER_EPOCH=8 ./start.sh" to change these
DEFS="" DEFS=""