Increase finalization and finalization checking robustness (#990)
* fix some warnings related to beacon_node splitting; reimplement finalization verification more robustly; improve attestation pool block selection logic * re-add missing import * whitelist allowed state transition flags and make rollback/restore naming more consistent * restore usage of update flags passed into skipAndUpdateState(...) in addition to the potential verifyFinalization flag * switch rest of rollback -> restore
This commit is contained in:
parent
bc759ceec3
commit
093d298b2b
|
@ -317,17 +317,34 @@ proc getAttestationsForBlock*(pool: AttestationPool,
|
|||
|
||||
# TODO this shouldn't really need state -- it's to recheck/validate, but that
|
||||
# should be refactored
|
||||
let
|
||||
newBlockSlot = state.slot
|
||||
maybeSlotData = getAttestationsForSlot(pool, newBlockSlot)
|
||||
let newBlockSlot = state.slot
|
||||
var attestations: seq[AttestationEntry]
|
||||
|
||||
if maybeSlotData.isNone:
|
||||
# Logging done in getAttestationsForSlot(...)
|
||||
# This isn't maximally efficient -- iterators or other approaches would
|
||||
# avoid lots of memory allocations -- but this provides a more flexible
|
||||
# base upon which to experiment with, and isn't yet profiling hot-path,
|
||||
# while avoiding penalizing slow attesting too much (as, in the spec it
|
||||
# is supposed to be available two epochs back; it's not meant as). This
|
||||
# isn't a good solution, either -- see the set-packing comment below as
|
||||
# one issue. It also creates problems with lots of repeat attestations,
|
||||
# as a bunch of synchronized beacon_nodes do almost the opposite of the
|
||||
# intended thing -- sure, _blocks_ have to be popular (via attestation)
|
||||
# but _attestations_ shouldn't have to be so frequently repeated, as an
|
||||
# artifact of this state-free, identical-across-clones choice basis. In
|
||||
# addResolved, too, the new attestations get added to the end, while in
|
||||
# these functions, it's reading from the beginning, et cetera. This all
|
||||
# needs a single unified strategy.
|
||||
const LOOKBACK_WINDOW = 3
|
||||
for i in max(1, newBlockSlot.int64 - LOOKBACK_WINDOW) .. newBlockSlot.int64:
|
||||
let maybeSlotData = getAttestationsForSlot(pool, i.Slot)
|
||||
if maybeSlotData.isSome:
|
||||
insert(attestations, maybeSlotData.get.attestations)
|
||||
|
||||
if attestations.len == 0:
|
||||
return
|
||||
let slotData = maybeSlotData.get
|
||||
|
||||
var cache = get_empty_per_epoch_cache()
|
||||
for a in slotData.attestations:
|
||||
for a in attestations:
|
||||
var
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/validator.md#construct-attestation
|
||||
attestation = Attestation(
|
||||
|
@ -368,6 +385,8 @@ proc getAttestationsForBlock*(pool: AttestationPool,
|
|||
result.add(attestation)
|
||||
|
||||
if result.len >= MAX_ATTESTATIONS:
|
||||
debug "getAttestationsForBlock: returning early after hitting MAX_ATTESTATIONS",
|
||||
attestationSlot = newBlockSlot - 1
|
||||
return
|
||||
|
||||
proc resolve*(pool: var AttestationPool) =
|
||||
|
|
|
@ -168,8 +168,12 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async
|
|||
quit 1
|
||||
|
||||
# TODO check that genesis given on command line (if any) matches database
|
||||
let
|
||||
blockPool = BlockPool.init(db)
|
||||
let blockPool = BlockPool.init(
|
||||
db,
|
||||
if conf.verifyFinalization:
|
||||
{verifyFinalization}
|
||||
else:
|
||||
{})
|
||||
|
||||
if mainchainMonitor.isNil and
|
||||
conf.web3Url.len > 0 and
|
||||
|
@ -305,21 +309,6 @@ proc onBeaconBlock(node: BeaconNode, signedBlock: SignedBeaconBlock) =
|
|||
# don't know if it's part of the chain we're currently building.
|
||||
discard node.storeBlock(signedBlock)
|
||||
|
||||
proc verifyFinalization(node: BeaconNode, slot: Slot) =
|
||||
# Epoch must be >= 4 to check finalization
|
||||
const SETTLING_TIME_OFFSET = 1'u64
|
||||
let epoch = slot.compute_epoch_at_slot()
|
||||
|
||||
# Don't static-assert this -- if this isn't called, don't require it
|
||||
doAssert SLOTS_PER_EPOCH > SETTLING_TIME_OFFSET
|
||||
|
||||
# Intentionally, loudly assert. Point is to fail visibly and unignorably
|
||||
# during testing.
|
||||
if epoch >= 4 and slot mod SLOTS_PER_EPOCH > SETTLING_TIME_OFFSET:
|
||||
let finalizedEpoch =
|
||||
node.blockPool.finalizedHead.blck.slot.compute_epoch_at_slot()
|
||||
doAssert finalizedEpoch + 2 == epoch
|
||||
|
||||
proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, async.} =
|
||||
## Called at the beginning of a slot - usually every slot, but sometimes might
|
||||
## skip a few in case we're running late.
|
||||
|
@ -386,9 +375,6 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
|
|||
|
||||
beacon_slot.set slot.int64
|
||||
|
||||
if node.config.verifyFinalization:
|
||||
verifyFinalization(node, scheduledSlot)
|
||||
|
||||
if slot > lastSlot + SLOTS_PER_EPOCH:
|
||||
# We've fallen behind more than an epoch - there's nothing clever we can
|
||||
# do here really, except skip all the work and try again later.
|
||||
|
|
|
@ -13,10 +13,9 @@ import
|
|||
|
||||
# Local modules
|
||||
spec/[datatypes, crypto],
|
||||
conf, time, beacon_chain_db, validator_pool,
|
||||
conf, time, beacon_chain_db,
|
||||
attestation_pool, block_pool, eth2_network,
|
||||
beacon_node_types, mainchain_monitor,
|
||||
sync_protocol, request_manager
|
||||
beacon_node_types, mainchain_monitor, request_manager
|
||||
|
||||
type
|
||||
RpcServer* = RpcHttpServer
|
||||
|
|
|
@ -4,7 +4,7 @@ import
|
|||
deques, tables,
|
||||
stew/[endians2, byteutils], chronicles,
|
||||
spec/[datatypes, crypto, digest],
|
||||
beacon_chain_db
|
||||
beacon_chain_db, extras
|
||||
|
||||
type
|
||||
# #############################################
|
||||
|
@ -143,7 +143,7 @@ type
|
|||
|
||||
inAdd*: bool
|
||||
|
||||
headState*: StateData ## \
|
||||
headState*: StateData ##\
|
||||
## State given by the head block; only update in `updateHead`, not anywhere
|
||||
## else via `withState`
|
||||
|
||||
|
@ -151,6 +151,8 @@ type
|
|||
|
||||
tmpState*: StateData ## Scratchpad - may be any state
|
||||
|
||||
updateFlags*: UpdateFlags
|
||||
|
||||
MissingBlock* = object
|
||||
slots*: uint64 # number of slots that are suspected missing
|
||||
tries*: int
|
||||
|
|
|
@ -139,7 +139,8 @@ func init*(T: type BlockRef, root: Eth2Digest, slot: Slot): BlockRef =
|
|||
func init*(T: type BlockRef, root: Eth2Digest, blck: BeaconBlock): BlockRef =
|
||||
BlockRef.init(root, blck.slot)
|
||||
|
||||
proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
|
||||
proc init*(T: type BlockPool, db: BeaconChainDB,
|
||||
updateFlags: UpdateFlags = {}): BlockPool =
|
||||
# TODO we require that the db contains both a head and a tail block -
|
||||
# asserting here doesn't seem like the right way to go about it however..
|
||||
|
||||
|
@ -243,9 +244,15 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
|
|||
heads: @[head],
|
||||
headState: tmpState[],
|
||||
justifiedState: tmpState[], # This is wrong but we'll update it below
|
||||
tmpState: tmpState[]
|
||||
tmpState: tmpState[],
|
||||
|
||||
# The only allowed flag right now is verifyFinalization, as the others all
|
||||
# allow skipping some validation.
|
||||
updateFlags: {verifyFinalization} * updateFlags
|
||||
)
|
||||
|
||||
doAssert res.updateFlags in [{}, {verifyFinalization}]
|
||||
|
||||
res.updateStateData(res.justifiedState, justifiedHead)
|
||||
res.updateStateData(res.headState, headRef.atSlot(headRef.slot))
|
||||
|
||||
|
@ -324,18 +331,18 @@ proc getState(
|
|||
pool: BlockPool, db: BeaconChainDB, stateRoot: Eth2Digest, blck: BlockRef,
|
||||
output: var StateData): bool =
|
||||
let outputAddr = unsafeAddr output # local scope
|
||||
proc rollback(v: var BeaconState) =
|
||||
func restore(v: var BeaconState) =
|
||||
if outputAddr == (unsafeAddr pool.headState):
|
||||
# TODO seeing the headState in the rollback shouldn't happen - we load
|
||||
# TODO seeing the headState in the restore shouldn't happen - we load
|
||||
# head states only when updating the head position, and by that time
|
||||
# the database will have gone through enough sanity checks that
|
||||
# SSZ exceptions shouldn't happen, which is when rollback happens.
|
||||
# SSZ exceptions shouldn't happen, which is when restore happens.
|
||||
# Nonetheless, this is an ugly workaround that needs to go away
|
||||
doAssert false, "Cannot alias headState"
|
||||
|
||||
outputAddr[] = pool.headState
|
||||
|
||||
if not db.getState(stateRoot, output.data.data, rollback):
|
||||
if not db.getState(stateRoot, output.data.data, restore):
|
||||
return false
|
||||
|
||||
output.blck = blck
|
||||
|
@ -470,14 +477,15 @@ proc add*(
|
|||
|
||||
let
|
||||
poolPtr = unsafeAddr pool # safe because restore is short-lived
|
||||
proc restore(v: var HashedBeaconState) =
|
||||
func restore(v: var HashedBeaconState) =
|
||||
# TODO address this ugly workaround - there should probably be a
|
||||
# `state_transition` that takes a `StateData` instead and updates
|
||||
# the block as well
|
||||
doAssert v.addr == addr poolPtr.tmpState.data
|
||||
poolPtr.tmpState = poolPtr.headState
|
||||
|
||||
if not state_transition(pool.tmpState.data, signedBlock, {}, restore):
|
||||
if not state_transition(
|
||||
pool.tmpState.data, signedBlock, pool.updateFlags, restore):
|
||||
# TODO find a better way to log all this block data
|
||||
notice "Invalid block",
|
||||
blck = shortLog(blck),
|
||||
|
@ -545,7 +553,7 @@ func getRef*(pool: BlockPool, root: Eth2Digest): BlockRef =
|
|||
## Retrieve a resolved block reference, if available
|
||||
pool.blocks.getOrDefault(root, nil)
|
||||
|
||||
proc getBlockRange*(
|
||||
func getBlockRange*(
|
||||
pool: BlockPool, startSlot: Slot, skipStep: Natural,
|
||||
output: var openArray[BlockRef]): Natural =
|
||||
## This function populates an `output` buffer of blocks
|
||||
|
@ -650,7 +658,7 @@ proc skipAndUpdateState(
|
|||
# save and reuse
|
||||
# TODO possibly we should keep this in memory for the hot blocks
|
||||
let nextStateRoot = pool.db.getStateRoot(blck.root, state.data.slot + 1)
|
||||
advance_slot(state, nextStateRoot)
|
||||
advance_slot(state, nextStateRoot, pool.updateFlags)
|
||||
|
||||
if save:
|
||||
pool.putState(state, blck)
|
||||
|
@ -662,12 +670,13 @@ proc skipAndUpdateState(
|
|||
pool.skipAndUpdateState(
|
||||
state.data, blck.refs, blck.data.message.slot - 1, save)
|
||||
|
||||
var statePtr = unsafeAddr state # safe because `rollback` is locally scoped
|
||||
proc rollback(v: var HashedBeaconState) =
|
||||
var statePtr = unsafeAddr state # safe because `restore` is locally scoped
|
||||
func restore(v: var HashedBeaconState) =
|
||||
doAssert (addr(statePtr.data) == addr v)
|
||||
statePtr[] = pool.headState
|
||||
|
||||
let ok = state_transition(state.data, blck.data, flags, rollback)
|
||||
let ok = state_transition(
|
||||
state.data, blck.data, flags + pool.updateFlags, restore)
|
||||
if ok and save:
|
||||
pool.putState(state.data, blck.refs)
|
||||
|
||||
|
|
|
@ -31,5 +31,6 @@ type
|
|||
## Skip verification of block state root.
|
||||
skipBlockParentRootValidation ##\
|
||||
## Skip verification that the block's parent root matches the previous block header.
|
||||
verifyFinalization
|
||||
|
||||
UpdateFlags* = set[UpdateFlag]
|
||||
|
|
|
@ -564,12 +564,14 @@ proc process_attestation*(
|
|||
|
||||
if attestation.data.target.epoch == get_current_epoch(state):
|
||||
trace "process_attestation: current_epoch_attestations.add",
|
||||
attestation = shortLog(attestation),
|
||||
pending_attestation = pending_attestation,
|
||||
indices = get_attesting_indices(
|
||||
state, attestation.data, attestation.aggregation_bits, stateCache).len
|
||||
state.current_epoch_attestations.add(pending_attestation)
|
||||
else:
|
||||
trace "process_attestation: previous_epoch_attestations.add",
|
||||
attestation = shortLog(attestation),
|
||||
pending_attestation = pending_attestation,
|
||||
indices = get_attesting_indices(
|
||||
state, attestation.data, attestation.aggregation_bits, stateCache).len
|
||||
|
@ -577,6 +579,10 @@ proc process_attestation*(
|
|||
|
||||
true
|
||||
else:
|
||||
trace "process_attestation: check_attestation failed",
|
||||
attestation = shortLog(attestation),
|
||||
indices = get_attesting_indices(
|
||||
state, attestation.data, attestation.aggregation_bits, stateCache).len
|
||||
false
|
||||
|
||||
func makeAttestationData*(
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
import
|
||||
math, sequtils, tables,
|
||||
stew/[bitseqs, bitops2], chronicles, json_serialization/std/sets,
|
||||
metrics, ../ssz,
|
||||
metrics, ../extras, ../ssz,
|
||||
beaconstate, crypto, datatypes, digest, helpers, validator,
|
||||
state_transition_helpers,
|
||||
../../nbench/bench_lab
|
||||
|
@ -110,8 +110,8 @@ func get_attesting_balance(
|
|||
state, attestations, stateCache))
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.9.4/specs/core/0_beacon-chain.md#justification-and-finalization
|
||||
proc process_justification_and_finalization*(
|
||||
state: var BeaconState, stateCache: var StateCache) {.nbench.}=
|
||||
proc process_justification_and_finalization*(state: var BeaconState,
|
||||
stateCache: var StateCache, updateFlags: UpdateFlags = {}) {.nbench.} =
|
||||
|
||||
logScope: pcs = "process_justification_and_finalization"
|
||||
|
||||
|
@ -422,17 +422,22 @@ func process_final_updates*(state: var BeaconState) {.nbench.}=
|
|||
state.current_epoch_attestations = @[]
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#epoch-processing
|
||||
proc process_epoch*(state: var BeaconState) {.nbench.}=
|
||||
proc process_epoch*(state: var BeaconState, updateFlags: UpdateFlags)
|
||||
{.nbench.} =
|
||||
trace "process_epoch",
|
||||
current_epoch = get_current_epoch(state)
|
||||
|
||||
var per_epoch_cache = get_empty_per_epoch_cache()
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#justification-and-finalization
|
||||
process_justification_and_finalization(state, per_epoch_cache)
|
||||
process_justification_and_finalization(state, per_epoch_cache, updateFlags)
|
||||
|
||||
trace "ran process_justification_and_finalization",
|
||||
current_epoch = get_current_epoch(state)
|
||||
# state.slot hasn't been incremented yet.
|
||||
if verifyFinalization in updateFlags and get_current_epoch(state) >= 3:
|
||||
# Rule 2/3/4 finalization results in the most pessimal case. The other
|
||||
# three finalization rules finalize more quickly as long as the any of
|
||||
# the finalization rules triggered.
|
||||
doAssert state.finalized_checkpoint.epoch + 3 >= get_current_epoch(state)
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#rewards-and-penalties-1
|
||||
process_rewards_and_penalties(state, per_epoch_cache)
|
||||
|
|
|
@ -123,7 +123,8 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} =
|
|||
hash_tree_root(state.data.latest_block_header)
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
|
||||
proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest]) =
|
||||
proc advance_slot*(state: var HashedBeaconState,
|
||||
nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) =
|
||||
# Special case version of process_slots that moves one slot at a time - can
|
||||
# run faster if the state root is known already (for example when replaying
|
||||
# existing slots)
|
||||
|
@ -132,7 +133,7 @@ proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest])
|
|||
if is_epoch_transition:
|
||||
# Note: Genesis epoch = 0, no need to test if before Genesis
|
||||
beacon_previous_validators.set(get_epoch_validator_count(state.data))
|
||||
process_epoch(state.data)
|
||||
process_epoch(state.data, updateFlags)
|
||||
state.data.slot += 1
|
||||
if is_epoch_transition:
|
||||
beacon_current_validators.set(get_epoch_validator_count(state.data))
|
||||
|
@ -143,7 +144,8 @@ proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest])
|
|||
state.root = hash_tree_root(state.data)
|
||||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
|
||||
proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
|
||||
proc process_slots*(state: var HashedBeaconState, slot: Slot,
|
||||
updateFlags: UpdateFlags = {}) {.nbench.} =
|
||||
# TODO: Eth specs strongly assert that state.data.slot <= slot
|
||||
# This prevents receiving attestation in any order
|
||||
# (see tests/test_attestation_pool)
|
||||
|
@ -165,7 +167,7 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
|
|||
|
||||
# Catch up to the target slot
|
||||
while state.data.slot < slot:
|
||||
advance_slot(state, err(Opt[Eth2Digest]))
|
||||
advance_slot(state, err(Opt[Eth2Digest]), updateFlags)
|
||||
|
||||
proc noRollback*(state: var HashedBeaconState) =
|
||||
trace "Skipping rollback of broken state"
|
||||
|
@ -205,7 +207,7 @@ proc state_transition*(
|
|||
# bool return values...)
|
||||
doAssert not rollback.isNil, "use noRollback if it's ok to mess up state"
|
||||
# These should never fail.
|
||||
process_slots(state, signedBlock.message.slot)
|
||||
process_slots(state, signedBlock.message.slot, flags)
|
||||
|
||||
# Block updates - these happen when there's a new block being suggested
|
||||
# by the block proposer. Every actor in the network will update its state
|
||||
|
@ -217,6 +219,9 @@ proc state_transition*(
|
|||
verify_block_signature(state.data, signedBlock):
|
||||
|
||||
var per_epoch_cache = get_empty_per_epoch_cache()
|
||||
trace "in state_transition: processing block, signature passed",
|
||||
signature = signedBlock.signature,
|
||||
blockRoot = hash_tree_root(signedBlock.message)
|
||||
if processBlock(state.data, signedBlock.message, flags, per_epoch_cache):
|
||||
if skipStateRootValidation in flags or verifyStateRoot(state.data, signedBlock.message):
|
||||
# State root is what it should be - we're done!
|
||||
|
|
|
@ -7,25 +7,25 @@
|
|||
|
||||
import
|
||||
# Standard library
|
||||
os, tables, random, strutils, times,
|
||||
os, tables, strutils, times,
|
||||
|
||||
# Nimble packages
|
||||
stew/[objects, bitseqs, byteutils], stew/shims/macros,
|
||||
chronos, confutils, metrics, json_rpc/[rpcserver, jsonmarshal],
|
||||
chronicles, chronicles/helpers as chroniclesHelpers,
|
||||
stew/[objects, bitseqs], stew/shims/macros,
|
||||
chronos, metrics, json_rpc/[rpcserver, jsonmarshal],
|
||||
chronicles,
|
||||
json_serialization/std/[options, sets, net], serialization/errors,
|
||||
eth/db/kvstore, eth/db/kvstore_sqlite3,
|
||||
eth/p2p/enode, eth/[keys, async_utils], eth/p2p/discoveryv5/[protocol, enr],
|
||||
eth/[keys, async_utils], eth/p2p/discoveryv5/[protocol, enr],
|
||||
|
||||
# Local modules
|
||||
spec/[datatypes, digest, crypto, beaconstate, helpers, validator, network,
|
||||
state_transition_block], spec/presets/custom,
|
||||
conf, time, beacon_chain_db, validator_pool, extras,
|
||||
attestation_pool, block_pool, eth2_network, eth2_discovery,
|
||||
state_transition_block],
|
||||
conf, time, beacon_chain_db, validator_pool,
|
||||
attestation_pool, block_pool, eth2_network,
|
||||
beacon_node_common, beacon_node_types,
|
||||
mainchain_monitor, version, ssz, ssz/dynamic_navigator,
|
||||
sync_protocol, request_manager, validator_keygen, interop, statusbar,
|
||||
attestation_aggregation, sync_manager, state_transition, sszdump
|
||||
request_manager, interop, statusbar,
|
||||
attestation_aggregation, sync_manager, sszdump
|
||||
|
||||
# Metrics for tracking attestation and beacon block loss
|
||||
declareCounter beacon_attestations_sent,
|
||||
|
|
|
@ -31,4 +31,4 @@ proc transitionEpochUntilJustificationFinalization*(state: var HashedBeaconState
|
|||
# From process_epoch()
|
||||
var per_epoch_cache = get_empty_per_epoch_cache()
|
||||
|
||||
process_justification_and_finalization(state.data, per_epoch_cache)
|
||||
process_justification_and_finalization(state.data, per_epoch_cache, {})
|
||||
|
|
Loading…
Reference in New Issue