Increase finalization and finalization checking robustness (#990)

* fix some warnings related to beacon_node splitting; reimplement finalization verification more robustly; improve the attestation pool's attestation selection logic for block production

* re-add missing import

* whitelist allowed state transition flags and make rollback/restore naming more consistent

* restore usage of update flags passed into skipAndUpdateState(...) in addition to the potential verifyFinalization flag

* switch rest of rollback -> restore
tersec 2020-05-09 12:43:15 +00:00 committed by GitHub
parent bc759ceec3
commit 093d298b2b
11 changed files with 100 additions and 68 deletions

View File

@ -317,17 +317,34 @@ proc getAttestationsForBlock*(pool: AttestationPool,
# TODO this shouldn't really need state -- it's to recheck/validate, but that
# should be refactored
let
newBlockSlot = state.slot
maybeSlotData = getAttestationsForSlot(pool, newBlockSlot)
let newBlockSlot = state.slot
var attestations: seq[AttestationEntry]
if maybeSlotData.isNone:
# Logging done in getAttestationsForSlot(...)
# This isn't maximally efficient -- iterators or other approaches would
# avoid lots of memory allocations -- but it provides a more flexible base
# to experiment on and isn't yet a profiling hot path, while not penalizing
# slow attesting too much (per the spec, an attestation is supposed to stay
# includable up to two epochs back, so it isn't meant to be dropped just
# for arriving late). It still isn't a good solution -- see the set-packing
# comment below for one issue. It also creates problems with lots of repeat
# attestations, because a group of synchronized beacon_nodes does almost
# the opposite of what's intended: _blocks_ do have to be popular (via
# attestations), but _attestations_ shouldn't have to be repeated this
# often, which is an artifact of this state-free, identical-across-clones
# selection basis. In addResolved, too, new attestations get appended at
# the end, while these functions read from the beginning, et cetera. This
# all needs a single unified strategy.
const LOOKBACK_WINDOW = 3
for i in max(1, newBlockSlot.int64 - LOOKBACK_WINDOW) .. newBlockSlot.int64:
let maybeSlotData = getAttestationsForSlot(pool, i.Slot)
if maybeSlotData.isSome:
insert(attestations, maybeSlotData.get.attestations)
if attestations.len == 0:
return
let slotData = maybeSlotData.get
var cache = get_empty_per_epoch_cache()
for a in slotData.attestations:
for a in attestations:
var
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/validator.md#construct-attestation
attestation = Attestation(
@ -368,6 +385,8 @@ proc getAttestationsForBlock*(pool: AttestationPool,
result.add(attestation)
if result.len >= MAX_ATTESTATIONS:
debug "getAttestationsForBlock: returning early after hitting MAX_ATTESTATIONS",
attestationSlot = newBlockSlot - 1
return
proc resolve*(pool: var AttestationPool) =
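The selection loop above walks a small lookback window of recent slots and concatenates whatever the pool holds for each before filling the block up to MAX_ATTESTATIONS. Below is a standalone sketch of that pattern using assumed stand-in types (a bare Attestation object and a Table keyed by slot number) rather than the pool's real AttestationEntry/SlotData structures; the real code prepends each slot's attestations with insert(...), while this sketch simply appends, keeping candidates oldest-first.

import options, tables

const
  LOOKBACK_WINDOW = 3     # slots of history considered, as in the diff
  MAX_ATTESTATIONS = 128  # stand-in for the spec constant

type
  Attestation = object    # hypothetical minimal stand-in
    slot: int

proc getAttestationsForSlot(pool: Table[int, seq[Attestation]],
                            slot: int): Option[seq[Attestation]] =
  if slot in pool: some(pool[slot]) else: none(seq[Attestation])

proc getAttestationsForBlock(pool: Table[int, seq[Attestation]],
                             newBlockSlot: int): seq[Attestation] =
  # Gather candidates from the last LOOKBACK_WINDOW slots, then cap the
  # result at MAX_ATTESTATIONS, mirroring the loop in the diff.
  var candidates: seq[Attestation]
  for slot in max(1, newBlockSlot - LOOKBACK_WINDOW) .. newBlockSlot:
    let slotData = getAttestationsForSlot(pool, slot)
    if slotData.isSome:
      candidates.add slotData.get
  for a in candidates:
    result.add a
    if result.len >= MAX_ATTESTATIONS:
      return

when isMainModule:
  var pool = initTable[int, seq[Attestation]]()
  pool[9] = @[Attestation(slot: 9)]
  pool[11] = @[Attestation(slot: 11), Attestation(slot: 11)]
  doAssert getAttestationsForBlock(pool, 12).len == 3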

View File

@ -168,8 +168,12 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async
quit 1
# TODO check that genesis given on command line (if any) matches database
let
blockPool = BlockPool.init(db)
let blockPool = BlockPool.init(
db,
if conf.verifyFinalization:
{verifyFinalization}
else:
{})
if mainchainMonitor.isNil and
conf.web3Url.len > 0 and
@ -305,21 +309,6 @@ proc onBeaconBlock(node: BeaconNode, signedBlock: SignedBeaconBlock) =
# don't know if it's part of the chain we're currently building.
discard node.storeBlock(signedBlock)
proc verifyFinalization(node: BeaconNode, slot: Slot) =
# Epoch must be >= 4 to check finalization
const SETTLING_TIME_OFFSET = 1'u64
let epoch = slot.compute_epoch_at_slot()
# Don't static-assert this -- if this isn't called, don't require it
doAssert SLOTS_PER_EPOCH > SETTLING_TIME_OFFSET
# Intentionally, loudly assert. Point is to fail visibly and unignorably
# during testing.
if epoch >= 4 and slot mod SLOTS_PER_EPOCH > SETTLING_TIME_OFFSET:
let finalizedEpoch =
node.blockPool.finalizedHead.blck.slot.compute_epoch_at_slot()
doAssert finalizedEpoch + 2 == epoch
proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, async.} =
## Called at the beginning of a slot - usually every slot, but sometimes might
## skip a few in case we're running late.
@ -386,9 +375,6 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
beacon_slot.set slot.int64
if node.config.verifyFinalization:
verifyFinalization(node, scheduledSlot)
if slot > lastSlot + SLOTS_PER_EPOCH:
# We've fallen behind more than an epoch - there's nothing clever we can
# do here really, except skip all the work and try again later.

View File

@ -13,10 +13,9 @@ import
# Local modules
spec/[datatypes, crypto],
conf, time, beacon_chain_db, validator_pool,
conf, time, beacon_chain_db,
attestation_pool, block_pool, eth2_network,
beacon_node_types, mainchain_monitor,
sync_protocol, request_manager
beacon_node_types, mainchain_monitor, request_manager
type
RpcServer* = RpcHttpServer

View File

@ -4,7 +4,7 @@ import
deques, tables,
stew/[endians2, byteutils], chronicles,
spec/[datatypes, crypto, digest],
beacon_chain_db
beacon_chain_db, extras
type
# #############################################
@ -143,7 +143,7 @@ type
inAdd*: bool
headState*: StateData ## \
headState*: StateData ##\
## State given by the head block; only update in `updateHead`, not anywhere
## else via `withState`
@ -151,6 +151,8 @@ type
tmpState*: StateData ## Scratchpad - may be any state
updateFlags*: UpdateFlags
MissingBlock* = object
slots*: uint64 # number of slots that are suspected missing
tries*: int

View File

@ -139,7 +139,8 @@ func init*(T: type BlockRef, root: Eth2Digest, slot: Slot): BlockRef =
func init*(T: type BlockRef, root: Eth2Digest, blck: BeaconBlock): BlockRef =
BlockRef.init(root, blck.slot)
proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
proc init*(T: type BlockPool, db: BeaconChainDB,
updateFlags: UpdateFlags = {}): BlockPool =
# TODO we require that the db contains both a head and a tail block -
# asserting here doesn't seem like the right way to go about it however..
@ -243,9 +244,15 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
heads: @[head],
headState: tmpState[],
justifiedState: tmpState[], # This is wrong but we'll update it below
tmpState: tmpState[]
tmpState: tmpState[],
# The only allowed flag right now is verifyFinalization, as the others all
# allow skipping some validation.
updateFlags: {verifyFinalization} * updateFlags
)
doAssert res.updateFlags in [{}, {verifyFinalization}]
res.updateStateData(res.justifiedState, justifiedHead)
res.updateStateData(res.headState, headRef.atSlot(headRef.slot))
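The whitelist in BlockPool.init works by set intersection: only verifyFinalization can reach the pool's updateFlags, while any skip* flag is silently dropped. A minimal sketch of that idiom follows; only the flag names visible in this diff are assumed accurate, and the rest is illustrative only.

type
  UpdateFlag = enum
    skipStateRootValidation, skipBlockParentRootValidation, verifyFinalization
  UpdateFlags = set[UpdateFlag]

func sanitize(requested: UpdateFlags): UpdateFlags =
  # `*` is Nim's built-in set intersection: anything outside the
  # whitelist vanishes.
  result = {verifyFinalization} * requested
  doAssert result in [{}, {verifyFinalization}]

when isMainModule:
  doAssert sanitize({skipStateRootValidation, verifyFinalization}) ==
    {verifyFinalization}
  doAssert verifyFinalization notin sanitize({skipBlockParentRootValidation})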
@ -324,18 +331,18 @@ proc getState(
pool: BlockPool, db: BeaconChainDB, stateRoot: Eth2Digest, blck: BlockRef,
output: var StateData): bool =
let outputAddr = unsafeAddr output # local scope
proc rollback(v: var BeaconState) =
func restore(v: var BeaconState) =
if outputAddr == (unsafeAddr pool.headState):
# TODO seeing the headState in the rollback shouldn't happen - we load
# TODO seeing the headState in the restore shouldn't happen - we load
# head states only when updating the head position, and by that time
# the database will have gone through enough sanity checks that
# SSZ exceptions shouldn't happen, which is when rollback happens.
# SSZ exceptions shouldn't happen, which is when restore happens.
# Nonetheless, this is an ugly workaround that needs to go away
doAssert false, "Cannot alias headState"
outputAddr[] = pool.headState
if not db.getState(stateRoot, output.data.data, rollback):
if not db.getState(stateRoot, output.data.data, restore):
return false
output.blck = blck
@ -470,14 +477,15 @@ proc add*(
let
poolPtr = unsafeAddr pool # safe because restore is short-lived
proc restore(v: var HashedBeaconState) =
func restore(v: var HashedBeaconState) =
# TODO address this ugly workaround - there should probably be a
# `state_transition` that takes a `StateData` instead and updates
# the block as well
doAssert v.addr == addr poolPtr.tmpState.data
poolPtr.tmpState = poolPtr.headState
if not state_transition(pool.tmpState.data, signedBlock, {}, restore):
if not state_transition(
pool.tmpState.data, signedBlock, pool.updateFlags, restore):
# TODO find a better way to log all this block data
notice "Invalid block",
blck = shortLog(blck),
@ -545,7 +553,7 @@ func getRef*(pool: BlockPool, root: Eth2Digest): BlockRef =
## Retrieve a resolved block reference, if available
pool.blocks.getOrDefault(root, nil)
proc getBlockRange*(
func getBlockRange*(
pool: BlockPool, startSlot: Slot, skipStep: Natural,
output: var openArray[BlockRef]): Natural =
## This function populates an `output` buffer of blocks
@ -650,7 +658,7 @@ proc skipAndUpdateState(
# save and reuse
# TODO possibly we should keep this in memory for the hot blocks
let nextStateRoot = pool.db.getStateRoot(blck.root, state.data.slot + 1)
advance_slot(state, nextStateRoot)
advance_slot(state, nextStateRoot, pool.updateFlags)
if save:
pool.putState(state, blck)
@ -662,12 +670,13 @@ proc skipAndUpdateState(
pool.skipAndUpdateState(
state.data, blck.refs, blck.data.message.slot - 1, save)
var statePtr = unsafeAddr state # safe because `rollback` is locally scoped
proc rollback(v: var HashedBeaconState) =
var statePtr = unsafeAddr state # safe because `restore` is locally scoped
func restore(v: var HashedBeaconState) =
doAssert (addr(statePtr.data) == addr v)
statePtr[] = pool.headState
let ok = state_transition(state.data, blck.data, flags, rollback)
let ok = state_transition(
state.data, blck.data, flags + pool.updateFlags, restore)
if ok and save:
pool.putState(state.data, blck.refs)

View File

@ -31,5 +31,6 @@ type
## Skip verification of block state root.
skipBlockParentRootValidation ##\
## Skip verification that the block's parent root matches the previous block header.
verifyFinalization
UpdateFlags* = set[UpdateFlag]

View File

@ -564,12 +564,14 @@ proc process_attestation*(
if attestation.data.target.epoch == get_current_epoch(state):
trace "process_attestation: current_epoch_attestations.add",
attestation = shortLog(attestation),
pending_attestation = pending_attestation,
indices = get_attesting_indices(
state, attestation.data, attestation.aggregation_bits, stateCache).len
state.current_epoch_attestations.add(pending_attestation)
else:
trace "process_attestation: previous_epoch_attestations.add",
attestation = shortLog(attestation),
pending_attestation = pending_attestation,
indices = get_attesting_indices(
state, attestation.data, attestation.aggregation_bits, stateCache).len
@ -577,6 +579,10 @@ proc process_attestation*(
true
else:
trace "process_attestation: check_attestation failed",
attestation = shortLog(attestation),
indices = get_attesting_indices(
state, attestation.data, attestation.aggregation_bits, stateCache).len
false
func makeAttestationData*(

View File

@ -37,7 +37,7 @@
import
math, sequtils, tables,
stew/[bitseqs, bitops2], chronicles, json_serialization/std/sets,
metrics, ../ssz,
metrics, ../extras, ../ssz,
beaconstate, crypto, datatypes, digest, helpers, validator,
state_transition_helpers,
../../nbench/bench_lab
@ -110,8 +110,8 @@ func get_attesting_balance(
state, attestations, stateCache))
# https://github.com/ethereum/eth2.0-specs/blob/v0.9.4/specs/core/0_beacon-chain.md#justification-and-finalization
proc process_justification_and_finalization*(
state: var BeaconState, stateCache: var StateCache) {.nbench.}=
proc process_justification_and_finalization*(state: var BeaconState,
stateCache: var StateCache, updateFlags: UpdateFlags = {}) {.nbench.} =
logScope: pcs = "process_justification_and_finalization"
@ -422,17 +422,22 @@ func process_final_updates*(state: var BeaconState) {.nbench.}=
state.current_epoch_attestations = @[]
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#epoch-processing
proc process_epoch*(state: var BeaconState) {.nbench.}=
proc process_epoch*(state: var BeaconState, updateFlags: UpdateFlags)
{.nbench.} =
trace "process_epoch",
current_epoch = get_current_epoch(state)
var per_epoch_cache = get_empty_per_epoch_cache()
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#justification-and-finalization
process_justification_and_finalization(state, per_epoch_cache)
process_justification_and_finalization(state, per_epoch_cache, updateFlags)
trace "ran process_justification_and_finalization",
current_epoch = get_current_epoch(state)
# state.slot hasn't been incremented yet.
if verifyFinalization in updateFlags and get_current_epoch(state) >= 3:
# Rule 2/3/4 finalization results in the most pessimal case. The other
# three finalization rules finalize more quickly as long as any of them
# triggers.
doAssert state.finalized_checkpoint.epoch + 3 >= get_current_epoch(state)
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#rewards-and-penalties-1
process_rewards_and_penalties(state, per_epoch_cache)
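The new assertion treats the rule 2/3/4 path as the worst case: once past the first few epochs, a chain whose finality keeps working should never let the finalized checkpoint fall more than three epochs behind the current one. A tiny sketch of that bound, with illustrative epoch numbers only:

# Sketch of the finalization-lag bound asserted in process_epoch above;
# the diff only checks it once get_current_epoch(state) >= 3.
func finalizationLagOk(finalizedEpoch, currentEpoch: uint64): bool =
  finalizedEpoch + 3 >= currentEpoch

when isMainModule:
  doAssert finalizationLagOk(2'u64, 5'u64)      # lag of 3: pessimal but fine
  doAssert not finalizationLagOk(1'u64, 5'u64)  # lag of 4: finality stalled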

View File

@ -123,7 +123,8 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} =
hash_tree_root(state.data.latest_block_header)
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest]) =
proc advance_slot*(state: var HashedBeaconState,
nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) =
# Special case version of process_slots that moves one slot at a time - can
# run faster if the state root is known already (for example when replaying
# existing slots)
@ -132,7 +133,7 @@ proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest])
if is_epoch_transition:
# Note: Genesis epoch = 0, no need to test if before Genesis
beacon_previous_validators.set(get_epoch_validator_count(state.data))
process_epoch(state.data)
process_epoch(state.data, updateFlags)
state.data.slot += 1
if is_epoch_transition:
beacon_current_validators.set(get_epoch_validator_count(state.data))
@ -143,7 +144,8 @@ proc advance_slot*(state: var HashedBeaconState, nextStateRoot: Opt[Eth2Digest])
state.root = hash_tree_root(state.data)
# https://github.com/ethereum/eth2.0-specs/blob/v0.10.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
proc process_slots*(state: var HashedBeaconState, slot: Slot,
updateFlags: UpdateFlags = {}) {.nbench.} =
# TODO: Eth specs strongly assert that state.data.slot <= slot
# This prevents receiving attestations in any order
# (see tests/test_attestation_pool)
@ -165,7 +167,7 @@ proc process_slots*(state: var HashedBeaconState, slot: Slot) {.nbench.} =
# Catch up to the target slot
while state.data.slot < slot:
advance_slot(state, err(Opt[Eth2Digest]))
advance_slot(state, err(Opt[Eth2Digest]), updateFlags)
proc noRollback*(state: var HashedBeaconState) =
trace "Skipping rollback of broken state"
@ -205,7 +207,7 @@ proc state_transition*(
# bool return values...)
doAssert not rollback.isNil, "use noRollback if it's ok to mess up state"
# These should never fail.
process_slots(state, signedBlock.message.slot)
process_slots(state, signedBlock.message.slot, flags)
# Block updates - these happen when there's a new block being suggested
# by the block proposer. Every actor in the network will update its state
@ -217,6 +219,9 @@ proc state_transition*(
verify_block_signature(state.data, signedBlock):
var per_epoch_cache = get_empty_per_epoch_cache()
trace "in state_transition: processing block, signature passed",
signature = signedBlock.signature,
blockRoot = hash_tree_root(signedBlock.message)
if processBlock(state.data, signedBlock.message, flags, per_epoch_cache):
if skipStateRootValidation in flags or verifyStateRoot(state.data, signedBlock.message):
# State root is what it should be - we're done!

View File

@ -7,25 +7,25 @@
import
# Standard library
os, tables, random, strutils, times,
os, tables, strutils, times,
# Nimble packages
stew/[objects, bitseqs, byteutils], stew/shims/macros,
chronos, confutils, metrics, json_rpc/[rpcserver, jsonmarshal],
chronicles, chronicles/helpers as chroniclesHelpers,
stew/[objects, bitseqs], stew/shims/macros,
chronos, metrics, json_rpc/[rpcserver, jsonmarshal],
chronicles,
json_serialization/std/[options, sets, net], serialization/errors,
eth/db/kvstore, eth/db/kvstore_sqlite3,
eth/p2p/enode, eth/[keys, async_utils], eth/p2p/discoveryv5/[protocol, enr],
eth/[keys, async_utils], eth/p2p/discoveryv5/[protocol, enr],
# Local modules
spec/[datatypes, digest, crypto, beaconstate, helpers, validator, network,
state_transition_block], spec/presets/custom,
conf, time, beacon_chain_db, validator_pool, extras,
attestation_pool, block_pool, eth2_network, eth2_discovery,
state_transition_block],
conf, time, beacon_chain_db, validator_pool,
attestation_pool, block_pool, eth2_network,
beacon_node_common, beacon_node_types,
mainchain_monitor, version, ssz, ssz/dynamic_navigator,
sync_protocol, request_manager, validator_keygen, interop, statusbar,
attestation_aggregation, sync_manager, state_transition, sszdump
request_manager, interop, statusbar,
attestation_aggregation, sync_manager, sszdump
# Metrics for tracking attestation and beacon block loss
declareCounter beacon_attestations_sent,

View File

@ -31,4 +31,4 @@ proc transitionEpochUntilJustificationFinalization*(state: var HashedBeaconState
# From process_epoch()
var per_epoch_cache = get_empty_per_epoch_cache()
process_justification_and_finalization(state.data, per_epoch_cache)
process_justification_and_finalization(state.data, per_epoch_cache, {})