Jacek Sieka 05ffe7b2bf
Prune BlockRef on finalization (#3513)
Up til now, the block dag has been using `BlockRef`, a structure adapted
for a full DAG, to represent all of chain history. This is a correct and
simple design, but does not exploit the linearity of the chain once
parts of it finalize.

By pruning the in-memory `BlockRef` structure at finalization, we save,
at the time of writing, a cool ~250mb (or 25%:ish) chunk of memory
landing us at a steady state of ~750mb normal memory usage for a
validating node.

Above all though, we prevent memory usage from growing proportionally
with the length of the chain, something that would not be sustainable
over time -  instead, the steady state memory usage is roughly
determined by the validator set size which grows much more slowly. With
these changes, the core should remain sustainable memory-wise post-merge
all the way to withdrawals (when the validator set is expected to grow).

In-memory indices are still used for the "hot" unfinalized portion of
the chain - this ensures that consensus performance remains unchanged.

What changes is that for historical access, we use a db-based linear
slot index which is cache-and-disk-friendly, keeping the cost for
accessing historical data at a similar level as before, achieving the
savings at no perceivable cost to functionality or performance.

A nice collateral benefit is the almost-instant startup since we no
longer load any large indices at dag init.

The cost of this functionality instead can be found in the complexity of
having to deal with two ways of traversing the chain - by `BlockRef` and
by slot.

* use `BlockId` instead of `BlockRef` where finalized / historical data
may be required
* simplify clearance pre-advancement
* remove dag.finalizedBlocks (~50:ish mb)
* remove `getBlockAtSlot` - use `getBlockIdAtSlot` instead
* `parent` and `atSlot` for `BlockId` now require a `ChainDAGRef`
instance, unlike `BlockRef` traversal
* prune `BlockRef` parents on finality (~200:ish mb)
* speed up ChainDAG init by not loading finalized history index
* mess up light client server error handling - this needs revisiting :)
2022-03-17 17:42:56 +00:00

370 lines
14 KiB
Nim

# beacon_chain
# Copyright (c) 2018-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
chronicles,
stew/[assign2, results],
../spec/[beaconstate, forks, signatures, signatures_batch, state_transition],
"."/[block_dag, blockchain_dag, blockchain_dag_light_client]
export results, signatures_batch, block_dag, blockchain_dag
# Clearance
# ---------------------------------------------
#
# This module is in charge of making the
# "quarantined" network blocks
# pass the firewall and be stored in the chain DAG
logScope:
topics = "clearance"
proc addResolvedHeadBlock(
    dag: ChainDAGRef,
    state: var ForkedHashedBeaconState,
    trustedBlock: ForkyTrustedSignedBeaconBlock,
    parent: BlockRef, cache: var StateCache,
    onBlockAdded: OnPhase0BlockAdded | OnAltairBlockAdded | OnBellatrixBlockAdded,
    stateDataDur, sigVerifyDur, stateVerifyDur: Duration
    ): BlockRef =
  ## Register a block whose parent is known and which has already been applied
  ## to `state`: create its `BlockRef`, link it below `parent`, add it to
  ## `dag.forkBlocks`, store it via `dag.putBlock` and update `dag.heads`.
  ##
  ## Afterwards the validator key cache is refreshed, an `EpochRef` is
  ## obtained for the state, light client data is updated, and the
  ## `onBlockAdded` / `dag.onBlockAdded` callbacks are invoked when they are
  ## not nil. The three `*Dur` parameters are only used for logging.
  ##
  ## Returns the newly created `BlockRef`.
  doAssert state.matches_block_slot(
    trustedBlock.root, trustedBlock.message.slot),
    "Given state must have the new block applied"

  let
    blockRoot = trustedBlock.root
    blockRef = BlockRef.init(blockRoot, trustedBlock.message)
    beginTick = Moment.now()

  link(parent, blockRef)

  dag.forkBlocks.incl(KeyedBlockRef.init(blockRef))

  # Resolved blocks should be stored in database
  dag.putBlock(trustedBlock)
  let storedTick = Moment.now()

  # Either the new block extends one of the known heads, in which case it
  # takes that head's place, or it starts a new head of its own
  block updateHeads:
    for head in dag.heads.mitems():
      if head.isAncestorOf(blockRef):
        head = blockRef
        break updateHeads

    dag.heads.add(blockRef)

  # Regardless of the chain we're on, the deposits come in the same order so
  # as soon as we import a block, we'll also update the shared public key
  # cache
  dag.updateValidatorKeys(getStateField(state, validators).asSeq())

  # Getting epochRef with the state will potentially create a new EpochRef
  let epochRef = dag.getEpochRef(state, cache)
  let epochRefTick = Moment.now()

  debug "Block resolved",
    blockRoot = shortLog(blockRoot),
    blck = shortLog(trustedBlock.message),
    heads = dag.heads.len(),
    stateDataDur, sigVerifyDur, stateVerifyDur,
    putBlockDur = storedTick - beginTick,
    epochRefDur = epochRefTick - storedTick

  # Update light client data
  dag.processNewBlockForLightClient(state, trustedBlock, parent)

  # Notify others of the new block before processing the quarantine, such that
  # notifications for parents happens before those of the children
  if onBlockAdded != nil:
    onBlockAdded(blockRef, trustedBlock, epochRef)
  if not(isNil(dag.onBlockAdded)):
    dag.onBlockAdded(ForkedTrustedSignedBeaconBlock.init(trustedBlock))

  blockRef
proc checkStateTransition(
    dag: ChainDAGRef, signedBlock: ForkySigVerifiedSignedBeaconBlock,
    cache: var StateCache): Result[void, BlockError] =
  ## Ensure block can be applied on a state
  ##
  ## Runs `state_transition_block` for `signedBlock` on `dag.clearanceState`.
  ## Returns `ok()` when the transition succeeds and `BlockError.Invalid`
  ## (after logging the error) when it does not.
  func resetClearanceState(v: var ForkedHashedBeaconState) =
    # Rollback handler handed to the state transition: it ignores its
    # argument and overwrites the clearance state with the head state
    assign(dag.clearanceState, dag.headState)

  let res = state_transition_block(
    dag.cfg, dag.clearanceState, signedBlock,
    cache, dag.updateFlags, resetClearanceState)

  if res.isErr():
    info "Invalid block",
      blockRoot = shortLog(signedBlock.root),
      blck = shortLog(signedBlock.message),
      error = res.error()

    return err(BlockError.Invalid)

  ok()
proc advanceClearanceState*(dag: ChainDAGRef) =
  ## Advance `dag.clearanceState` by one slot, unless its slot is already
  ## greater than the slot of its latest block header.
  ##
  ## When the chain is synced, the most likely block to be produced is the
  ## block right after head - we can exploit this assumption and advance the
  ## state to that slot before the block arrives, thus allowing us to do the
  ## expensive epoch transition ahead of time.
  ##
  ## Notably, we use the clearance state here because that's where the block
  ## will first be seen - later, this state will be copied to the head state!
  let alreadyAdvanced = withState(dag.clearanceState):
    state.data.slot > state.data.latest_block_header.slot

  if alreadyAdvanced:
    return

  let
    next = getStateField(dag.clearanceState, slot) + 1
    beginTick = Moment.now()

  var
    cache = StateCache()
    epochInfo = ForkedEpochInfo()

  dag.advanceSlots(dag.clearanceState, next, true, cache, epochInfo)

  debug "Prepared clearance state for next block",
    next, updateStateDur = Moment.now() - beginTick
proc addHeadBlock*(
    dag: ChainDAGRef, verifier: var BatchVerifier,
    signedBlock: ForkySignedBeaconBlock,
    onBlockAdded: OnPhase0BlockAdded | OnAltairBlockAdded |
                  OnBellatrixBlockAdded
    ): Result[BlockRef, BlockError] =
  ## Try adding a block to the chain, verifying first that it passes the state
  ## transition function and contains correct cryptographic signature.
  ##
  ## Cryptographic checks can be skipped by adding skipBLSValidation to
  ## dag.updateFlags
  ##
  ## Returns the `BlockRef` of the added block, or one of:
  ## * `Duplicate` - the block is already part of the canonical finalized
  ##   history or of the fork blocks
  ## * `UnviableFork` - the block conflicts with finalized history, or its
  ##   parent is known by id only
  ## * `MissingParent` - the parent is unknown, or its state could not be
  ##   loaded
  ## * `Invalid` - the block is not newer than its parent, or it fails
  ##   signature or state transition checks
  logScope:
    blockRoot = shortLog(signedBlock.root)
    blck = shortLog(signedBlock.message)
    signature = shortLog(signedBlock.signature)

  template blck(): untyped = signedBlock.message # shortcuts without copy
  template blockRoot(): untyped = signedBlock.root

  # If the block we get is older than what we finalized already, we drop it.
  # One way this can happen is that we start request a block and finalization
  # happens in the meantime - the block we requested will then be stale
  # by the time it gets here.
  if blck.slot <= dag.finalizedHead.slot:
    let existing = dag.getBlockIdAtSlot(blck.slot)
    if existing.isSome:
      let canonical = existing.get()

      # The exact slot match ensures we reject blocks that were orphaned in
      # the finalized chain
      if canonical.bid.slot == blck.slot and canonical.bid.root == blockRoot:
        debug "Duplicate block"
        return err(BlockError.Duplicate)

      # Block is older than finalized, but different from the block in our
      # canonical history: it must be from an unviable branch
      debug "Block from unviable fork",
        existing = shortLog(canonical),
        finalizedHead = shortLog(dag.finalizedHead),
        tail = shortLog(dag.tail)

      return err(BlockError.UnviableFork)

  # Check non-finalized blocks as well
  if dag.containsForkBlock(blockRoot):
    return err(BlockError.Duplicate)

  let parent = dag.getBlockRef(blck.parent_root).valueOr:
    # There are two cases where the parent won't be found: we don't have it or
    # it has been finalized already, and as a result the branch the new block
    # is on is no longer a viable fork candidate - we can't tell which is which
    # at this stage, but we can check if we've seen the parent block previously
    # and thus prevent requests for it to be downloaded again.
    let parentId = dag.getBlockId(blck.parent_root)
    if parentId.isSome():
      debug "Block unviable due to pre-finalized-checkpoint parent",
        parentId = parentId.get()
      return err(BlockError.UnviableFork)

    debug "Block parent unknown or finalized already", parentId
    return err(BlockError.MissingParent)

  if parent.slot >= blck.slot:
    # A block whose parent is newer than the block itself is clearly invalid -
    # discard it immediately
    debug "Block older than parent",
      parent = shortLog(parent)

    return err(BlockError.Invalid)

  # The block is resolved, now it's time to validate it to ensure that the
  # blocks we add to the database are clean for the given state
  let beginTick = Moment.now()

  # The clearance state works as the canonical
  # "let's make things permanent" point and saves things to the database -
  # storing things is slow, so we don't want to do so before there's a
  # reasonable chance that the information will become more permanently useful -
  # by the time a new block reaches this point, the parent block will already
  # have "established" itself in the network to some degree at least.
  var cache = StateCache()
  let
    parentAtBlockSlot = parent.atSlot(blck.slot)
    clearanceBlock = parentAtBlockSlot.toBlockslotId.expect("not nil")
    stateLoaded = dag.updateState(
      dag.clearanceState, clearanceBlock, true, cache)

  if not stateLoaded:
    # We should never end up here - the parent must be a block no older than and
    # rooted in the finalized checkpoint, hence we should always be able to
    # load its corresponding state
    error "Unable to load clearance state for parent block, database corrupt?",
      parent = shortLog(parentAtBlockSlot),
      clearanceBlock = shortLog(clearanceBlock)
    return err(BlockError.MissingParent)

  let stateDataTick = Moment.now()

  # First, batch-verify all signatures in block
  if skipBLSValidation notin dag.updateFlags:
    # TODO: remove skipBLSValidation
    var sigs: seq[SignatureSet]
    let collected = sigs.collectSignatureSets(
      signedBlock, dag.db.immutableValidators,
      dag.clearanceState, cache)

    if collected.isErr():
      # A PublicKey or Signature isn't on the BLS12-381 curve
      info "Unable to load signature sets",
        err = collected.error()
      return err(BlockError.Invalid)

    if not verifier.batchVerify(sigs):
      info "Block signature verification failed",
        signature = shortLog(signedBlock.signature)
      return err(BlockError.Invalid)

  let sigVerifyTick = Moment.now()

  ? checkStateTransition(dag, signedBlock.asSigVerified(), cache)

  let stateVerifyTick = Moment.now()

  # Careful, clearanceState.data has been updated but not blck - we need to
  # create the BlockRef first!
  ok addResolvedHeadBlock(
    dag, dag.clearanceState,
    signedBlock.asTrusted(),
    parent, cache,
    onBlockAdded,
    stateDataDur = stateDataTick - beginTick,
    sigVerifyDur = sigVerifyTick - stateDataTick,
    stateVerifyDur = stateVerifyTick - sigVerifyTick)
proc addBackfillBlock*(
    dag: ChainDAGRef,
    signedBlock: ForkySignedBeaconBlock): Result[void, BlockError] =
  ## When performing checkpoint sync, we need to backfill historical blocks
  ## in order to respond to GetBlocksByRange requests. Backfill blocks are
  ## added in backwards order, one by one, based on the `parent_root` of the
  ## earliest block we know about.
  ##
  ## Because only one history is relevant when backfilling, one doesn't have to
  ## consider forks or other finalization-related issues - a block is either
  ## valid and finalized, or not.
  ##
  ## Returns `ok()` once the block has been stored and `dag.backfill` has been
  ## moved to it, or one of:
  ## * `Duplicate` - the block is already in the canonical history, or it is
  ##   the genesis block that completes the backfill
  ## * `UnviableFork` - a different block is already known at that slot
  ## * `MissingParent` - the block root is not the one backfill expects next
  ## * `Invalid` - the proposer signature does not verify
  ##
  ## The process is terminated (`quit 1`) when the backfilled chain ends in a
  ## different genesis block or when no key is known for the block's proposer.
  logScope:
    blockRoot = shortLog(signedBlock.root)
    blck = shortLog(signedBlock.message)
    signature = shortLog(signedBlock.signature)
    backfill = (dag.backfill.slot, shortLog(dag.backfill.parent_root))

  template blck(): untyped = signedBlock.message # shortcuts without copy
  template blockRoot(): untyped = signedBlock.root

  let startTick = Moment.now()

  if blck.slot >= dag.backfill.slot:
    let existing = dag.getBlockIdAtSlot(blck.slot)
    if existing.isSome:
      if existing.get().bid.slot == blck.slot and
          existing.get().bid.root == blockRoot:
        # We should not call the block added callback for blocks that already
        # existed in the pool, as that may confuse consumers such as the fork
        # choice.
        debug "Duplicate block"
        return err(BlockError.Duplicate)

      # Block is at or after the backfill point, but different from the block
      # in our canonical history: it must be from an unviable branch
      debug "Block from unviable fork",
        existing = shortLog(existing.get()),
        finalizedHead = shortLog(dag.finalizedHead)

      return err(BlockError.UnviableFork)

  if blck.slot == dag.genesis.slot and
      dag.backfill.parent_root == dag.genesis.root:
    if blockRoot != dag.genesis.root:
      # We've matched the backfill blocks all the way back to genesis via the
      # `parent_root` chain and ended up at a different genesis - one way this
      # can happen is when an invalid `--network` parameter is given during
      # startup (though in theory, we check that - maybe the database was
      # swapped or something?).
      fatal "Checkpoint given during initial startup inconsistent with genesis block - wrong network used when starting the node?",
        genesis = shortLog(dag.genesis), tail = shortLog(dag.tail),
        head = shortLog(dag.head)
      quit 1

    dag.backfill = blck.toBeaconBlockSummary()
    dag.db.finalizedBlocks.insert(blck.slot, blockRoot)

    notice "Received final block during backfill, backfill complete"

    # Backfill done - dag.backfill.slot now points to genesis block just like
    # it would if we loaded a fully synced database - returning duplicate
    # here is appropriate, though one could also call it ... ok?
    return err(BlockError.Duplicate)

  if dag.backfill.parent_root != blockRoot:
    debug "Block does not match expected backfill root"
    return err(BlockError.MissingParent) # MissingChild really, but ..

  # If the hash is correct, the block itself must be correct, but the root does
  # not cover the signature, which we check next
  let proposerKey = dag.validatorKey(blck.proposer_index)
  if proposerKey.isNone():
    # We've verified that the block root matches our expectations by following
    # the chain of parents all the way from checkpoint. If all those blocks
    # were valid, the proposer_index in this block must also be valid, and we
    # should have a key for it but we don't: this is either a bug on our side
    # from which we cannot recover, or an invalid checkpoint state was given in
    # which case we're in trouble.
    fatal "Invalid proposer in backfill block - checkpoint state corrupt?",
      head = shortLog(dag.head), tail = shortLog(dag.tail),
      genesis = shortLog(dag.genesis)

    quit 1

  if not verify_block_signature(
      dag.forkAtEpoch(blck.slot.epoch),
      getStateField(dag.headState, genesis_validators_root),
      blck.slot,
      signedBlock.root,
      proposerKey.get(),
      signedBlock.signature):
    info "Block signature verification failed"
    return err(BlockError.Invalid)

  let sigVerifyTick = Moment.now()

  # Store the block, record it in the finalized slot index and move the
  # backfill pointer to it so that its parent is expected next
  dag.putBlock(signedBlock.asTrusted())
  dag.db.finalizedBlocks.insert(blck.slot, blockRoot)
  dag.backfill = blck.toBeaconBlockSummary()

  let putBlockTick = Moment.now()
  debug "Block backfilled",
    sigVerifyDur = sigVerifyTick - startTick,
    putBlockDur = putBlockTick - sigVerifyTick

  ok()