Incremental pruning (#4887)

* Incremental pruning

When pruning is turned on for the first time, the current pruning algorithm
prunes the full database at startup. This delays the restart unnecessarily,
since the space freed by pruning is not all needed at once.

This PR introduces incremental pruning such that each pruning call removes at
most 32 slots' worth of blocks, or as many slots as the head has advanced
since the last call, whichever is higher.

This mode is expected to become the default in a follow-up release.
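
As a rough illustration of that rule, here is a minimal Nim sketch (not code from this PR) that computes the next block-pruning horizon from plain uint64 slot numbers; the actual implementation operates on the `Slot` type and additionally lags the cap by one state-snapshot period:

# Illustrative only - names and numbers are assumptions, not the PR's code.
const maxSlotsPerPrune = 32'u64

func nextBlockHorizon(horizon, lastHorizon, lastBlockHorizon: uint64): uint64 =
  # Advance by at most 32 slots, unless the horizon itself moved further
  let maxSlots = max(horizon - lastHorizon, maxSlotsPerPrune)
  min(horizon, lastBlockHorizon + maxSlots)

when isMainModule:
  # Steady state: the horizon advanced one slot, prune at most 32 slots
  doAssert nextBlockHorizon(10_000, 9_999, 5_000) == 5_032
  # Catching up after sync: the horizon jumped 500 slots, pruning keeps pace
  doAssert nextBlockHorizon(10_500, 10_000, 5_000) == 5_500
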
Jacek Sieka 2023-05-12 12:37:15 +02:00 committed by GitHub
parent 938d21f1ed
commit 51418a7894
4 changed files with 111 additions and 68 deletions


@@ -175,6 +175,12 @@ type
       ## The last prune point
       ## We can prune up to finalizedHead
 
+    lastHistoryPruneHorizon*: Slot
+      ## The horizon when we last pruned, for horizon diff computation
+
+    lastHistoryPruneBlockHorizon*: Slot
+      ## Block pruning progress at the last call
+
     # -----------------------------------
     # Rewinder - Mutable state processing


@@ -50,10 +50,16 @@ declareGauge beacon_processed_deposits_total, "Number of total deposits included
 logScope: topics = "chaindag"
 
 const
-  # When finality happens, we prune historical states from the database except
-  # for a snapshort every 32 epochs from which replays can happen - there's a
-  # balance here between making long replays and saving on disk space
   EPOCHS_PER_STATE_SNAPSHOT* = 32
+    ## When finality happens, we prune historical states from the database except
+    ## for a snapshot every 32 epochs from which replays can happen - there's a
+    ## balance here between making long replays and saving on disk space
+
+  MAX_SLOTS_PER_PRUNE* = SLOTS_PER_EPOCH
+    ## We prune the database incrementally so as not to introduce long
+    ## processing breaks - this number is the maximum number of blocks we allow
+    ## to be pruned every time the prune call is made (once per slot typically)
+    ## unless head is moving faster (ie during sync)
 
 proc putBlock*(
     dag: ChainDAGRef, signedBlock: ForkyTrustedSignedBeaconBlock) =
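
For reference, a quick check of the mainnet numbers implied by these constants (illustrative only, not part of the diff): with 32-slot epochs, state snapshots sit 1024 slots apart, while each prune call may remove at most one epoch's worth of blocks.

# Assumed mainnet preset values, spelled out for illustration
const
  slotsPerEpoch = 32'u64
  epochsPerStateSnapshot = 32'u64
  slotsPerStateSnapshot = slotsPerEpoch * epochsPerStateSnapshot
doAssert slotsPerStateSnapshot == 1024
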
@@ -1049,6 +1055,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
       info).expect("head blocks should apply")
 
   dag.head = headRef
+  dag.heads = @[headRef]
 
   assign(dag.clearanceState, dag.headState)
@@ -1091,8 +1098,6 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
   dag.finalizedHead = headRef.atSlot(finalizedSlot)
   dag.lastPrunePoint = dag.finalizedHead.toBlockSlotId().expect("not nil")
 
-  dag.heads = @[headRef]
-
   doAssert dag.finalizedHead.blck != nil,
     "The finalized head should exist at the slot"
   doAssert dag.finalizedHead.blck.parent == nil,
@@ -1201,6 +1206,16 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
   #  have a cache
   dag.updateValidatorKeys(getStateField(dag.headState, validators).asSeq())
 
+  # Initialize pruning such that when starting with a database that hasn't been
+  # pruned, we work our way from the tail to the horizon in incremental steps
+  dag.lastHistoryPruneHorizon = dag.horizon()
+  dag.lastHistoryPruneBlockHorizon = block:
+    let boundary = min(dag.tail.slot, dag.horizon())
+    if boundary.epoch() >= EPOCHS_PER_STATE_SNAPSHOT:
+      start_slot(boundary.epoch() - EPOCHS_PER_STATE_SNAPSHOT)
+    else:
+      Slot(0)
+
   info "Block DAG initialized",
     head = shortLog(dag.head),
     finalizedHead = shortLog(dag.finalizedHead),
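
A worked example of this initialization, with illustrative numbers rather than anything from the PR: with a tail at slot 4096 (epoch 128) and a horizon beyond it, the initial block horizon lands one snapshot period (32 epochs) earlier, so the incremental pruner works its way forward from just before the tail.

# Illustrative arithmetic only - not code from the diff
const slotsPerEpoch = 32'u64
let
  boundaryEpoch = 4096'u64 div slotsPerEpoch              # epoch 128
  initialBlockHorizon = (boundaryEpoch - 32) * slotsPerEpoch
doAssert initialBlockHorizon == 3072                      # start of epoch 96
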
@@ -1841,30 +1856,75 @@ proc pruneStateCachesDAG*(dag: ChainDAGRef) =
     statePruneDur = statePruneTick - startTick,
     epochRefPruneDur = epochRefPruneTick - statePruneTick
 
+proc pruneStep(horizon, lastHorizon, lastBlockHorizon: Slot):
+    tuple[stateHorizon, blockHorizon: Slot] =
+  ## Compute a reasonable incremental pruning step considering the current
+  ## horizon, how far the database has been pruned already and where we want the
+  ## tail to be - the return value shows the first state and block that we
+  ## should _keep_ (inclusive).
+  const SLOTS_PER_STATE_SNAPSHOT =
+    uint64(EPOCHS_PER_STATE_SNAPSHOT * SLOTS_PER_EPOCH)
+
+  let
+    blockHorizon = block:
+      let
+        # Keep up with horizon if it's moving fast, ie if we're syncing
+        maxSlots = max(horizon - lastHorizon, MAX_SLOTS_PER_PRUNE)
+
+        # Move the block horizon cap with a lag so that it moves slot-by-slot
+        # instead of a big jump every time we prune a state - assuming we
+        # prune every slot, this makes us prune one slot at a time instead of
+        # a burst of prunes (as computed by maxSlots) around every snapshot
+        # change followed by no pruning for the rest of the period
+        maxBlockHorizon =
+          if horizon + 1 >= SLOTS_PER_STATE_SNAPSHOT:
+            horizon + 1 - SLOTS_PER_STATE_SNAPSHOT
+          else:
+            Slot(0)
+
+      # `lastBlockHorizon` captures the case where we're incrementally
+      # pruning a database that hasn't been pruned for a while: it's
+      # initialized to a pre-tail value on startup and moves to approach
+      # `maxBlockHorizon`.
+      min(maxBlockHorizon, lastBlockHorizon + maxSlots)
+
+    # Round up such that we remove state only once blocks have been removed
+    stateHorizon =
+      ((blockHorizon + SLOTS_PER_STATE_SNAPSHOT - 1) div
+        SLOTS_PER_STATE_SNAPSHOT) * SLOTS_PER_STATE_SNAPSHOT
+
+  (Slot(stateHorizon), blockHorizon)
+
 proc pruneHistory*(dag: ChainDAGRef, startup = false) =
+  ## Perform an incremental pruning step of the history
   if dag.db.db.readOnly:
     return
 
-  let horizon = dag.horizon()
-  if horizon == GENESIS_SLOT:
-    return
-
   let
-    preTail = dag.tail
-    # Round to state snapshot boundary - this is where we'll leave the tail
-    # after pruning
-    stateHorizon = Epoch((horizon.epoch div EPOCHS_PER_STATE_SNAPSHOT) * EPOCHS_PER_STATE_SNAPSHOT)
+    horizon = dag.horizon()
+    (stateHorizon, blockHorizon) = pruneStep(
+      horizon, dag.lastHistoryPruneHorizon, dag.lastHistoryPruneBlockHorizon)
+
+  doAssert blockHorizon <= stateHorizon,
+    "we must never prune blocks while leaving the state"
+
+  debug "Pruning history",
+    horizon, blockHorizon, stateHorizon,
+    lastHorizon = dag.lastHistoryPruneHorizon,
+    lastBlockHorizon = dag.lastHistoryPruneBlockHorizon,
+    tail = dag.tail, head = dag.head
+
+  dag.lastHistoryPruneHorizon = horizon
+  dag.lastHistoryPruneBlockHorizon = blockHorizon
 
   dag.db.withManyWrites:
-    if stateHorizon > 0 and
-        stateHorizon > (dag.tail.slot + SLOTS_PER_EPOCH - 1).epoch():
+    if stateHorizon > dag.tail.slot:
       # First, we want to see if it's possible to prune any states - we store one
       # state every EPOCHS_PER_STATE_SNAPSHOT, so this happens infrequently.
-      debug "Pruning states",
-        horizon, stateHorizon, tail = dag.tail, head = dag.head
 
       var
-        cur = dag.getBlockIdAtSlot(stateHorizon.start_slot)
+        cur = dag.getBlockIdAtSlot(stateHorizon)
 
       var first = true
       while cur.isSome():
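
To make the rounding at the end of `pruneStep` concrete, here is a hypothetical stand-alone check assuming the mainnet spacing of 1024 slots between state snapshots; the state horizon is the block horizon rounded up to the next snapshot boundary, which is why the `blockHorizon <= stateHorizon` assertion holds.

# Illustrative only - mirrors the rounding formula, not the PR's types
const slotsPerStateSnapshot = 1024'u64

func stateHorizonFor(blockHorizon: uint64): uint64 =
  ## Round up so that a snapshot state is only dropped once every block
  ## before it has been removed
  ((blockHorizon + slotsPerStateSnapshot - 1) div slotsPerStateSnapshot) *
    slotsPerStateSnapshot

when isMainModule:
  doAssert stateHorizonFor(0) == 0
  doAssert stateHorizonFor(1500) == 2048
  doAssert stateHorizonFor(2048) == 2048
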
@@ -1882,7 +1942,7 @@ proc pruneHistory*(dag: ChainDAGRef, startup = false) =
           debug "Pruning historical state", bs
           dag.delState(bs)
         elif not bs.isProposed:
-          debug "Reached already-pruned slot, done pruning states", bs
+          trace "Reached already-pruned slot, done pruning states", bs
           break
 
         if bs.isProposed:
@@ -1896,40 +1956,15 @@ proc pruneHistory*(dag: ChainDAGRef, startup = false) =
         else:
           break
 
-    # We can now prune all blocks before the tail - however, we'll add a
-    # small lag so that we typically prune one block at a time - otherwise,
-    # we'd be pruning `EPOCHS_PER_STATE_SNAPSHOT` every time the tail is
-    # updated - if H is the "normal" pruning point, E is the adjusted one and
-    # when T0 is reset to T1, we'll continue removing block by block instead
-    # of removing all blocks between T0 and T1
-    #   T0      T1
-    #   |       |
-    # ---------------------
-    #   |       |
-    #   E       H
-    const extraSlots = EPOCHS_PER_STATE_SNAPSHOT * SLOTS_PER_EPOCH
-
-    if horizon < extraSlots:
-      return
-
-    let
-      # We don't need the tail block itself, but we do need everything after
-      # that in order to be able to recreate states
-      tailSlot = dag.tail.slot
-      blockHorizon =
-        min(horizon - extraSlots, tailSlot)
-
-    if dag.tail.slot - preTail.slot > 8192:
-      # First-time pruning or long offline period
-      notice "Pruning deep block history, this may take several minutes",
-        preTail, tail = dag.tail, head = dag.head, blockHorizon
-    else:
-      debug "Pruning blocks",
-        preTail, tail = dag.tail, head = dag.head, blockHorizon
-
-    block:
-      var cur = dag.getBlockIdAtSlot(blockHorizon).map(proc(x: auto): auto = x.bid)
+    # Prune blocks after sanity-checking that we don't prune post-tail blocks -
+    # this could happen if a state is missing at the expected state horizon and
+    # would indicate a partially inconsistent database since the base
+    # invariant is that there exists a state at the snapshot slot - better not
+    # further mess things up regardless
+    if blockHorizon > GENESIS_SLOT and blockHorizon <= dag.tail.slot:
+      var
+        # Leave the horizon block itself
+        cur = dag.getBlockIdAtSlot(blockHorizon - 1).map(proc(x: auto): auto = x.bid)
       while cur.isSome:
         let
@@ -1941,19 +1976,22 @@ proc pruneHistory*(dag: ChainDAGRef, startup = false) =
           break
 
         if not dag.db.delBlock(fork, bid.root):
-          # Stop at the first gap - a buggy DB might have more blocks but we
-          # have no efficient way of detecting that
+          # Stop at the first gap - this is typically the pruning point of the
+          # previous call to pruneHistory. An inconsistent DB might have more
+          # blocks beyond that point but we have no efficient way of detecting
+          # that.
           break
 
         cur = dag.parent(bid)
 
-    if startup:
+    if startup and
+        dag.cfg.consensusForkAtEpoch(blockHorizon.epoch) > ConsensusFork.Phase0:
       # Once during start, we'll clear all "old fork" data - this ensures we get
       # rid of any leftover junk in the tables - we do so after linear pruning
       # so as to "mostly" clean up the phase0 tables as well (which cannot be
      # pruned easily by fork)
-      let stateFork = dag.cfg.consensusForkAtEpoch(tailSlot.epoch)
+      let stateFork = dag.cfg.consensusForkAtEpoch(dag.tail.slot.epoch)
       if stateFork > ConsensusFork.Phase0:
         for fork in ConsensusFork.Phase0..<stateFork:
           dag.db.clearStates(fork)


@@ -1155,7 +1155,10 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
   node.consensusManager[].pruneStateCachesAndForkChoice()
 
   if node.config.historyMode == HistoryMode.Prune:
-    node.dag.pruneHistory()
+    if not (slot + 1).is_epoch():
+      # The epoch slot already is "heavy" due to the epoch processing, leave
+      # the pruning for later
+      node.dag.pruneHistory()
 
   when declared(GC_fullCollect):
     # The slots in the beacon node work as frames in a game: we want to make
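
A small stand-alone sketch of this scheduling tweak, assuming mainnet's 32 slots per epoch (illustrative, not code from the PR): the prune call is skipped at the end of the last slot of an epoch, since the upcoming epoch-boundary slot already carries heavy epoch processing.

# Illustrative only - mirrors `not (slot + 1).is_epoch()` on plain integers
const slotsPerEpoch = 32'u64

func shouldPruneAfter(slot: uint64): bool =
  (slot + 1) mod slotsPerEpoch != 0

when isMainModule:
  doAssert shouldPruneAfter(30)      # next slot (31) is still mid-epoch: prune
  doAssert not shouldPruneAfter(31)  # next slot (32) starts a new epoch: skip
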


@@ -3,13 +3,12 @@
 !!! note ""
     This feature is available from Nimbus `v23.1.0` onwards.
 
-In order for the network to remain healthy, each node must keep a minimum of 5 months of historical block data.
+Ethereum consensus nodes are required to keep a minimum of 5 months of historical block data ensuring the health of the network.
 Nimbus can be configured to either retain or remove historical data past that point using the `--history` option.
 
 !!! note "Default mode"
-    Nimbus currently retains full history by default.
-    After the `Capella` hard fork, this will change to pruning.
+    Nimbus currently retains history by default - with the `Capella` hard fork completed successfully, this will soon be changed to `prune`.
 
 ## History modes
@@ -18,7 +17,7 @@ It does not affect the ability to perform validator duties.
 
 In `prune` mode, blocks and states past that point are removed from the database continuously and the freed space is reused for more recent data.
 
-!!! info
+!!! tip "Database size"
     Although blocks and states are pruned, the database will not shrink in size: instead, the freed space is reused for new data.
 
 In `archive` mode, queries can be as far back as the state that the database was created with — the checkpoint state in the case of trusted node sync or genesis.
@@ -28,16 +27,13 @@ In `archive` mode, queries can be as far back as the state that the database was
 It is possible to switch between `prune` and `archive` modes.
 
 When switching to `prune` mode, deep history will be removed from the database and the prune point will be updated continuously as usual.
-As noted above, the database will not shrink in size.
-To reclaim space, perform a [trusted node sync](./trusted-node-sync.md) on a fresh database instead.
 
-!!! warning "Backwards compatiblity"
-    Versions prior to Nimbus `v23.1.0` do not fully support pruned databases!
-    To downgrade, you may need to perform a [trusted node sync](./trusted-node-sync.md).
+As noted above, the database will not shrink in size.
+To reclaim space, perform a [trusted node sync](./trusted-node-sync.md) using a fresh database.
 
 When switching to `archive` mode, the node will start keeping history from the most recent prune point, but will not recreate deep history.
-In order to recreate deep history in a pruned node, downloading the [era archive of deep history](./era-store.md) and reindexing the database using [trusted node sync](./trusted-node-sync.md) with the `--reindex` option is necessary — this is a lengthy operation.
+In order to recreate deep history in a pruned node, download the [era archive of deep history](./era-store.md) and [reindex the database](./trusted-node-sync.md#recreate-historical-state-access-indices) — this operation may take several hours.
 
 ## Command line