History pruning (fixes #4419) (#4445)

Introduce (optional) pruning of historical data - a pruned node will
continue to answer queries for historical data going back
`MIN_EPOCHS_FOR_BLOCK_REQUESTS` epochs, or roughly 5 months, capping
typical database usage at around 60-70 GB.

To enable pruning, add `--history=prune` to the command line. On the
first start, old data will be cleared (which may take a while); after
that, data is pruned continuously.

When pruning an existing database, the database file will not shrink:
instead, the freed space is recycled as the node continues to run. To
actually free up disk space, perform a trusted node sync with a fresh
database.

When a pruned node is switched to archive mode, history is retained
from that point onwards.

History pruning is scheduled to be enabled by default in a future
release.

In this PR, the `minimal` mode from #4419 is not implemented, meaning
that the retention periods for states and blocks are always the same.
Depending on user demand, a future PR may implement `minimal` as well.
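
For scale: on mainnet, `MIN_EPOCHS_FOR_BLOCK_REQUESTS` is 33024 epochs, i.e. 33024 × 32 slots × 12 s ≈ 147 days, which is where the ~5 month figure comes from. A minimal usage sketch with the standard launcher script (assuming an existing mainnet data directory and otherwise unchanged options):

```sh
# First start with pruning enabled - clearing old data may take a while
./run-mainnet-beacon-node.sh --history=prune

# Switching back later keeps history from that point onwards only
./run-mainnet-beacon-node.sh --history=archive
```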
Jacek Sieka, 2023-01-07 11:02:15 +01:00, committed by GitHub
parent d80082c784
commit 0ba9fc4ede
16 changed files with 402 additions and 108 deletions

View File

@ -374,6 +374,11 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
+ deletePeer() test OK
```
OK: 12/12 Fail: 0/12 Skip: 0/12
## Pruning
```diff
+ prune states OK
```
OK: 1/1 Fail: 0/1 Skip: 0/1
## Remove keystore testing suite
```diff
+ vesion 1 OK
@ -615,4 +620,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
OK: 9/9 Fail: 0/9 Skip: 0/9
---TOTAL---
OK: 344/349 Fail: 0/349 Skip: 5/349
OK: 345/350 Fail: 0/350 Skip: 5/350

View File

@ -70,7 +70,7 @@ type
##
## 1.2 moved BeaconStateNoImmutableValidators to a separate table to
## alleviate some of the btree balancing issues - this doubled the speed but
## was still
## was still slow
##
## 1.3 creates `kvstore` with rowid, making it quite fast, but doesn't do
## anything about existing databases. Versions after that use a separate
@ -420,41 +420,43 @@ proc loadImmutableValidators(vals: DbSeq[ImmutableValidatorDataDb2]): seq[Immuta
withdrawal_credentials: tmp.withdrawal_credentials)
template withManyWrites*(dbParam: BeaconChainDB, body: untyped) =
let db = dbParam
# Make sure we're not nesting transactions.
if isInsideTransaction(db.db):
raiseAssert "Sqlite does not support nested transactions"
let
db = dbParam
nested = isInsideTransaction(db.db)
# We don't enforce strong ordering or atomicity requirements in the beacon
# chain db in general, relying instead on readers to be able to deal with
# minor inconsistencies - however, putting writes in a transaction is orders
# of magnitude faster when doing many small writes, so we use this as an
# optimization technique and the template is named accordingly.
expectDb db.db.exec("BEGIN TRANSACTION;")
if not nested:
expectDb db.db.exec("BEGIN TRANSACTION;")
var commit = false
try:
body
commit = true
body
commit = true
finally:
if commit:
expectDb db.db.exec("COMMIT TRANSACTION;")
else:
# https://www.sqlite.org/lang_transaction.html
#
# For all of these errors, SQLite attempts to undo just the one statement
# it was working on and leave changes from prior statements within the same
# transaction intact and continue with the transaction. However, depending
# on the statement being evaluated and the point at which the error occurs,
# it might be necessary for SQLite to rollback and cancel the entire transaction.
# An application can tell which course of action SQLite took by using the
# sqlite3_get_autocommit() C-language interface.
#
# It is recommended that applications respond to the errors listed above by
# explicitly issuing a ROLLBACK command. If the transaction has already been
# rolled back automatically by the error response, then the ROLLBACK command
# will fail with an error, but no harm is caused by this.
#
if isInsideTransaction(db.db): # calls `sqlite3_get_autocommit`
expectDb db.db.exec("ROLLBACK TRANSACTION;")
if not nested:
if commit:
expectDb db.db.exec("COMMIT TRANSACTION;")
else:
# https://www.sqlite.org/lang_transaction.html
#
# For all of these errors, SQLite attempts to undo just the one statement
# it was working on and leave changes from prior statements within the same
# transaction intact and continue with the transaction. However, depending
# on the statement being evaluated and the point at which the error occurs,
# it might be necessary for SQLite to rollback and cancel the entire transaction.
# An application can tell which course of action SQLite took by using the
# sqlite3_get_autocommit() C-language interface.
#
# It is recommended that applications respond to the errors listed above by
# explicitly issuing a ROLLBACK command. If the transaction has already been
# rolled back automatically by the error response, then the ROLLBACK command
# will fail with an error, but no harm is caused by this.
#
if isInsideTransaction(db.db): # calls `sqlite3_get_autocommit`
expectDb db.db.exec("ROLLBACK TRANSACTION;")
proc new*(T: type BeaconChainDBV0,
db: SqStoreRef,
@ -839,29 +841,34 @@ proc putStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot,
proc putStateDiff*(db: BeaconChainDB, root: Eth2Digest, value: BeaconStateDiff) =
db.stateDiffs.putSnappySSZ(root.data, value)
proc delBlock*(db: BeaconChainDB, key: Eth2Digest) =
proc delBlock*(db: BeaconChainDB, fork: BeaconBlockFork, key: Eth2Digest): bool =
var deleted = false
db.withManyWrites:
for kv in db.blocks:
kv.del(key.data).expectDb()
db.summaries.del(key.data).expectDb()
discard db.summaries.del(key.data).expectDb()
deleted = db.blocks[fork].del(key.data).expectDb()
deleted
proc delState*(db: BeaconChainDB, key: Eth2Digest) =
db.withManyWrites:
for kv in db.statesNoVal:
kv.del(key.data).expectDb()
proc delState*(db: BeaconChainDB, fork: BeaconStateFork, key: Eth2Digest) =
discard db.statesNoVal[fork].del(key.data).expectDb()
proc clearBlocks*(db: BeaconChainDB, fork: BeaconBlockFork) =
discard db.blocks[fork].clear().expectDb()
proc clearStates*(db: BeaconChainDB, fork: BeaconStateFork) =
discard db.statesNoVal[fork].clear().expectDb()
proc delKeyValue*(db: BeaconChainDB, key: array[1, byte]) =
db.keyValues.del(key).expectDb()
db.v0.backend.del(key).expectDb()
discard db.keyValues.del(key).expectDb()
discard db.v0.backend.del(key).expectDb()
proc delKeyValue*(db: BeaconChainDB, key: DbKeyKind) =
db.delKeyValue(subkey(key))
proc delStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot) =
db.stateRoots.del(stateRootKey(root, slot)).expectDb()
discard db.stateRoots.del(stateRootKey(root, slot)).expectDb()
proc delStateDiff*(db: BeaconChainDB, root: Eth2Digest) =
db.stateDiffs.del(root.data).expectDb()
discard db.stateDiffs.del(root.data).expectDb()
proc putHeadBlock*(db: BeaconChainDB, key: Eth2Digest) =
db.keyValues.putRaw(subkey(kHeadBlock), key)
@ -1067,7 +1074,6 @@ proc getBlockSZ*(
of BeaconBlockFork.EIP4844:
getBlockSZ(db, key, data, eip4844.TrustedSignedBeaconBlock)
proc getStateOnlyMutableValidators(
immutableValidators: openArray[ImmutableValidatorData2],
store: KvStoreRef, key: openArray[byte],
@ -1338,6 +1344,12 @@ proc containsState*(db: BeaconChainDBV0, key: Eth2Digest): bool =
db.backend.contains(sk).expectDb() or
db.backend.contains(subkey(phase0.BeaconState, key)).expectDb()
proc containsState*(db: BeaconChainDB, fork: BeaconStateFork, key: Eth2Digest,
legacy: bool = true): bool =
if db.statesNoVal[fork].contains(key.data).expectDb(): return true
(legacy and fork == BeaconStateFork.Phase0 and db.v0.containsState(key))
proc containsState*(db: BeaconChainDB, key: Eth2Digest, legacy: bool = true): bool =
for fork in countdown(BeaconStateFork.high, BeaconStateFork.low):
if db.statesNoVal[fork].contains(key.data).expectDb(): return true
@ -1418,7 +1430,7 @@ iterator getAncestorSummaries*(db: BeaconChainDB, root: Eth2Digest):
INNER JOIN next ON `key` == substr(v, 9, 32)
)
SELECT v FROM next;
"""
"""
let
stmt = expectDb db.db.prepareStmt(
summariesQuery, array[32, byte],

View File

@ -107,6 +107,10 @@ type
Json = "json"
None = "none"
HistoryMode* {.pure.} = enum
Archive = "archive"
Prune = "prune"
SlashProtCmd* = enum
`import` = "Import a EIP-3076 slashing protection interchange file"
`export` = "Export a EIP-3076 slashing protection interchange file"
@ -576,6 +580,11 @@ type
defaultValue: ""
name: "payload-builder-url" .}: string
historyMode* {.
desc: "Retention strategy for historical data (archive/pruned)"
defaultValue: HistoryMode.Archive
name: "history".}: HistoryMode
of BNStartUpCmd.createTestnet:
testnetDepositsFile* {.
desc: "A LaunchPad deposits file for the genesis state validators"

View File

@ -363,10 +363,12 @@ template frontfill*(dagParam: ChainDAGRef): Opt[BlockId] =
dag.genesis
func horizon*(dag: ChainDAGRef): Slot =
## The sync horizon that we target during backfill - ie we will not backfill
## blocks older than this from the network
if dag.head.slot.epoch > dag.cfg.MIN_EPOCHS_FOR_BLOCK_REQUESTS:
start_slot(dag.head.slot.epoch - dag.cfg.MIN_EPOCHS_FOR_BLOCK_REQUESTS)
## The sync horizon that we target during backfill - we will backfill and
## retain this and newer blocks, but anything older may get pruned depending
## on the history mode
let minSlots = dag.cfg.MIN_EPOCHS_FOR_BLOCK_REQUESTS * SLOTS_PER_EPOCH
if dag.head.slot > minSlots:
min(dag.finalizedHead.slot, dag.head.slot - minSlots)
else:
GENESIS_SLOT

View File

@ -16,6 +16,7 @@ import
metrics, snappy, chronicles,
../spec/[beaconstate, eth2_merkleization, eth2_ssz_serialization, helpers,
state_transition, validator],
../spec/forks,
../spec/datatypes/[phase0, altair, bellatrix, capella],
".."/[beacon_chain_db, era_db],
"."/[block_pools_types, block_quarantine]
@ -55,7 +56,7 @@ const
# When finality happens, we prune historical states from the database except
# for a snapshot every 32 epochs from which replays can happen - there's a
# balance here between making long replays and saving on disk space
EPOCHS_PER_STATE_SNAPSHOT = 32
EPOCHS_PER_STATE_SNAPSHOT* = 32
proc putBlock*(
dag: ChainDAGRef, signedBlock: ForkyTrustedSignedBeaconBlock) =
@ -630,6 +631,21 @@ proc getState(
db.getState(cfg.stateForkAtEpoch(slot.epoch), state_root, state, rollback)
proc containsState*(
db: BeaconChainDB, cfg: RuntimeConfig, block_root: Eth2Digest,
slots: Slice[Slot]): bool =
var slot = slots.b
while slot >= slots.a:
let state_root = db.getStateRoot(block_root, slot)
if state_root.isSome() and
db.containsState(cfg.stateForkAtEpoch(slot.epoch), state_root.get()):
return true
if slot == slots.a: # avoid underflow at genesis
break
slot -= 1
false
proc getState*(
db: BeaconChainDB, cfg: RuntimeConfig, block_root: Eth2Digest,
slots: Slice[Slot], state: var ForkedHashedBeaconState,
@ -782,17 +798,19 @@ export
proc putState(dag: ChainDAGRef, state: ForkedHashedBeaconState, bid: BlockId) =
# Store a state and its root
let slot = getStateField(state, slot)
logScope:
blck = shortLog(bid)
stateSlot = shortLog(getStateField(state, slot))
stateSlot = shortLog(slot)
stateRoot = shortLog(getStateRoot(state))
if not dag.isStateCheckpoint(BlockSlotId.init(bid, getStateField(state, slot))):
if not dag.isStateCheckpoint(BlockSlotId.init(bid, slot)):
return
# Don't consider legacy tables here, they are slow to read so we'll want to
# rewrite things in the new table anyway.
if dag.db.containsState(getStateRoot(state), legacy = false):
if dag.db.containsState(
dag.cfg.stateForkAtEpoch(slot.epoch), getStateRoot(state), legacy = false):
return
let startTick = Moment.now()
@ -925,7 +943,7 @@ proc init*(T: type ChainDAGRef, cfg: RuntimeConfig, db: BeaconChainDB,
onBlockAdded: onBlockCb,
onHeadChanged: onHeadCb,
onReorgHappened: onReorgCb,
onFinHappened: onFinCb
onFinHappened: onFinCb,
)
loadTick = Moment.now()
@ -1433,7 +1451,6 @@ proc updateState*(
ancestors.add(cur.bid)
# Move slot by slot to capture epoch boundary states
# TODO https://github.com/nim-lang/Nim/issues/19613
cur = dag.parentOrSlot(cur).valueOr:
break
@ -1477,7 +1494,6 @@ proc updateState*(
return false
# Move slot by slot to capture epoch boundary states
# TODO https://github.com/nim-lang/Nim/issues/19613
cur = dag.parentOrSlot(cur).valueOr:
if not dag.getStateByParent(cur.bid, state):
notice "Request for pruned historical state",
@ -1564,13 +1580,14 @@ proc updateState*(
true
proc delState(dag: ChainDAGRef, bsi: BlockSlotId) =
# Delete state state and mapping for a particular block+slot
# Delete state and mapping for a particular block+slot
if not dag.isStateCheckpoint(bsi):
return # We only ever save epoch states
if (let root = dag.db.getStateRoot(bsi.bid.root, bsi.slot); root.isSome()):
dag.db.delState(root.get())
dag.db.delStateRoot(bsi.bid.root, bsi.slot)
dag.db.withManyWrites:
dag.db.delStateRoot(bsi.bid.root, bsi.slot)
dag.db.delState(dag.cfg.stateForkAtEpoch(bsi.slot.epoch), root.get())
proc pruneBlockSlot(dag: ChainDAGRef, bs: BlockSlot) =
# TODO: should we move that disk I/O to `onSlotEnd`
@ -1582,7 +1599,8 @@ proc pruneBlockSlot(dag: ChainDAGRef, bs: BlockSlot) =
dag.optimisticRoots.excl bs.blck.root
dag.forkBlocks.excl(KeyedBlockRef.init(bs.blck))
dag.db.delBlock(bs.blck.root)
discard dag.db.delBlock(
dag.cfg.blockForkAtEpoch(bs.blck.slot.epoch), bs.blck.root)
proc pruneBlocksDAG(dag: ChainDAGRef) =
## This prunes the block DAG
@ -1766,10 +1784,10 @@ proc pruneStateCachesDAG*(dag: ChainDAGRef) =
prev = dag.parentOrSlot(dag.stateCheckpoint(dag.lastPrunePoint))
while cur.isSome and prev.isSome and cur.get() != prev.get():
if not isFinalizedStateSnapshot(cur.get().slot) and
cur.get().slot != dag.tail.slot:
dag.delState(cur.get())
# TODO https://github.com/nim-lang/Nim/issues/19613
let bs = cur.get()
if not isFinalizedStateSnapshot(bs.slot) and
bs.slot != dag.tail.slot:
dag.delState(bs)
let tmp = cur.get()
cur = dag.parentOrSlot(tmp)
@ -1789,6 +1807,129 @@ proc pruneStateCachesDAG*(dag: ChainDAGRef) =
statePruneDur = statePruneTick - startTick,
epochRefPruneDur = epochRefPruneTick - statePruneTick
proc pruneHistory*(dag: ChainDAGRef, startup = false) =
if dag.db.db.readOnly:
return
let horizon = dag.horizon()
if horizon == GENESIS_SLOT:
return
let
preTail = dag.tail
# Round to state snapshot boundary - this is where we'll leave the tail
# after pruning
stateHorizon = Epoch((horizon.epoch div EPOCHS_PER_STATE_SNAPSHOT) * EPOCHS_PER_STATE_SNAPSHOT)
dag.db.withManyWrites:
if stateHorizon > 0 and
stateHorizon > (dag.tail.slot + SLOTS_PER_EPOCH - 1).epoch():
# First, we want to see if it's possible to prune any states - we store one
# state every EPOCHS_PER_STATE_SNAPSHOT, so this happens infrequently.
debug "Pruning states",
horizon, stateHorizon, tail = dag.tail, head = dag.head
var
cur = dag.getBlockIdAtSlot(stateHorizon.start_slot)
var first = true
while cur.isSome():
let bs = cur.get()
if dag.db.containsState(dag.cfg, bs.bid.root, bs.slot..bs.slot):
if first:
# We leave the state on the prune horizon intact and update the tail
# to point to this state, indicating the new point in time from
# which we can load states in general.
debug "Updating tail", bs
dag.db.putTailBlock(bs.bid.root)
dag.tail = bs.bid
first = false
else:
debug "Pruning historical state", bs
dag.delState(bs)
elif not bs.isProposed:
debug "Reached already-pruned slot, done pruning states", bs
break
if bs.isProposed:
# We store states either at the same slot as the block (checkpoint) or
# by advancing the slot to the nearest epoch start - check both when
# pruning
cur = dag.parentOrSlot(bs)
elif bs.slot.epoch > EPOCHS_PER_STATE_SNAPSHOT:
# Jump one snapshot interval at a time, but don't prune genesis
cur = dag.getBlockIdAtSlot(start_slot(bs.slot.epoch() - EPOCHS_PER_STATE_SNAPSHOT))
else:
break
# We can now prune all blocks before the tail - however, we'll add a
# small lag so that we typically prune one block at a time - otherwise,
# we'd be pruning `EPOCHS_PER_STATE_SNAPSHOT` every time the tail is
# updated - if H is the "normal" pruning point, E is the adjusted one and
# when T0 is reset to T1, we'll continue removing block by block instead
# of removing all blocks between T0 and T1
# T0 T1
# | |
# ---------------------
# | |
# E H
const extraSlots = EPOCHS_PER_STATE_SNAPSHOT * SLOTS_PER_EPOCH
if horizon < extraSlots:
return
let
# We don't need the tail block itself, but we do need everything after
# that in order to be able to recreate states
tailSlot = dag.tail.slot
blockHorizon =
min(horizon - extraSlots, tailSlot)
if dag.tail.slot - preTail.slot > 8192:
# First-time pruning or long offline period
notice "Pruning deep block history, this may take several minutes",
preTail, tail = dag.tail, head = dag.head, blockHorizon
else:
debug "Pruning blocks",
preTail, tail = dag.tail, head = dag.head, blockHorizon
block:
var cur = dag.getBlockIdAtSlot(blockHorizon).map(proc(x: auto): auto = x.bid)
while cur.isSome:
let
bid = cur.get()
fork = dag.cfg.blockForkAtEpoch(bid.slot.epoch)
if bid.slot == GENESIS_SLOT:
# Leave genesis block for nostalgia and the REST API
break
if not dag.db.delBlock(fork, bid.root):
# Stop at the first gap - a buggy DB might have more blocks but we
# have no efficient way of detecting that
break
cur = dag.parent(bid)
if startup:
# Once during start, we'll clear all "old fork" data - this ensures we get
# rid of any leftover junk in the tables - we do so after linear pruning
# so as to "mostly" clean up the phase0 tables as well (which cannot be
# pruned easily by fork)
let stateFork = dag.cfg.stateForkAtEpoch(tailSlot.epoch)
if stateFork > BeaconStateFork.Phase0:
for fork in BeaconStateFork.Phase0..<stateFork:
dag.db.clearStates(fork)
let blockFork = dag.cfg.blockForkAtEpoch(blockHorizon.epoch)
if blockFork > BeaconBlockFork.Phase0:
for fork in BeaconBlockFork.Phase0..<blockFork:
dag.db.clearBlocks(fork)
proc loadExecutionBlockRoot*(dag: ChainDAGRef, bid: BlockId): Eth2Digest =
if dag.cfg.blockForkAtEpoch(bid.slot.epoch) < BeaconBlockFork.Bellatrix:
return ZERO_HASH
@ -2179,7 +2320,7 @@ proc aggregateAll*(
ok(finish(aggregateKey))
func needsBackfill*(dag: ChainDAGRef): bool =
dag.backfill.slot > GENESIS_SLOT
dag.backfill.slot > dag.horizon
proc rebuildIndex*(dag: ChainDAGRef) =
## After a checkpoint sync, we lack intermediate states to replay from - this
@ -2221,7 +2362,7 @@ proc rebuildIndex*(dag: ChainDAGRef) =
junk.add((k, v))
continue
if not dag.db.containsState(v):
if not dag.db.containsState(dag.cfg.stateForkAtEpoch(k[0].epoch), v):
continue # If it's not in the database..
canonical[k[0].epoch div EPOCHS_PER_STATE_SNAPSHOT] = v
@ -2329,4 +2470,4 @@ proc rebuildIndex*(dag: ChainDAGRef) =
for i in junk:
dag.db.delStateRoot(i[0][1], i[0][0])
dag.db.delState(i[1])
dag.db.delState(dag.cfg.stateForkAtEpoch(i[0][0].epoch), i[1])

View File

@ -187,6 +187,10 @@ proc loadChainDag(
dataDir = config.dataDir
quit 1
# The first pruning after restart may take a while..
if config.historyMode == HistoryMode.Prune:
dag.pruneHistory(true)
dag
proc checkWeakSubjectivityCheckpoint(
@ -1124,6 +1128,9 @@ proc onSlotEnd(node: BeaconNode, slot: Slot) {.async.} =
# This is the last pruning to do as it clears the "needPruning" condition.
node.consensusManager[].pruneStateCachesAndForkChoice()
if node.config.historyMode == HistoryMode.Prune:
node.dag.pruneHistory()
when declared(GC_fullCollect):
# The slots in the beacon node work as frames in a game: we want to make
# sure that we're ready for the next one and don't get stuck in lengthy

View File

@ -79,6 +79,9 @@ type
EpochParticipationFlags* =
distinct List[ParticipationFlags, Limit VALIDATOR_REGISTRY_LIMIT]
## Not a HashList because the list sees significant updates every block
## effectively making the cost of clearing the cache higher than the typical
## gains
# https://github.com/ethereum/consensus-specs/blob/v1.3.0-alpha.2/specs/altair/beacon-chain.md#syncaggregate
SyncAggregate* = object

View File

@ -104,6 +104,7 @@ nav:
- Storage:
- 'data-dir.md'
- 'era-store.md'
- 'history.md'
- 'migration-options.md'
- 'attestation-performance.md'
- 'troubleshooting.md'

View File

@ -28,6 +28,8 @@ drwx------ 1 nimbus nimbus 250 Jul 19 18:18 validators
The `db` folder contains historical chain data and information about the latest observed state of the chain. If you remove the `db` folder, the beacon node will have to resync.
The growth of the database depends on the [history mode](./history.md).
### `secrets` and `validators`
These two folders contain your validator keys as well as the passwords needed to unlock them when starting the beacon node.

View File

@ -12,9 +12,26 @@ Each era file contains the blocks of 8192 slots (~27 hours). Blocks in era files
Nimbus can both create and use era files as a starting point to regenerate past history as well as to serve blocks.
## Importing era files
To import an era archive, place the files in a folder called `era` in the [data directory](./data-dir.md):
```sh
# Go to the nimbus directory
cd build/data/shared_mainnet_0
# Create era directory
mkdir -p era
# Download era store from era provider
wget --no-parent -A '*.era' -q --show-progress -nd -r -c https://provider/era
```
With the era files present, perform a [trusted node sync](./trusted-node-sync.md) to complete the import, possibly with `--reindex` in order to create an [archive node](./history.md).
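
As a sketch, the follow-up sync could look like the following - the trusted node URL and data directory are placeholders for your own setup, and `--reindex` is only needed if you want to serve deep historical states:

```sh
# Complete the import by syncing against a trusted node - era files already
# present in the data directory are picked up automatically
build/nimbus_beacon_node trustedNodeSync \
  --network:mainnet \
  --data-dir=build/data/shared_mainnet_0 \
  --trusted-node-url=http://localhost:5052 \
  --reindex
```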
## Generating era files
To generate era files, you need to first [build](./build.md) Nimbus from source, and [sync](./start-syncing.md) the node using either full sync or a trusted node sync with [`--backfill`](./trusted-node-sync.md#delay-block-history-backfill) and [`--reindex`](./trusted-node-sync.md#recreate-historical-state-access-indices) enabled.
To generate era files, you need to first [build](./build.md) Nimbus from source and [sync](./start-syncing.md) the node using full sync. A checkpoint-synced node can be used to generate era files from the checkpoint onwards.
After that, build the additional `ncli_db` tool:
@ -43,26 +60,10 @@ It is recommended to set up a cron job or a timer, and run the export command ev
!!! tip
You do not need to stop Nimbus to generate era files - it is however not recommended to run era file generation on a node that is also serving validators.
## Sharing era files
Era files can be shared directly from the `era` folder using a web server, or simply by copying them to a new location.
## Importing era files
Nimbus supports reading era files directly, replacing the backfill of a trusted node sync. To use era files instead of backfill, copy the `era` folder to the data directory, then perform a [trusted node sync](./trusted-node-sync.md) with `--backfill=false`.
```sh
# Go to the nimbus directory
cd build/data/shared_mainnet_0
# Create era directory
mkdir -p era
# Download era store from era provider
wget --no-parent -A '*.era' -q --show-progress -nd -r -c https://provider/era
```
## Options
You can pass a custom era store location to Nimbus using `--era-dir`:

View File

@ -0,0 +1,47 @@
# Historical data
!!! note ""
This feature is available from `v23.1.0` onwards
In order for the network to remain healthy, each node must keep a minimum of 5 months of historical block data.
Nimbus can be configured to either retain or remove historical data past that point using the `--history` option.
!!! note "Default mode"
Nimbus currently retains full history by default - after the `Capella` hard fork, this will change to pruning.
## History modes
The history mode controls how far back Nimbus supports answering historical queries in the [REST API](./rest-api.md) - it does not affect the ability to perform validator duties.
In `prune` mode, blocks and states past that point are removed from the database continuously and the freed space is reused for more recent data.
!!! info
Although blocks and states are pruned, the database will not shrink in size - instead, the freed space is reused for new data
In `archive` mode, queries can be as far back as the state that the database was created with - the checkpoint state in the case of trusted node sync or genesis.
## Switching between modes
It is possible to switch between `prune` and `archive` modes.
When switching to `prune` mode, deep history will be removed from the database and the prune point will be updated continuously as usual. As noted above, the database will not shrink in size - to reclaim space, perform a [trusted node sync](./trusted-node-sync.md) on a fresh database instead.
!!! warning "Backwards compatibility"
Versions prior to v23.1.0 do not fully support pruned databases - to downgrade, you may need to perform a [trusted node sync](./trusted-node-sync.md).
When switching to `archive` mode, the node will start keeping history from the most recent prune point, but will not recreate deep history.
In order to recreate deep history in a pruned node, downloading the [era archive of deep history](./era-store.md) and reindexing the database using [trusted node sync](./trusted-node-sync.md) with the `--reindex` option is necessary - this is a lengthy operation.
## Command line
=== "Mainnet"
```sh
./run-mainnet-beacon-node.sh --history=prune ...
```
=== "Prater"
```sh
./run-prater-beacon-node.sh --history=prune ...
```

View File

@ -11,7 +11,7 @@ To use trusted node sync, you must have access to a node that you trust and that
Should this node, or your connection to it, be compromised, your node will not be able to detect whether or not it is being served false information.
It is possibly to use trusted node sync with a third-party API provider -- see [here](trusted-node-sync.md#verify-you-synced-the-correct-chain) for how to verify that the chain you are given corresponds to the canonical chain at the time.
It is possible to use trusted node sync with a third-party API provider -- see [here](trusted-node-sync.md#verify-you-synced-the-correct-chain) for how to verify that the chain you are given corresponds to the canonical chain at the time.
!!! tip
A list of community-operated checkpoint sync nodes can be found [here](https://eth-clients.github.io/checkpoint-sync-endpoints/) - always verify after a checkpoint sync that the right chain was provided by the node.
@ -54,10 +54,10 @@ And eventually:
Done, your beacon node is ready to serve you! Don't forget to check that you're on the canonical chain by comparing the checkpoint root with other online sources. See https://nimbus.guide/trusted-node-sync.html for more information.
```
After this the application will terminate and you can now [start the `nimbus_beacon_node`](./quick-start.md) with your usual command.
After this the application will terminate and you can now [start the beacon node](./quick-start.md) as usual.
!!! note
Because trusted node sync by default copies all blocks via REST, you may hit API limits if you are using a third-party provider. If this happens to you, you may need to use the `--backfill` option to [delay the backfill of the block history](./trusted-node-sync.md#delay-block-history-backfill).
Because trusted node sync by default copies blocks via REST, you may hit API limits if you are using a third-party provider. If this happens to you, you may need to use the `--backfill` option to [delay the backfill of the block history](./trusted-node-sync.md#delay-block-history-backfill).
## Verify you synced the correct chain
@ -80,16 +80,22 @@ The `head` root is also printed in the log output at regular intervals.
## Advanced
### Skip syncing the history of deposits
### Sync deposit history
The recently standardized Beacon API endpoint `/eth/v1/beacon/deposit_snapshot` allows a client to skip downloading the entire history of deposits by downloading a small snapshot of the state of the validator deposit contract. To take advantage of this functionality, make sure you are syncing against a beacon node which supports it (e.g. Nimbus 22.12.0 or later) and specify the command line option `--with-deposit-snapshot` when executing the `trustedNodeSync` command.
!!! note ""
This feature is available from `v22.12.0` onwards
The `--with-deposit-snapshot` option allows syncing deposit history via REST, avoiding the need to search the execution client for this information and thus allowing the client to more quickly start producing blocks.
!!! note
The API endpoint for downloading this information was recently added to the Beacon API specification and is available on nodes running Nimbus v22.12.0 and later. For other checkpoint sources, consult their documentation with regards to the `/eth/v1/beacon/deposit_snapshot` endpoint.
!!! tip
It's safe to always specify this option. Nimbus will produce a warning if the specified beacon node doesn't support the required endpoint. Future versions of Nimbus will enable the option by default.
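
For illustration, adding the flag to a trusted node sync might look as follows (the URL and data directory are placeholders for your own setup):

```sh
# Fetch the deposit contract snapshot via REST during trusted node sync so the
# execution client does not have to be scanned for the full deposit history
build/nimbus_beacon_node trustedNodeSync \
  --network:mainnet \
  --data-dir=build/data/shared_mainnet_0 \
  --trusted-node-url=http://localhost:5052 \
  --with-deposit-snapshot
```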
### Delay block history backfill
By default, both the state and the full block history will be downloaded from the trusted node.
By default, both state and block history will be downloaded from the trusted node.
It is possible to get started more quickly by delaying the backfill of the block history using the `--backfill=false` parameter. In this case, the beacon node will first sync to the current head so that it can start performing its duties, then backfill the blocks from the network.
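
A hedged example (placeholder URL and data directory) - with `--backfill=false`, the node starts performing duties as soon as the head state is in place and fills in older blocks from the network afterwards:

```sh
# Sync the checkpoint state now, backfill the block history from the network later
build/nimbus_beacon_node trustedNodeSync \
  --network:mainnet \
  --data-dir=build/data/shared_mainnet_0 \
  --trusted-node-url=http://localhost:5052 \
  --backfill=false
```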
@ -126,7 +132,9 @@ curl -o state.32000.ssz \
## Recreate historical state access indices
When performing checkpoint sync, the historical state data from the time before the checkpoint is not available. To recreate the indices and caches necessary for historical state access, run trusted node sync with the `--reindex` flag - this can be done on an already-synced node as well, in which case the process will simply resume where it left off:
When performing trusted node sync, the historical state data from the time before the trusted node checkpoint is not available. To recreate the indices and caches necessary for historical state access, run trusted node sync with the `--reindex` flag - this can be done on an already-synced node as well, in which case the process will simply resume where it left off:
To recreate a historical index from before the checkpoint, it is necessary to first download an [era archive](./era-store.md) containing the deep block history.
```sh
build/nimbus_beacon_node trustedNodeSync \

View File

@ -42,7 +42,7 @@ type
putBlock = "Store a given SignedBeaconBlock in the database, potentially updating some of the pointers"
rewindState = "Extract any state from the database based on a given block and slot, replaying if needed"
verifyEra = "Verify a single era file"
exportEra = "Write an experimental era file"
exportEra = "Export historical data to era files"
importEra = "Import era files to the database"
validatorPerf
validatorDb = "Create or update attestation performance database"
@ -507,7 +507,7 @@ proc cmdExportEra(conf: DbConf, cfg: RuntimeConfig) =
else: some((era - 1).start_slot)
endSlot = era.start_slot
eraBid = dag.atSlot(dag.head.bid, endSlot).valueOr:
echo "Skipping era ", era, ", blocks not available"
echo "Skipping era ", era, ", history not available"
era += 1
continue

View File

@ -157,8 +157,8 @@ suite "Beacon chain DB" & preset():
tmp2 == encodeFramed(tmp)
uncompressedLenFramed(tmp2).isSome
db.delBlock(root)
check:
db.delBlock(BeaconBlockFork.Phase0, root)
not db.containsBlock(root)
not db.containsBlock(root, phase0.TrustedSignedBeaconBlock)
not db.containsBlock(root, altair.TrustedSignedBeaconBlock)
@ -204,8 +204,8 @@ suite "Beacon chain DB" & preset():
tmp2 == encodeFramed(tmp)
uncompressedLenFramed(tmp2).isSome
db.delBlock(root)
check:
db.delBlock(BeaconBlockFork.Altair, root)
not db.containsBlock(root)
not db.containsBlock(root, phase0.TrustedSignedBeaconBlock)
not db.containsBlock(root, altair.TrustedSignedBeaconBlock)
@ -251,8 +251,8 @@ suite "Beacon chain DB" & preset():
tmp2 == encodeFramed(tmp)
uncompressedLenFramed(tmp2).isSome
db.delBlock(root)
check:
db.delBlock(BeaconBlockFork.Bellatrix, root)
not db.containsBlock(root)
not db.containsBlock(root, phase0.TrustedSignedBeaconBlock)
not db.containsBlock(root, altair.TrustedSignedBeaconBlock)
@ -298,8 +298,8 @@ suite "Beacon chain DB" & preset():
tmp2 == encodeFramed(tmp)
uncompressedLenFramed(tmp2).isSome
db.delBlock(root)
check:
db.delBlock(BeaconBlockFork.Capella, root)
not db.containsBlock(root)
not db.containsBlock(root, phase0.TrustedSignedBeaconBlock)
not db.containsBlock(root, altair.TrustedSignedBeaconBlock)
@ -345,8 +345,8 @@ suite "Beacon chain DB" & preset():
tmp2 == encodeFramed(tmp)
uncompressedLenFramed(tmp2).isSome
db.delBlock(root)
check:
db.delBlock(BeaconBlockFork.EIP4844, root)
not db.containsBlock(root)
not db.containsBlock(root, phase0.TrustedSignedBeaconBlock)
not db.containsBlock(root, altair.TrustedSignedBeaconBlock)
@ -379,7 +379,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(db.getPhase0StateRef(root)[]) == root
db.delState(root)
db.delState(BeaconStateFork.Phase0, root)
check:
not db.containsState(root)
db.getPhase0StateRef(root).isNil
@ -397,7 +397,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(db.getAltairStateRef(root)[]) == root
db.delState(root)
db.delState(BeaconStateFork.Altair, root)
check:
not db.containsState(root)
db.getAltairStateRef(root).isNil
@ -415,7 +415,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(db.getBellatrixStateRef(root)[]) == root
db.delState(root)
db.delState(BeaconStateFork.Bellatrix, root)
check:
not db.containsState(root)
db.getBellatrixStateRef(root).isNil
@ -433,7 +433,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(db.getCapellaStateRef(root)[]) == root
db.delState(root)
db.delState(BeaconStateFork.Capella, root)
check:
not db.containsState(root)
db.getCapellaStateRef(root).isNil
@ -451,7 +451,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(db.getEIP4844StateRef(root)[]) == root
db.delState(root)
db.delState(BeaconStateFork.EIP4844, root)
check:
not db.containsState(root)
db.getEIP4844StateRef(root).isNil
@ -471,7 +471,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(stateBuffer[]) == root
db.delState(root)
db.delState(BeaconStateFork.Phase0, root)
check:
not db.containsState(root)
not db.getState(root, stateBuffer[], noRollback)
@ -491,7 +491,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(stateBuffer[]) == root
db.delState(root)
db.delState(BeaconStateFork.Altair, root)
check:
not db.containsState(root)
not db.getState(root, stateBuffer[], noRollback)
@ -511,7 +511,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(stateBuffer[]) == root
db.delState(root)
db.delState(BeaconStateFork.Bellatrix, root)
check:
not db.containsState(root)
not db.getState(root, stateBuffer[], noRollback)
@ -531,7 +531,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(stateBuffer[]) == root
db.delState(root)
db.delState(BeaconStateFork.Capella, root)
check:
not db.containsState(root)
not db.getState(root, stateBuffer[], noRollback)
@ -551,7 +551,7 @@ suite "Beacon chain DB" & preset():
db.containsState(root)
hash_tree_root(stateBuffer[]) == root
db.delState(root)
db.delState(BeaconStateFork.EIP4844, root)
check:
not db.containsState(root)
not db.getState(root, stateBuffer[], noRollback)
@ -734,7 +734,7 @@ suite "Beacon chain DB" & preset():
check db.containsState(state[].root)
let state2 = db.getPhase0StateRef(state[].root)
db.delState(state[].root)
db.delState(BeaconStateFork.Phase0, state[].root)
check not db.containsState(state[].root)
db.close()

View File

@ -1109,3 +1109,59 @@ suite "Latest valid hash" & preset():
dag.getEarliestInvalidBlockRoot(
b2Add[].root, b2.message.body.execution_payload.block_hash,
fallbackEarliestInvalid) == fallbackEarliestInvalid
suite "Pruning":
setup:
let
cfg = block:
var res = defaultRuntimeConfig
res.MIN_VALIDATOR_WITHDRAWABILITY_DELAY = 4
res.CHURN_LIMIT_QUOTIENT = 1
doAssert res.MIN_EPOCHS_FOR_BLOCK_REQUESTS == 4
res
db = makeTestDB(SLOTS_PER_EPOCH)
validatorMonitor = newClone(ValidatorMonitor.init())
dag = init(ChainDAGRef, cfg, db, validatorMonitor, {})
tmpState = assignClone(dag.headState)
var
verifier = BatchVerifier(rng: keys.newRng(), taskpool: Taskpool.new())
quarantine = Quarantine.init()
cache = StateCache()
blocks = @[dag.head]
for i in 0 ..< (SLOTS_PER_EPOCH * (EPOCHS_PER_STATE_SNAPSHOT + cfg.MIN_EPOCHS_FOR_BLOCK_REQUESTS)):
let blck = addTestBlock(
tmpState[], cache,
attestations = makeFullAttestations(
tmpState[], dag.head.root, getStateField(tmpState[], slot), cache, {})).phase0Data
let added = dag.addHeadBlock(verifier, blck, nilPhase0Callback)
check: added.isOk()
blocks.add(added[])
dag.updateHead(added[], quarantine)
dag.pruneAtFinalization()
test "prune states":
dag.pruneHistory()
check:
dag.tail.slot == Epoch(EPOCHS_PER_STATE_SNAPSHOT).start_slot - 1
db.containsBlock(blocks[0].root)
db.containsBlock(blocks[1].root)
# Add a block
for i in 0..2:
let blck = addTestBlock(
tmpState[], cache,
attestations = makeFullAttestations(
tmpState[], dag.head.root, getStateField(tmpState[], slot), cache, {})).phase0Data
let added = dag.addHeadBlock(verifier, blck, nilPhase0Callback)
check: added.isOk()
dag.updateHead(added[], quarantine)
dag.pruneAtFinalization()
dag.pruneHistory()
check:
dag.tail.slot == Epoch(EPOCHS_PER_STATE_SNAPSHOT).start_slot - 1
not db.containsBlock(blocks[1].root)

vendor/nim-eth vendored

@ -1 +1 @@
Subproject commit 2b5f2a27e303b13127bb525b0c7a309eaa7fbed9
Subproject commit 8f0ae55353b95f888dce8a32bf810e58f7091b96