fix db growth on attestation processing (#1860)

It turns out that we often save lots of states in the database that are the result of empty slot processing only. Here, we make sure to only save a state if a block follows. This fixes several issues:

* empty slot states are not always pruned, leading to state database size explosion
* storing states is (very) slow, which slows down processing in general, so we should only do it when it's likely to be useful
* attestation processing doesn't get stuck on saving random states that won't appear in the chain history
2020-10-15 14:28:44 +02:00 · 2020-10-15 14:28:44 +02:00 · 6b9419e547
parent 4e73d10f86
commit 6b9419e547
5 changed files with 29 additions and 34 deletions
--- a/beacon_chain/block_pools/chain_dag.nim
+++ b/beacon_chain/block_pools/chain_dag.nim
@ -493,8 +493,8 @@ proc putState*(dag: ChainDAGRef, state: StateData) =
  # As a policy, we only store epoch boundary states without the epoch block
  # (if it exists) applied - the rest can be reconstructed by loading an epoch
  # boundary state and applying the missing blocks.
-  # We also avoid states that were produced with empty slots only, except the
-  # first such state as replaying to such states should be quick.
+  # We also avoid states that were produced with empty slots only - we should
+  # not call this function for states that don't have a follow-up block
  if not state.data.data.slot.isEpoch:
    trace "Not storing non-epoch state"
    return
@ -589,18 +589,14 @@ proc get*(dag: ChainDAGRef, root: Eth2Digest): Option[BlockData] =
    none(BlockData)

 proc advanceSlots(
-    dag: ChainDAGRef, state: var StateData, slot: Slot, cache: var StateCache,
-    save: bool) =
+    dag: ChainDAGRef, state: var StateData, slot: Slot, cache: var StateCache) =
  # Given a state, advance it zero or more slots by applying empty slot
-  # processing
+  # processing - the state must be positioned at a slot before or equal to the
+  # target
  doAssert state.data.data.slot <= slot
-
-  while state.data.data.slot < slot:
-    # Process slots one at a time in case afterUpdate needs to see empty states
-    advance_slot(state.data, dag.updateFlags, cache)
-
-    if save:
-      dag.putState(state)
+  if slot > state.data.data.slot:
+    doAssert process_slots(state.data, slot, cache, dag.updateFlags),
+      "process_slots shouldn't fail when state slot is correct"

 proc applyBlock(
    dag: ChainDAGRef,
@ -611,9 +607,15 @@ proc applyBlock(
  # applied
  doAssert state.blck == blck.refs.parent

-  # `state_transition` can handle empty slots, but we want to potentially save
-  # some of the empty slot states
-  dag.advanceSlots(state, blck.data.message.slot, cache, save)
+  # `state_transition` can handle empty slots, but we want to save the state
+  # before applying the block
+  dag.advanceSlots(state, blck.data.message.slot, cache)
+
+  if save:
+    # Save state before applying the block, in case the "raw" epoch state is
+    # needed for a different fork
+    # TODO if the block fails to apply, it can be removed from the database
+    dag.putState(state)

  var statePtr = unsafeAddr state # safe because `restore` is locally scoped
  func restore(v: var HashedBeaconState) =
@ -625,7 +627,6 @@ proc applyBlock(
    cache, flags + dag.updateFlags + {slotProcessed}, restore)
  if ok:
    state.blck = blck.refs
-    dag.putState(state)

  ok

@ -645,7 +646,7 @@ proc updateStateData*(
  if state.blck == bs.blck and state.data.data.slot <= bs.slot:
    # The block is the same and we're at an early enough slot - advance the
    # state with empty slot processing until the slot is correct
-    dag.advanceSlots(state, bs.slot, cache, true)
+    dag.advanceSlots(state, bs.slot, cache)

    return

@ -681,8 +682,8 @@ proc updateStateData*(
      cur = cur.parent

  let
-    startSlot = state.data.data.slot
-    startRoot = state.data.root
+    startSlot {.used.} = state.data.data.slot # used in logs below
+    startRoot {.used.} = state.data.root
  # Time to replay all the blocks between then and now
  for i in countdown(ancestors.len - 1, 0):
    # Because the ancestors are in the database, there's no need to persist them
@ -693,9 +694,8 @@ proc updateStateData*(
      dag.applyBlock(state, dag.get(ancestors[i]), {}, cache, false)
    doAssert ok, "Blocks in database should never fail to apply.."

-  # We save states here - blocks were guaranteed to have passed through the save
-  # function once at least, but not so for empty slots!
-  dag.advanceSlots(state, bs.slot, cache, true)
+  # ...and make sure to process empty slots as requested
+  dag.advanceSlots(state, bs.slot, cache)

  beacon_state_rewinds.inc()

--- a/beacon_chain/spec/state_transition.nim
+++ b/beacon_chain/spec/state_transition.nim
@ -134,13 +134,11 @@ func clear_epoch_from_cache(cache: var StateCache, epoch: Epoch) =
    cache.beacon_proposer_indices.del i

 # https://github.com/ethereum/eth2.0-specs/blob/v1.0.0-rc.0/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
-proc advance_slot*(
+proc advance_slot(
    state: var HashedBeaconState, updateFlags: UpdateFlags,
    epochCache: var StateCache) {.nbench.} =
-  # Special case version of process_slots that moves one slot at a time - can
-  # run faster if the state root is known already (for example when replaying
-  # existing slots)
  process_slot(state)
+
  let is_epoch_transition = (state.data.slot + 1).isEpoch
  if is_epoch_transition:
    # Note: Genesis epoch = 0, no need to test if before Genesis
@ -153,16 +151,13 @@ proc advance_slot*(
  if is_epoch_transition:
    beacon_current_validators.set(get_epoch_validator_count(state.data))

+  # The root must be updated on every slot update, or the next `process_slot`
+  # will be incorrect
  state.root = hash_tree_root(state.data)

 # https://github.com/ethereum/eth2.0-specs/blob/v1.0.0-rc.0/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
 proc process_slots*(state: var HashedBeaconState, slot: Slot,
    cache: var StateCache, updateFlags: UpdateFlags = {}): bool {.nbench.} =
-  # TODO this function is not _really_ necessary: when replaying states, we
-  #      advance slots one by one before calling `state_transition` - this way,
-  #      we avoid the state root calculation - as such, instead of advancing
-  #      slots "automatically" in `state_transition`, perhaps it would be better
-  #      to keep a pre-condition that state must be at the right slot already?
  if not (state.data.slot < slot):
    if slotProcessed notin updateFlags or state.data.slot != slot:
      notice(
--- a/ncli/ncli.nim
+++ b/ncli/ncli.nim
@ -110,7 +110,7 @@ proc doSlots(conf: NcliConf) =
  for i in 0'u64..<conf.slot:
    let isEpoch = (stateY[].data.slot + 1).isEpoch
    withTimer(timers[if isEpoch: tApplyEpochSlot else: tApplySlot]):
-      advance_slot(stateY[], {}, cache)
+      doAssert process_slots(stateY[], stateY[].data.slot + 1, cache)

  withTimer(timers[tSaveState]):
    SSZ.saveFile(conf.postState, stateY.data)
--- a/tests/test_block_pool.nim
+++ b/tests/test_block_pool.nim
@ -420,7 +420,7 @@ suiteReport "chain DAG finalization tests" & preset():
    # The loop creates multiple branches, which StateCache isn't suitable for
    cache = StateCache()

-    advance_slot(prestate[], {}, cache)
+    doAssert process_slots(prestate[], prestate[].data.slot + 1, cache)

    # create another block, orphaning the head
    let blck = makeTestBlock(
--- a/tests/testblockutil.nim
+++ b/tests/testblockutil.nim
@ -95,7 +95,7 @@ proc addTestBlock*(
    graffiti = default(GraffitiBytes),
    flags: set[UpdateFlag] = {}): SignedBeaconBlock =
  # Create and add a block to state - state will advance by one slot!
-  advance_slot(state, flags, cache)
+  doAssert process_slots(state, state.data.slot + 1, cache, flags)

  let
    proposer_index = get_beacon_proposer_index(state.data, cache)