From 777b3f4e29c2189ae9a434e3c457f678c01428d6 Mon Sep 17 00:00:00 2001
From: Yuriy Glukhov <yuriy.glukhov@gmail.com>
Date: Fri, 22 Nov 2019 16:14:13 +0200
Subject: [PATCH] State/block pruning

---
 beacon_chain/beacon_chain_db.nim         |  6 ++
 beacon_chain/beacon_node_types.nim       |  2 +-
 beacon_chain/block_pool.nim              | 79 +++++++++++++++++++++---
 beacon_chain/spec/datatypes.nim          |  6 ++
 beacon_chain/trusted_state_snapshots.nim |  6 --
 5 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/beacon_chain/beacon_chain_db.nim b/beacon_chain/beacon_chain_db.nim
index 59e39d545..a5a2d24a3 100644
--- a/beacon_chain/beacon_chain_db.nim
+++ b/beacon_chain/beacon_chain_db.nim
@@ -86,6 +86,12 @@ proc putStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot,
 proc putBlock*(db: BeaconChainDB, value: BeaconBlock) =
   db.putBlock(signing_root(value), value)
 
+proc delBlock*(db: BeaconChainDB, key: Eth2Digest) =
+  db.backend.del(subkey(BeaconBlock, key))
+
+proc delState*(db: BeaconChainDB, key: Eth2Digest) =
+  db.backend.del(subkey(BeaconState, key))
+
 proc putHeadBlock*(db: BeaconChainDB, key: Eth2Digest) =
   db.backend.put(subkey(kHeadBlock), key.data) # TODO head block?
 
diff --git a/beacon_chain/beacon_node_types.nim b/beacon_chain/beacon_node_types.nim
index 37c3f5fdc..dcdf02541 100644
--- a/beacon_chain/beacon_node_types.nim
+++ b/beacon_chain/beacon_node_types.nim
@@ -118,7 +118,7 @@ type
     ## Tree of blocks pointing back to a finalized block on the chain we're
     ## interested in - we call that block the tail
 
-    blocksBySlot*: Table[uint64, seq[BlockRef]]
+    blocksBySlot*: Table[Slot, seq[BlockRef]]
 
     tail*: BlockRef ##\
     ## The earliest finalized block we know about
diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim
index 10f7bcffc..af36b798a 100644
--- a/beacon_chain/block_pool.nim
+++ b/beacon_chain/block_pool.nim
@@ -90,10 +90,10 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
   else:
     headRef = tailRef
 
-  var blocksBySlot = initTable[uint64, seq[BlockRef]]()
+  var blocksBySlot = initTable[Slot, seq[BlockRef]]()
   for _, b in tables.pairs(blocks):
     let slot = db.getBlock(b.root).get().slot
-    blocksBySlot.mgetOrPut(slot.uint64, @[]).add(b)
+    blocksBySlot.mgetOrPut(slot, @[]).add(b)
 
   let
     # The head state is necessary to find out what we considered to be the
@@ -132,11 +132,21 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
     heads: @[head]
   )
 
-func addSlotMapping(pool: BlockPool, slot: uint64, br: BlockRef) =
+proc addSlotMapping(pool: BlockPool, br: BlockRef) =
   proc addIfMissing(s: var seq[BlockRef], v: BlockRef) =
     if v notin s:
       s.add(v)
-  pool.blocksBySlot.mgetOrPut(slot, @[]).addIfMissing(br)
+  pool.blocksBySlot.mgetOrPut(br.slot, @[]).addIfMissing(br)
+
+proc delSlotMapping(pool: BlockPool, br: BlockRef) =
+  var blks = pool.blocksBySlot.getOrDefault(br.slot)
+  if blks.len != 0:
+    let i = blks.find(br)
+    if i >= 0: blks.del(i)
+    if blks.len == 0:
+      pool.blocksBySlot.del(br.slot)
+    else:
+      pool.blocksBySlot[br.slot] = blks
 
 proc updateStateData*(
   pool: BlockPool, state: var StateData, bs: BlockSlot) {.gcsafe.}
@@ -155,7 +165,7 @@ proc addResolvedBlock(
 
   pool.blocks[blockRoot] = blockRef
 
-  pool.addSlotMapping(blck.slot.uint64, blockRef)
+  pool.addSlotMapping(blockRef)
 
   # Resolved blocks should be stored in database
   pool.db.putBlock(blockRoot, blck)
@@ -393,8 +403,8 @@ func getOrResolve*(pool: var BlockPool, root: Eth2Digest): BlockRef =
   if result.isNil:
     pool.missing[root] = MissingBlock(slots: 1)
 
-iterator blockRootsForSlot*(pool: BlockPool, slot: uint64|Slot): Eth2Digest =
-  for br in pool.blocksBySlot.getOrDefault(slot.uint64, @[]):
+iterator blockRootsForSlot*(pool: BlockPool, slot: Slot): Eth2Digest =
+  for br in pool.blocksBySlot.getOrDefault(slot, @[]):
     yield br.root
 
 func checkMissing*(pool: var BlockPool): seq[FetchRecord] =
@@ -580,6 +590,44 @@ func isAncestorOf*(a, b: BlockRef): bool =
   else:
     a.isAncestorOf(b.parent)
 
+proc delBlockAndState(pool: BlockPool, blockRoot: Eth2Digest) =
+  if (let blk = pool.db.getBlock(blockRoot); blk.isSome):
+    pool.db.delState(blk.get.stateRoot)
+    pool.db.delBlock(blockRoot)
+
+proc delFinalizedStateIfNeeded(pool: BlockPool, b: BlockRef) =
+  # Delete finalized state for block `b` from the database, that doesn't need
+  # to be kept for replaying.
+  # TODO: Currently the protocol doesn't provide a way to request states,
+  # so we don't need any of the finalized states, and thus remove all of them
+  # (except the most recent)
+  if (let blk = pool.db.getBlock(b.root); blk.isSome):
+    pool.db.delState(blk.get.stateRoot)
+
+proc setTailBlock(pool: BlockPool, newTail: BlockRef) =
+  ## Advance tail block, pruning all the states and blocks with older slots
+  let oldTail = pool.tail
+  let fromSlot = oldTail.slot.uint64
+  let toSlot = newTail.slot.uint64 - 1
+  assert(toSlot > fromSlot)
+  for s in fromSlot .. toSlot:
+    for b in pool.blocksBySlot.getOrDefault(s.Slot, @[]):
+      pool.delBlockAndState(b.root)
+      b.children = @[]
+      b.parent = nil
+      pool.blocks.del(b.root)
+      pool.pending.del(b.root)
+      pool.missing.del(b.root)
+
+    pool.blocksBySlot.del(s.Slot)
+
+  pool.db.putTailBlock(newTail.root)
+  pool.tail = newTail
+  pool.addSlotMapping(newTail)
+  info "Tail block updated",
+    slot = newTail.slot,
+    root = shortLog(newTail.root)
+
 proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) =
   ## Update what we consider to be the current head, as given by the fork
   ## choice.
@@ -634,10 +682,9 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) =
       cat = "fork_choice"
 
   let
+    finalizedEpochStartSlot = state.data.data.finalized_checkpoint.epoch.compute_start_slot_at_epoch()
     # TODO there might not be a block at the epoch boundary - what then?
-    finalizedHead =
-      blck.findAncestorBySlot(
-        state.data.data.finalized_checkpoint.epoch.compute_start_slot_at_epoch())
+    finalizedHead = blck.findAncestorBySlot(finalizedEpochStartSlot)
 
   doAssert (not finalizedHead.blck.isNil),
     "Block graph should always lead to a finalized block"
@@ -666,6 +713,10 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) =
       for child in cur.parent.children:
         if child != cur:
           pool.blocks.del(child.root)
+          pool.delBlockAndState(child.root)
+          pool.delSlotMapping(child)
+        else:
+          pool.delFinalizedStateIfNeeded(child)
       cur.parent.children = @[cur]
       cur = cur.parent
 
@@ -678,6 +729,14 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) =
           not pool.heads[n].blck.isAncestorOf(pool.finalizedHead.blck):
         pool.heads.del(n)
 
+  # Calculate new tail block and set it
+  # New tail should be WEAK_SUBJECTIVITY_PERIOD * 2 older than finalizedHead
+  const tailSlotInterval = WEAK_SUBJECTVITY_PERIOD * 2
+  if finalizedEpochStartSlot - GENESIS_SLOT > tailSlotInterval:
+    let tailSlot = finalizedEpochStartSlot - tailSlotInterval
+    let newTail = finalizedHead.blck.findAncestorBySlot(tailSlot)
+    pool.setTailBlock(newTail.blck)
+
 func latestJustifiedBlock*(pool: BlockPool): BlockSlot =
   ## Return the most recent block that is justified and at least as recent
   ## as the latest finalized block
diff --git a/beacon_chain/spec/datatypes.nim b/beacon_chain/spec/datatypes.nim
index b32c2bbd9..2e5624f39 100644
--- a/beacon_chain/spec/datatypes.nim
+++ b/beacon_chain/spec/datatypes.nim
@@ -67,6 +67,12 @@ const
   # Not part of spec. Still useful, pending removing usage if appropriate.
   ZERO_HASH* = Eth2Digest()
 
+  # Not part of spec
+  WEAK_SUBJECTVITY_PERIOD* =
+    Slot(uint64(4 * 30 * 24 * 60 * 60) div SECONDS_PER_SLOT)
+    # TODO: This needs revisiting.
+    # Why was the validator WITHDRAWAL_PERIOD altered in the spec?
+
 template maxSize*(n: int) {.pragma.}
 
 type
diff --git a/beacon_chain/trusted_state_snapshots.nim b/beacon_chain/trusted_state_snapshots.nim
index 686534948..5d492470c 100644
--- a/beacon_chain/trusted_state_snapshots.nim
+++ b/beacon_chain/trusted_state_snapshots.nim
@@ -2,12 +2,6 @@ import
   os, chronos, json_serialization,
   spec/[datatypes], beacon_chain_db
 
-const
-  WEAK_SUBJECTVITY_PERIOD* =
-    Slot(uint64(4 * 30 * 24 * 60 * 60) div SECONDS_PER_SLOT)
-    # TODO: This needs revisiting.
-    # Why was the validator WITHDRAWAL_PERIOD altered in the spec?
-
 proc obtainTrustedStateSnapshot*(db: BeaconChainDB): Future[BeaconState] {.async.} =
   # In case our latest state is too old, we must obtain a recent snapshot
   # of the state from a trusted location. This is explained in detail here: