From 777b3f4e29c2189ae9a434e3c457f678c01428d6 Mon Sep 17 00:00:00 2001 From: Yuriy Glukhov Date: Fri, 22 Nov 2019 16:14:13 +0200 Subject: [PATCH] State/block pruning --- beacon_chain/beacon_chain_db.nim | 6 ++ beacon_chain/beacon_node_types.nim | 2 +- beacon_chain/block_pool.nim | 79 +++++++++++++++++++++--- beacon_chain/spec/datatypes.nim | 6 ++ beacon_chain/trusted_state_snapshots.nim | 6 -- 5 files changed, 82 insertions(+), 17 deletions(-) diff --git a/beacon_chain/beacon_chain_db.nim b/beacon_chain/beacon_chain_db.nim index 59e39d545..a5a2d24a3 100644 --- a/beacon_chain/beacon_chain_db.nim +++ b/beacon_chain/beacon_chain_db.nim @@ -86,6 +86,12 @@ proc putStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot, proc putBlock*(db: BeaconChainDB, value: BeaconBlock) = db.putBlock(signing_root(value), value) +proc delBlock*(db: BeaconChainDB, key: Eth2Digest) = + db.backend.del(subkey(BeaconBlock, key)) + +proc delState*(db: BeaconChainDB, key: Eth2Digest) = + db.backend.del(subkey(BeaconState, key)) + proc putHeadBlock*(db: BeaconChainDB, key: Eth2Digest) = db.backend.put(subkey(kHeadBlock), key.data) # TODO head block? diff --git a/beacon_chain/beacon_node_types.nim b/beacon_chain/beacon_node_types.nim index 37c3f5fdc..dcdf02541 100644 --- a/beacon_chain/beacon_node_types.nim +++ b/beacon_chain/beacon_node_types.nim @@ -118,7 +118,7 @@ type ## Tree of blocks pointing back to a finalized block on the chain we're ## interested in - we call that block the tail - blocksBySlot*: Table[uint64, seq[BlockRef]] + blocksBySlot*: Table[Slot, seq[BlockRef]] tail*: BlockRef ##\ ## The earliest finalized block we know about diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim index 10f7bcffc..af36b798a 100644 --- a/beacon_chain/block_pool.nim +++ b/beacon_chain/block_pool.nim @@ -90,10 +90,10 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool = else: headRef = tailRef - var blocksBySlot = initTable[uint64, seq[BlockRef]]() + var blocksBySlot = initTable[Slot, seq[BlockRef]]() for _, b in tables.pairs(blocks): let slot = db.getBlock(b.root).get().slot - blocksBySlot.mgetOrPut(slot.uint64, @[]).add(b) + blocksBySlot.mgetOrPut(slot, @[]).add(b) let # The head state is necessary to find out what we considered to be the @@ -132,11 +132,21 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool = heads: @[head] ) -func addSlotMapping(pool: BlockPool, slot: uint64, br: BlockRef) = +proc addSlotMapping(pool: BlockPool, br: BlockRef) = proc addIfMissing(s: var seq[BlockRef], v: BlockRef) = if v notin s: s.add(v) - pool.blocksBySlot.mgetOrPut(slot, @[]).addIfMissing(br) + pool.blocksBySlot.mgetOrPut(br.slot, @[]).addIfMissing(br) + +proc delSlotMapping(pool: BlockPool, br: BlockRef) = + var blks = pool.blocksBySlot.getOrDefault(br.slot) + if blks.len != 0: + let i = blks.find(br) + if i >= 0: blks.del(i) + if blks.len == 0: + pool.blocksBySlot.del(br.slot) + else: + pool.blocksBySlot[br.slot] = blks proc updateStateData*( pool: BlockPool, state: var StateData, bs: BlockSlot) {.gcsafe.} @@ -155,7 +165,7 @@ proc addResolvedBlock( pool.blocks[blockRoot] = blockRef - pool.addSlotMapping(blck.slot.uint64, blockRef) + pool.addSlotMapping(blockRef) # Resolved blocks should be stored in database pool.db.putBlock(blockRoot, blck) @@ -393,8 +403,8 @@ func getOrResolve*(pool: var BlockPool, root: Eth2Digest): BlockRef = if result.isNil: pool.missing[root] = MissingBlock(slots: 1) -iterator blockRootsForSlot*(pool: BlockPool, slot: uint64|Slot): Eth2Digest = - for br in pool.blocksBySlot.getOrDefault(slot.uint64, @[]): +iterator blockRootsForSlot*(pool: BlockPool, slot: Slot): Eth2Digest = + for br in pool.blocksBySlot.getOrDefault(slot, @[]): yield br.root func checkMissing*(pool: var BlockPool): seq[FetchRecord] = @@ -580,6 +590,44 @@ func isAncestorOf*(a, b: BlockRef): bool = else: a.isAncestorOf(b.parent) +proc delBlockAndState(pool: BlockPool, blockRoot: Eth2Digest) = + if (let blk = pool.db.getBlock(blockRoot); blk.isSome): + pool.db.delState(blk.get.stateRoot) + pool.db.delBlock(blockRoot) + +proc delFinalizedStateIfNeeded(pool: BlockPool, b: BlockRef) = + # Delete finalized state for block `b` from the database, that doesn't need + # to be kept for replaying. + # TODO: Currently the protocol doesn't provide a way to request states, + # so we don't need any of the finalized states, and thus remove all of them + # (except the most recent) + if (let blk = pool.db.getBlock(b.root); blk.isSome): + pool.db.delState(blk.get.stateRoot) + +proc setTailBlock(pool: BlockPool, newTail: BlockRef) = + ## Advance tail block, pruning all the states and blocks with older slots + let oldTail = pool.tail + let fromSlot = oldTail.slot.uint64 + let toSlot = newTail.slot.uint64 - 1 + assert(toSlot > fromSlot) + for s in fromSlot .. toSlot: + for b in pool.blocksBySlot.getOrDefault(s.Slot, @[]): + pool.delBlockAndState(b.root) + b.children = @[] + b.parent = nil + pool.blocks.del(b.root) + pool.pending.del(b.root) + pool.missing.del(b.root) + + pool.blocksBySlot.del(s.Slot) + + pool.db.putTailBlock(newTail.root) + pool.tail = newTail + pool.addSlotMapping(newTail) + info "Tail block updated", + slot = newTail.slot, + root = shortLog(newTail.root) + proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) = ## Update what we consider to be the current head, as given by the fork ## choice. @@ -634,10 +682,9 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) = cat = "fork_choice" let + finalizedEpochStartSlot = state.data.data.finalized_checkpoint.epoch.compute_start_slot_at_epoch() # TODO there might not be a block at the epoch boundary - what then? - finalizedHead = - blck.findAncestorBySlot( - state.data.data.finalized_checkpoint.epoch.compute_start_slot_at_epoch()) + finalizedHead = blck.findAncestorBySlot(finalizedEpochStartSlot) doAssert (not finalizedHead.blck.isNil), "Block graph should always lead to a finalized block" @@ -666,6 +713,10 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) = for child in cur.parent.children: if child != cur: pool.blocks.del(child.root) + pool.delBlockAndState(child.root) + pool.delSlotMapping(child) + else: + pool.delFinalizedStateIfNeeded(child) cur.parent.children = @[cur] cur = cur.parent @@ -678,6 +729,14 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) = not pool.heads[n].blck.isAncestorOf(pool.finalizedHead.blck): pool.heads.del(n) + # Calculate new tail block and set it + # New tail should be WEAK_SUBJECTIVITY_PERIOD * 2 older than finalizedHead + const tailSlotInterval = WEAK_SUBJECTVITY_PERIOD * 2 + if finalizedEpochStartSlot - GENESIS_SLOT > tailSlotInterval: + let tailSlot = finalizedEpochStartSlot - tailSlotInterval + let newTail = finalizedHead.blck.findAncestorBySlot(tailSlot) + pool.setTailBlock(newTail.blck) + func latestJustifiedBlock*(pool: BlockPool): BlockSlot = ## Return the most recent block that is justified and at least as recent ## as the latest finalized block diff --git a/beacon_chain/spec/datatypes.nim b/beacon_chain/spec/datatypes.nim index b32c2bbd9..2e5624f39 100644 --- a/beacon_chain/spec/datatypes.nim +++ b/beacon_chain/spec/datatypes.nim @@ -67,6 +67,12 @@ const # Not part of spec. Still useful, pending removing usage if appropriate. ZERO_HASH* = Eth2Digest() + # Not part of spec + WEAK_SUBJECTVITY_PERIOD* = + Slot(uint64(4 * 30 * 24 * 60 * 60) div SECONDS_PER_SLOT) + # TODO: This needs revisiting. + # Why was the validator WITHDRAWAL_PERIOD altered in the spec? + template maxSize*(n: int) {.pragma.} type diff --git a/beacon_chain/trusted_state_snapshots.nim b/beacon_chain/trusted_state_snapshots.nim index 686534948..5d492470c 100644 --- a/beacon_chain/trusted_state_snapshots.nim +++ b/beacon_chain/trusted_state_snapshots.nim @@ -2,12 +2,6 @@ import os, chronos, json_serialization, spec/[datatypes], beacon_chain_db -const - WEAK_SUBJECTVITY_PERIOD* = - Slot(uint64(4 * 30 * 24 * 60 * 60) div SECONDS_PER_SLOT) - # TODO: This needs revisiting. - # Why was the validator WITHDRAWAL_PERIOD altered in the spec? - proc obtainTrustedStateSnapshot*(db: BeaconChainDB): Future[BeaconState] {.async.} = # In case our latest state is too old, we must obtain a recent snapshot # of the state from a trusted location. This is explained in detail here: