From 3ad60532981e47758380049c999ddd21389d0eef Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Thu, 28 Mar 2019 00:10:48 -0600 Subject: [PATCH] Use block, slot tuple to idenfiy fork this is the beginning of tracking block-slots more precisely, so we can the justification epoch slot bug. * avoid asyncDiscard (swallows assertions) * fix attestation delay * fix several state root cache bugs * introduce workaround for genesis epoch spec issue --- beacon_chain/beacon_chain_db.nim | 26 ++++ beacon_chain/beacon_node.nim | 41 +++--- beacon_chain/beacon_node_types.nim | 8 +- beacon_chain/block_pool.nim | 193 ++++++++++++++++++----------- beacon_chain/spec/validator.nim | 4 +- beacon_chain/ssz.nim | 2 +- tests/test_beacon_chain_db.nim | 4 + tests/testutil.nim | 12 +- 8 files changed, 190 insertions(+), 100 deletions(-) diff --git a/beacon_chain/beacon_chain_db.nim b/beacon_chain/beacon_chain_db.nim index 768d1615b..3f1275225 100644 --- a/beacon_chain/beacon_chain_db.nim +++ b/beacon_chain/beacon_chain_db.nim @@ -20,6 +20,7 @@ type ## TODO: determine how aggressively the database should be pruned. For a ## healthy network sync, we probably need to store blocks at least ## past the weak subjectivity period. + kBlockSlotStateRoot ## BlockSlot -> state_root mapping func subkey(kind: DbKeyKind): array[1, byte] = result[0] = byte ord(kind) @@ -39,6 +40,23 @@ func subkey(kind: type BeaconState, key: Eth2Digest): auto = func subkey(kind: type BeaconBlock, key: Eth2Digest): auto = subkey(kHashToBlock, key.data) +func subkey(root: Eth2Digest, slot: Slot): auto = + # var + # # takes care of endians.. + # TODO: this gives 8 bytes back(!) + # root = SSZ.encode(root) + # slot = SSZ.encode(slot) + var ret: array[1 + 32 + 8, byte] + # doAssert sizeof(ret) == 1 + sizeof(root) + sizeof(slot), + # "Can't sizeof this in VM" + + ret[0] = byte ord(kBlockSlotStateRoot) + + copyMem(addr ret[1], unsafeaddr root, sizeof(root)) + copyMem(addr ret[1 + sizeof(root)], unsafeaddr slot, sizeof(slot)) + + ret + proc init*(T: type BeaconChainDB, backend: TrieDatabaseRef): BeaconChainDB = new result result.backend = backend @@ -58,6 +76,10 @@ proc putState*(db: BeaconChainDB, key: Eth2Digest, value: BeaconState) = proc putState*(db: BeaconChainDB, value: BeaconState) = db.putState(hash_tree_root(value), value) +proc putStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot, + value: Eth2Digest) = + db.backend.put(subkey(root, slot), value.data) + proc putBlock*(db: BeaconChainDB, value: BeaconBlock) = db.putBlock(signed_root(value), value) @@ -83,6 +105,10 @@ proc getBlock*(db: BeaconChainDB, key: Eth2Digest): Option[BeaconBlock] = proc getState*(db: BeaconChainDB, key: Eth2Digest): Option[BeaconState] = db.get(subkey(BeaconState, key), BeaconState) +proc getStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot): + Option[Eth2Digest] = + db.get(subkey(root, slot), Eth2Digest) + proc getHeadBlock*(db: BeaconChainDB): Option[Eth2Digest] = db.get(subkey(kHeadBlock), Eth2Digest) diff --git a/beacon_chain/beacon_node.nim b/beacon_chain/beacon_node.nim index 7596c871d..52d8dc162 100644 --- a/beacon_chain/beacon_node.nim +++ b/beacon_chain/beacon_node.nim @@ -147,18 +147,8 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async let tailState = Json.loadFile(snapshotFile, BeaconState) tailBlock = get_initial_beacon_block(tailState) - blockRoot = signed_root(tailBlock) - notice "Creating new database from snapshot", - blockRoot = shortLog(blockRoot), - stateRoot = shortLog(tailBlock.state_root), - fork = tailState.fork, - validators = tailState.validator_registry.len() - - result.db.putState(tailState) - result.db.putBlock(tailBlock) - result.db.putTailBlock(blockRoot) - result.db.putHeadBlock(blockRoot) + BlockPool.preInit(result.db, tailState, tailBlock) except SerializationError as err: stderr.write "Failed to import ", snapshotFile, "\n" @@ -251,14 +241,15 @@ proc getAttachedValidator(node: BeaconNode, idx: int): AttachedValidator = proc updateHead(node: BeaconNode, slot: Slot): BlockRef = # Use head state for attestation resolution below # TODO do we need to resolve attestations using all available head states? - node.blockPool.updateState(node.state, node.blockPool.head, slot) + node.blockPool.updateState( + node.state, BlockSlot(blck: node.blockPool.head, slot: slot)) # Check pending attestations - maybe we found some blocks for them node.attestationPool.resolve(node.state.data) # TODO move all of this logic to BlockPool debug "Preparing for fork choice", - currentHeadBlock = shortLog(node.state.root), + stateRoot = shortLog(node.state.root), connectedPeers = node.network.connectedPeers, stateSlot = humaneSlotNum(node.state.data.slot), stateEpoch = humaneEpochNum(node.state.data.slot.slotToEpoch) @@ -270,7 +261,8 @@ proc updateHead(node: BeaconNode, slot: Slot): BlockRef = # got finalized: # https://github.com/ethereum/eth2.0-specs/issues/768 node.blockPool.updateState( - node.justifiedStateCache, justifiedHead, justifiedHead.slot) + node.justifiedStateCache, + BlockSlot(blck: justifiedHead, slot: justifiedHead.slot)) let newHead = lmdGhost( node.attestationPool, node.justifiedStateCache.data, justifiedHead) @@ -343,7 +335,7 @@ proc proposeBlock(node: BeaconNode, doAssert false, "head slot matches proposal slot (!)" # return - node.blockPool.updateState(node.state, head, slot - 1) + node.blockPool.updateState(node.state, BlockSlot(blck: head, slot: slot - 1)) # To create a block, we'll first apply a partial block to the state, skipping # some validations. let @@ -462,17 +454,22 @@ proc handleAttestations(node: BeaconNode, head: BlockRef, slot: Slot) = attestationHeadSlot = humaneSlotNum(attestationHead.slot), attestationSlot = humaneSlotNum(slot) + debug "Checking attestations", + attestationHeadRoot = shortLog(attestationHead.root), + attestationSlot = humaneSlotNum(slot) + # We need to run attestations exactly for the slot that we're attesting to. # In case blocks went missing, this means advancing past the latest block # using empty slots as fillers. - node.blockPool.updateState(node.state, attestationHead, slot) + node.blockPool.updateState( + node.state, BlockSlot(blck: attestationHead, slot: slot)) for crosslink_committee in get_crosslink_committees_at_slot( node.state.data, slot): for i, validatorIdx in crosslink_committee.committee: let validator = node.getAttachedValidator(validatorIdx) if validator != nil: - asyncDiscard makeAttestation(node, validator, node.state.data, head, + asyncCheck makeAttestation(node, validator, node.state.data, head, crosslink_committee.shard, crosslink_committee.committee.len, i) @@ -485,7 +482,7 @@ proc handleProposal(node: BeaconNode, head: BlockRef, slot: Slot): # proposing for it - basically, we're selecting proposer based on an # empty slot.. wait for the committee selection to settle, then # revisit this - we should be able to advance behind - node.blockPool.updateState(node.state, head, slot) + node.blockPool.updateState(node.state, BlockSlot(blck: head, slot: slot)) let proposerIdx = get_beacon_proposer_index(node.state.data, slot) let validator = node.getAttachedValidator(proposerIdx) @@ -621,11 +618,15 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn halfSlot = seconds(int64(SECONDS_PER_SLOT div 2)) if attestationStart.inFuture or attestationStart.offset <= halfSlot: + let fromNow = + if attestationStart.inFuture: attestationStart.offset + halfSlot + else: halfSlot - attestationStart.offset + debug "Waiting to send attestations", slot = humaneSlotNum(slot), - fromNow = shortLog(attestationStart.offset + halfSlot) + fromNow = shortLog(fromNow) - await sleepAsync(attestationStart.offset + halfSlot) + await sleepAsync(fromNow) # Time passed - we might need to select a new head in that case head = node.updateHead(slot) diff --git a/beacon_chain/beacon_node_types.nim b/beacon_chain/beacon_node_types.nim index 4fbb453ed..af2a1cddc 100644 --- a/beacon_chain/beacon_node_types.nim +++ b/beacon_chain/beacon_node_types.nim @@ -220,6 +220,13 @@ type ## The block associated with the state found in data - in particular, ## blck.state_root == root + BlockSlot* = object + ## Unique identifier for a particular fork in the block chain - normally, + ## there's a block for every slot, but in the case a block is not produced, + ## the chain progresses anyway, producing a new state for every slot. + blck*: BlockRef + slot*: Slot + # ############################################# # # Validator Pool @@ -254,4 +261,3 @@ type proc userValidatorsRange*(d: NetworkMetadata): HSlice[int, int] = 0 .. d.lastUserValidator.int - diff --git a/beacon_chain/block_pool.nim b/beacon_chain/block_pool.nim index f169a3bed..bea11e3f9 100644 --- a/beacon_chain/block_pool.nim +++ b/beacon_chain/block_pool.nim @@ -4,6 +4,12 @@ import beacon_node_types, spec/[crypto, datatypes, digest, helpers] +proc parent*(bs: BlockSlot): BlockSlot = + BlockSlot( + blck: if bs.slot > bs.blck.slot: bs.blck else: bs.blck.parent, + slot: bs.slot - 1 + ) + proc link(parent, child: BlockRef) = doAssert (not (parent.root == Eth2Digest() or child.root == Eth2Digest())), "blocks missing root!" @@ -132,7 +138,7 @@ proc addSlotMapping(pool: BlockPool, slot: uint64, br: BlockRef) = pool.blocksBySlot.mgetOrPut(slot, @[]).addIfMissing(br) proc updateState*( - pool: BlockPool, state: var StateData, blck: BlockRef, slot: Slot) {.gcsafe.} + pool: BlockPool, state: var StateData, bs: BlockSlot) {.gcsafe.} proc add*( pool: var BlockPool, state: var StateData, blockRoot: Eth2Digest, @@ -175,7 +181,7 @@ proc add*( # blocks we add to the database are clean for the given state # TODO if the block is from the future, we should not be resolving it (yet), # but maybe we should use it as a hint that our clock is wrong? - updateState(pool, state, parent, blck.slot - 1) + updateState(pool, state, BlockSlot(blck: parent, slot: blck.slot - 1)) if not updateState(state.data, blck, {}): # TODO find a better way to log all this block data @@ -311,7 +317,7 @@ proc skipAndUpdateState( ok -proc maybePutState(pool: BlockPool, state: BeaconState) = +proc maybePutState(pool: BlockPool, state: BeaconState, blck: BlockRef) = # TODO we save state at every epoch start but never remove them - we also # potentially save multiple states per slot if reorgs happen, meaning # we could easily see a state explosion @@ -323,9 +329,76 @@ proc maybePutState(pool: BlockPool, state: BeaconState) = stateSlot = humaneSlotNum(state.slot), stateRoot = shortLog(root) pool.db.putState(root, state) + # TODO this should be atomic with the above write.. + pool.db.putStateRoot(blck.root, state.slot, root) -proc updateState*( - pool: BlockPool, state: var StateData, blck: BlockRef, slot: Slot) = +proc rewindState(pool: BlockPool, state: var StateData, bs: BlockSlot): + seq[BlockData] = + var ancestors = @[pool.get(bs.blck)] + # Common case: the last block applied is the parent of the block to apply: + if not bs.blck.parent.isNil and state.blck.root == bs.blck.parent.root and + state.data.slot < bs.slot: + return ancestors + + # It appears that the parent root of the proposed new block is different from + # what we expected. We will have to rewind the state to a point along the + # chain of ancestors of the new block. We will do this by loading each + # successive parent block and checking if we can find the corresponding state + # in the database. + + var + stateRoot = pool.db.getStateRoot(bs.blck.root, bs.slot) + curBs = bs + while stateRoot.isNone(): + let parBs = curBs.parent() + if parBs.blck.isNil: + break # Bug probably! + + if parBs.blck != curBs.blck: + ancestors.add(pool.get(parBs.blck)) + + if (let tmp = pool.db.getStateRoot(parBs.blck.root, parBs.slot); tmp.isSome()): + if pool.db.containsState(tmp.get): + stateRoot = tmp + break + + curBs = parBs + + if stateRoot.isNone(): + # TODO this should only happen if the database is corrupt - we walked the + # list of parent blocks and couldn't find a corresponding state in the + # database, which should never happen (at least we should have the + # tail state in there!) + error "Couldn't find ancestor state root!", + blockRoot = shortLog(bs.blck.root) + doAssert false, "Oh noes, we passed big bang!" + + let + ancestor = ancestors[^1] + ancestorState = pool.db.getState(stateRoot.get()) + + if ancestorState.isNone(): + # TODO this should only happen if the database is corrupt - we walked the + # list of parent blocks and couldn't find a corresponding state in the + # database, which should never happen (at least we should have the + # tail state in there!) + error "Couldn't find ancestor state or block parent missing!", + blockRoot = shortLog(bs.blck.root) + doAssert false, "Oh noes, we passed big bang!" + + debug "Replaying state transitions", + stateSlot = humaneSlotNum(state.data.slot), + ancestorStateRoot = shortLog(ancestor.data.state_root), + ancestorStateSlot = humaneSlotNum(ancestorState.get().slot), + slot = humaneSlotNum(bs.slot), + blockRoot = shortLog(bs.blck.root), + ancestors = ancestors.len + + state.data = ancestorState.get() + + ancestors + +proc updateState*(pool: BlockPool, state: var StateData, bs: BlockSlot) = ## Rewind or advance state such that it matches the given block and slot - ## this may include replaying from an earlier snapshot if blck is on a ## different branch or has advanced to a higher slot number than slot @@ -334,84 +407,38 @@ proc updateState*( # We need to check the slot because the state might have moved forwards # without blocks - if state.blck.root == blck.root and state.data.slot == slot: + if state.blck.root == bs.blck.root and state.data.slot <= bs.slot: + # Might be that we're moving to the same block but later slot + skipSlots(state.data, bs.slot) do (state: BeaconState): + pool.maybePutState(state, bs.blck) + return # State already at the right spot - var ancestors = @[pool.get(blck)] - - # Common case: the last thing that was applied to the state was the parent - # of blck - if state.blck.root == ancestors[0].data.previous_block_root and - state.data.slot < blck.slot: - let ok = skipAndUpdateState( - state.data, ancestors[0].data, {skipValidation}) do (state: BeaconState): - pool.maybePutState(state) - doAssert ok, "Blocks in database should never fail to apply.." - state.blck = blck - state.root = ancestors[0].data.state_root - - skipSlots(state.data, slot) do (state: BeaconState): - pool.maybePutState(state) - - return - - # It appears that the parent root of the proposed new block is different from - # what we expected. We will have to rewind the state to a point along the - # chain of ancestors of the new block. We will do this by loading each - # successive parent block and checking if we can find the corresponding state - # in the database. - while not ancestors[^1].refs.parent.isNil: - let parent = pool.get(ancestors[^1].refs.parent) - ancestors.add parent - - if pool.db.containsState(parent.data.state_root): break - - let - ancestor = ancestors[^1] - ancestorState = pool.db.getState(ancestor.data.state_root) - - if ancestorState.isNone(): - # TODO this should only happen if the database is corrupt - we walked the - # list of parent blocks and couldn't find a corresponding state in the - # database, which should never happen (at least we should have the - # tail state in there!) - error "Couldn't find ancestor state or block parent missing!", - blockRoot = shortLog(blck.root) - doAssert false, "Oh noes, we passed big bang!" - - debug "Replaying state transitions", - stateSlot = humaneSlotNum(state.data.slot), - stateRoot = shortLog(ancestor.data.state_root), - prevStateSlot = humaneSlotNum(ancestorState.get().slot), - ancestors = ancestors.len - - state.data = ancestorState.get() + let ancestors = rewindState(pool, state, bs) # If we come this far, we found the state root. The last block on the stack # is the one that produced this particular state, so we can pop it # TODO it might be possible to use the latest block hashes from the state to # do this more efficiently.. whatever! - # Time to replay all the blocks between then and now. We skip the one because + # Time to replay all the blocks between then and now. We skip one because # it's the one that we found the state with, and it has already been # applied for i in countdown(ancestors.len - 2, 0): - let last = ancestors[i] + let ok = + skipAndUpdateState(state.data, ancestors[i].data, {skipValidation}) do( + state: BeaconState): + pool.maybePutState(state, ancestors[i].refs) + doAssert ok, "Blocks in database should never fail to apply.." - skipSlots(state.data, last.data.slot - 1) do(state: BeaconState): - pool.maybePutState(state) + skipSlots(state.data, bs.slot) do (state: BeaconState): + pool.maybePutState(state, bs.blck) - let ok = updateState(state.data, last.data, {skipValidation}) - doAssert ok, - "We only keep validated blocks in the database, should never fail" - - state.blck = blck - state.root = ancestors[0].data.state_root - - pool.maybePutState(state.data) - - skipSlots(state.data, slot) do (state: BeaconState): - pool.maybePutState(state) + # TODO could perhaps avoi a hash_tree_root if putState happens.. hmm.. + state.blck = bs.blck + state.root = + if state.data.slot == ancestors[0].data.slot: ancestors[0].data.state_root + else: hash_tree_root(state.data) proc loadTailState*(pool: BlockPool): StateData = ## Load the state associated with the current tail in the pool @@ -441,7 +468,7 @@ proc updateHead*(pool: BlockPool, state: var StateData, blck: BlockRef) = pool.head = blck # Start off by making sure we have the right state - updateState(pool, state, blck, blck.slot) + updateState(pool, state, BlockSlot(blck: blck, slot: blck.slot)) if lastHead != blck.parent: notice "Updated head with new parent", @@ -525,3 +552,27 @@ proc latestState*(pool: BlockPool): BeaconState = else: error "Block from block pool not found in db", root = b.root b = b.parent + + +proc preInit*( + T: type BlockPool, db: BeaconChainDB, state: BeaconState, blck: BeaconBlock) = + # write a genesis state, the way the BlockPool expects it to be stored in + # database + # TODO probably should just init a blockpool with the freshly written + # state - but there's more refactoring needed to make it nice - doing + # a minimal patch for now.. + let + blockRoot = signed_root(blck) + + # TODO Error: undeclared identifier: 'log' + # notice "Creating new database from snapshot", + # blockRoot = shortLog(blockRoot), + # stateRoot = shortLog(blck.state_root), + # fork = state.fork, + # validators = state.validator_registry.len() + + db.putState(state) + db.putBlock(blck) + db.putTailBlock(blockRoot) + db.putHeadBlock(blockRoot) + db.putStateRoot(blockRoot, blck.slot, blck.state_root) diff --git a/beacon_chain/spec/validator.nim b/beacon_chain/spec/validator.nim index 04bb158a8..2550c2c2e 100644 --- a/beacon_chain/spec/validator.nim +++ b/beacon_chain/spec/validator.nim @@ -128,8 +128,10 @@ func get_previous_epoch*(state: BeaconState): Epoch = # Note: This is allowed to underflow internally (this is why GENESIS_EPOCH != 0) # however when interfacing with peers for example for attestations # this should not underflow. + # TODO or not - it causes issues: https://github.com/ethereum/eth2.0-specs/issues/849 + let epoch = get_current_epoch(state) - epoch - 1 + max(GENESIS_EPOCH, epoch - 1) # TODO max here to work around the above issue # https://github.com/ethereum/eth2.0-specs/blob/v0.5.0/specs/core/0_beacon-chain.md#get_crosslink_committees_at_slot diff --git a/beacon_chain/ssz.nim b/beacon_chain/ssz.nim index 6adcb261a..c77675bdc 100644 --- a/beacon_chain/ssz.nim +++ b/beacon_chain/ssz.nim @@ -166,7 +166,7 @@ proc writeValue*(w: var SszWriter, obj: auto) = mixin writeValue when obj is ValidatorIndex|BasicType: - w.stream.append obj.toBytesSSZ + w.stream.append obj.toSSZType().toBytesSSZ elif obj is enum: w.stream.append uint64(obj).toBytesSSZ else: diff --git a/tests/test_beacon_chain_db.nim b/tests/test_beacon_chain_db.nim index eaddf3430..bf0ac8d25 100644 --- a/tests/test_beacon_chain_db.nim +++ b/tests/test_beacon_chain_db.nim @@ -33,6 +33,10 @@ suite "Beacon chain DB": db.containsBlock(root) db.getBlock(root).get() == blck + db.putStateRoot(root, blck.slot, root) + check: + db.getStateRoot(root, blck.slot).get() == root + test "sanity check states": var db = init(BeaconChainDB, newMemoryDB()) diff --git a/tests/testutil.nim b/tests/testutil.nim index 7f4ad9fd7..edf9dac85 100644 --- a/tests/testutil.nim +++ b/tests/testutil.nim @@ -8,8 +8,10 @@ import options, sequtils, eth/trie/[db], - ../beacon_chain/[beacon_chain_db, extras, ssz, state_transition, validator_pool], - ../beacon_chain/spec/[beaconstate, bitfield, crypto, datatypes, digest, helpers, validator] + ../beacon_chain/[beacon_chain_db, block_pool, extras, ssz, state_transition, + validator_pool, beacon_node_types], + ../beacon_chain/spec/[beaconstate, bitfield, crypto, datatypes, digest, + helpers, validator] func makeFakeValidatorPrivKey*(i: int): ValidatorPrivKey = var i = i + 1 # 0 does not work, as private key... @@ -194,7 +196,5 @@ proc makeTestDB*(tailState: BeaconState, tailBlock: BeaconBlock): BeaconChainDB tailRoot = signed_root(tailBlock) result = init(BeaconChainDB, newMemoryDB()) - result.putState(tailState) - result.putBlock(tailBlock) - result.putTailBlock(tailRoot) - result.putHeadBlock(tailRoot) + BlockPool.preInit(result, tailState, tailBlock) +