nimbus-eth2/beacon_chain/sync/sync_protocol.nim

# beacon_chain
# Copyright (c) 2018-2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
options, tables, sets, macros,
chronicles, chronos, stew/ranges/bitranges, libp2p/switch,
../spec/datatypes/[phase0, altair, bellatrix],
../spec/[helpers, forks, network],
".."/[beacon_clock],
../networking/eth2_network,
../consensus_object_pools/blockchain_dag
logScope:
topics = "sync"
const
MAX_REQUEST_BLOCKS = 1024
blockByRootLookupCost = allowedOpsPerSecondCost(50)
blockResponseCost = allowedOpsPerSecondCost(100)
blockByRangeLookupCost = allowedOpsPerSecondCost(20)
type
StatusMsg* = object
forkDigest*: ForkDigest
finalizedRoot*: Eth2Digest
finalizedEpoch*: Epoch
headRoot*: Eth2Digest
headSlot*: Slot
ValidatorSetDeltaFlags {.pure.} = enum
Activation = 0
Exit = 1
ValidatorChangeLogEntry* = object
case kind*: ValidatorSetDeltaFlags
of Activation:
pubkey: ValidatorPubKey
else:
index: uint32
BeaconSyncNetworkState* = ref object
dag*: ChainDAGRef
getBeaconTime*: GetBeaconTimeFn
BeaconSyncPeerState* = ref object
statusLastTime*: chronos.Moment
statusMsg*: StatusMsg
BlockRootSlot* = object
blockRoot: Eth2Digest
slot: Slot
BlockRootsList* = List[Eth2Digest, Limit MAX_REQUEST_BLOCKS]
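# Reads one response chunk for a block request: the leading fork-digest
# "context bytes" select which fork's SignedBeaconBlock type the SSZ
# payload is decoded into.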
proc readChunkPayload*(conn: Connection, peer: Peer,
MsgType: type ForkedSignedBeaconBlock): Future[NetRes[ForkedSignedBeaconBlock]] {.async.} =
var contextBytes: ForkDigest
try:
await conn.readExactly(addr contextBytes, sizeof contextBytes)
except CatchableError:
return neterr UnexpectedEOF
if contextBytes == peer.network.forkDigests.phase0:
let res = await readChunkPayload(conn, peer, phase0.SignedBeaconBlock)
if res.isOk:
return ok ForkedSignedBeaconBlock.init(res.get)
else:
return err(res.error)
elif contextBytes == peer.network.forkDigests.altair:
let res = await readChunkPayload(conn, peer, altair.SignedBeaconBlock)
if res.isOk:
return ok ForkedSignedBeaconBlock.init(res.get)
else:
return err(res.error)
elif contextBytes == peer.network.forkDigests.bellatrix:
let res = await readChunkPayload(conn, peer, bellatrix.SignedBeaconBlock)
if res.isOk:
return ok ForkedSignedBeaconBlock.init(res.get)
else:
return err(res.error)
else:
return neterr InvalidContextBytes
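# Writes one response chunk, SSZ-encoding the block for its fork and
# prefixing it with the matching fork digest as context bytes.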
proc sendResponseChunk*(response: UntypedResponse,
val: ForkedSignedBeaconBlock): Future[void] =
inc response.writtenChunks
case val.kind
of BeaconBlockFork.Phase0:
response.stream.writeChunk(some ResponseCode.Success,
SSZ.encode(val.phase0Data),
response.peer.network.forkDigests.phase0.data)
of BeaconBlockFork.Altair:
response.stream.writeChunk(some ResponseCode.Success,
SSZ.encode(val.altairData),
response.peer.network.forkDigests.altair.data)
of BeaconBlockFork.Bellatrix:
response.stream.writeChunk(some ResponseCode.Success,
SSZ.encode(val.bellatrixData),
response.peer.network.forkDigests.bellatrix.data)
func shortLog*(s: StatusMsg): auto =
(
forkDigest: s.forkDigest,
finalizedRoot: shortLog(s.finalizedRoot),
finalizedEpoch: shortLog(s.finalizedEpoch),
headRoot: shortLog(s.headRoot),
headSlot: shortLog(s.headSlot)
)
chronicles.formatIt(StatusMsg): shortLog(it)
func disconnectReasonName(reason: uint64): string =
# Nim doesn't support uint64 in `case` statements, so use if/elif instead
if reason == uint64(ClientShutDown): "Client shutdown"
elif reason == uint64(IrrelevantNetwork): "Irrelevant network"
elif reason == uint64(FaultOrError): "Fault or error"
else: "Disconnected (" & $reason & ")"
proc getCurrentStatus(state: BeaconSyncNetworkState): StatusMsg =
let
dag = state.dag
wallSlot = state.getBeaconTime().slotOrZero
StatusMsg(
forkDigest: dag.forkDigestAtEpoch(wallSlot.epoch),
finalizedRoot: dag.finalizedHead.blck.root,
finalizedEpoch: dag.finalizedHead.slot.epoch,
headRoot: dag.head.root,
headSlot: dag.head.slot)
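# Sanity-checks a peer's status against our own view of the chain: peers on
# another fork, ahead of the wall clock, or following a different finality
# are rejected as irrelevant.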
proc checkStatusMsg(state: BeaconSyncNetworkState, status: StatusMsg):
Result[void, cstring] =
let
dag = state.dag
wallSlot = (state.getBeaconTime() + MAXIMUM_GOSSIP_CLOCK_DISPARITY).slotOrZero
if status.finalizedEpoch > status.headSlot.epoch:
# finalizedEpoch may equal headSlot.epoch during genesis or checkpoint
# start, hence the strictly-greater-than comparison
return err("finalized epoch newer than head")
if status.headSlot > wallSlot:
return err("head more recent than wall clock")
if dag.forkDigestAtEpoch(wallSlot.epoch) != status.forkDigest:
return err("fork digests differ")
if status.finalizedEpoch <= dag.finalizedHead.slot.epoch:
let blockId = dag.getBlockIdAtSlot(status.finalizedEpoch.start_slot())
if status.finalizedRoot != blockId.bid.root and blockId.bid.root != Eth2Digest():
return err("peer following different finality")
ok()
proc handleStatus(peer: Peer,
state: BeaconSyncNetworkState,
theirStatus: StatusMsg): Future[bool] {.gcsafe.}
proc setStatusMsg(peer: Peer, statusMsg: StatusMsg) {.gcsafe.}
{.pop.} # TODO fix p2p macro for raises
p2pProtocol BeaconSync(version = 1,
networkState = BeaconSyncNetworkState,
peerState = BeaconSyncPeerState):
onPeerConnected do (peer: Peer, incoming: bool) {.async.}:
debug "Peer connected",
peer, peerId = shortLog(peer.peerId), incoming
# Per the eth2 protocol, whoever dials must send a status message when
# connected for the first time, but because of how libp2p works, there may
# be a race between incoming and outgoing connections and disconnects that
# makes the incoming flag unreliable / obsolete by the time we get to
# this point - instead of making assumptions, we'll just send a status
# message redundantly.
# TODO(zah)
# the spec does not prohibit sending the extra status message on
# incoming connections, but it should not be necessary - this would
# need a dedicated flow in libp2p that resolves the race conditions -
# this needs more thinking around the ordering of events and the
# given incoming flag
let
ourStatus = peer.networkState.getCurrentStatus()
theirStatus = await peer.status(ourStatus, timeout = RESP_TIMEOUT)
if theirStatus.isOk:
discard await peer.handleStatus(peer.networkState, theirStatus.get())
else:
debug "Status response not received in time",
peer, errorKind = theirStatus.error.kind
await peer.disconnect(FaultOrError)
proc status(peer: Peer,
theirStatus: StatusMsg,
response: SingleChunkResponse[StatusMsg])
{.async, libp2pProtocol("status", 1).} =
let ourStatus = peer.networkState.getCurrentStatus()
trace "Sending status message", peer = peer, status = ourStatus
await response.send(ourStatus)
discard await peer.handleStatus(peer.networkState, theirStatus)
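# Mandatory req/resp protocols: ping returns our metadata sequence number,
# while the metadata v1/v2 requests return the phase0/altair metadata.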
proc ping(peer: Peer, value: uint64): uint64
{.libp2pProtocol("ping", 1).} =
return peer.network.metadata.seq_number
proc getMetaData(peer: Peer): phase0.MetaData
{.libp2pProtocol("metadata", 1).} =
return peer.network.phase0metadata
proc getMetadata_v2(peer: Peer): altair.MetaData
{.libp2pProtocol("metadata", 2).} =
return peer.network.metadata
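# v1 range requests can only carry phase0 blocks; serving stops at the first
# Altair or later block, which the spec permits ("Clients MAY limit the
# number of blocks in the response").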
proc beaconBlocksByRange(
peer: Peer,
startSlot: Slot,
reqCount: uint64,
reqStep: uint64,
response: MultipleChunksResponse[phase0.SignedBeaconBlock])
{.async, libp2pProtocol("beacon_blocks_by_range", 1).} =
trace "got range request", peer, startSlot,
count = reqCount, step = reqStep
if reqCount > 0'u64 and reqStep > 0'u64:
var blocks: array[MAX_REQUEST_BLOCKS, BlockId]
let
dag = peer.networkState.dag
# Limit number of blocks in response
count = int min(reqCount, blocks.lenu64)
let
endIndex = count - 1
startIndex =
dag.getBlockRange(startSlot, reqStep, blocks.toOpenArray(0, endIndex))
peer.updateRequestQuota(
blockByRangeLookupCost +
max(0, endIndex - startIndex + 1).float * blockResponseCost)
peer.awaitNonNegativeRequestQuota()
for i in startIndex..endIndex:
trace "wrote response block",
slot = blocks[i].slot, root = shortLog(blocks[i].root)
let blk = dag.getForkedBlock(blocks[i])
if blk.isSome():
let blck = blk.get()
case blck.kind
of BeaconBlockFork.Phase0:
await response.write(blck.phase0Data.asSigned)
else:
# Skipping all subsequent blocks should be OK because the spec says:
# "Clients MAY limit the number of blocks in the response."
# https://github.com/ethereum/consensus-specs/blob/dev/specs/phase0/p2p-interface.md#beaconblocksbyrange
#
# Also, our response would be indistinguishable from that of a node
# that has been synced exactly to the Altair transition slot.
break
debug "Block range request done",
peer, startSlot, count, reqStep, found = count - startIndex
else:
raise newException(InvalidInputsError, "Empty range requested")
proc beaconBlocksByRoot(
peer: Peer,
# Please note that the SSZ list here ensures that the
# spec constant MAX_REQUEST_BLOCKS is enforced:
blockRoots: BlockRootsList,
response: MultipleChunksResponse[phase0.SignedBeaconBlock])
{.async, libp2pProtocol("beacon_blocks_by_root", 1).} =
if blockRoots.len == 0:
raise newException(InvalidInputsError, "No blocks requested")
let
dag = peer.networkState.dag
count = blockRoots.len
peer.updateRequestQuota(count.float * blockByRootLookupCost)
peer.awaitNonNegativeRequestQuota()
var found = 0
for i in 0..<count:
let blockRef = dag.getBlockRef(blockRoots[i])
if blockRef.isSome():
let blk = dag.getForkedBlock(blockRef[])
case blk.kind
of BeaconBlockFork.Phase0:
await response.write(blk.phase0Data.asSigned)
inc found
of BeaconBlockFork.Altair, BeaconBlockFork.Bellatrix:
# Skipping this block should be fine because the spec says:
# "Clients MAY limit the number of blocks in the response."
# https://github.com/ethereum/consensus-specs/blob/dev/specs/phase0/p2p-interface.md#beaconblocksbyroot
#
# Also, our response would be indistinguishable from that of a node
# that has been synced exactly to the Altair transition slot.
continue
peer.updateRequestQuota(found.float * blockResponseCost)
debug "Block root request done",
peer, roots = blockRoots.len, count, found
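# v2 range requests return ForkedSignedBeaconBlock chunks, each prefixed
# with its fork digest, so blocks from any fork can be served.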
proc beaconBlocksByRange_v2(
peer: Peer,
startSlot: Slot,
reqCount: uint64,
reqStep: uint64,
response: MultipleChunksResponse[ForkedSignedBeaconBlock])
{.async, libp2pProtocol("beacon_blocks_by_range", 2).} =
trace "got range request", peer, startSlot,
count = reqCount, step = reqStep
if reqCount > 0'u64 and reqStep > 0'u64:
var blocks: array[MAX_REQUEST_BLOCKS, BlockId]
let
dag = peer.networkState.dag
# Limit number of blocks in response
count = int min(reqCount, blocks.lenu64)
let
endIndex = count - 1
startIndex =
dag.getBlockRange(startSlot, reqStep,
blocks.toOpenArray(0, endIndex))
peer.updateRequestQuota(
blockByRangeLookupCost +
max(0, endIndex - startIndex + 1).float * blockResponseCost)
peer.awaitNonNegativeRequestQuota()
for i in startIndex..endIndex:
let
blck = dag.getForkedBlock(blocks[i]).valueOr:
continue
await response.write(blck.asSigned)
debug "Block range request done",
peer, startSlot, count, reqStep, found = count - startIndex
else:
raise newException(InvalidInputsError, "Empty range requested")
proc beaconBlocksByRoot_v2(
peer: Peer,
# Please note that the SSZ list here ensures that the
# spec constant MAX_REQUEST_BLOCKS is enforced:
blockRoots: BlockRootsList,
response: MultipleChunksResponse[ForkedSignedBeaconBlock])
{.async, libp2pProtocol("beacon_blocks_by_root", 2).} =
if blockRoots.len == 0:
raise newException(InvalidInputsError, "No blocks requested")
let
dag = peer.networkState.dag
count = blockRoots.len
peer.updateRequestQuota(count.float * blockByRootLookupCost)
peer.awaitNonNegativeRequestQuota()
var found = 0
for i in 0..<count:
let blockRef = dag.getBlockRef(blockRoots[i])
if blockRef.isSome():
let blk = dag.getForkedBlock(blockRef[])
await response.write(blk.asSigned)
inc found
peer.updateRequestQuota(found.float * blockResponseCost)
debug "Block root request done",
peer, roots = blockRoots.len, count, found
proc goodbye(peer: Peer,
reason: uint64)
{.async, libp2pProtocol("goodbye", 1).} =
debug "Received Goodbye message", reason = disconnectReasonName(reason), peer
proc useSyncV2*(state: BeaconSyncNetworkState): bool =
let
wallTimeSlot = state.getBeaconTime().slotOrZero
wallTimeSlot.epoch >= state.dag.cfg.ALTAIR_FORK_EPOCH
proc useSyncV2*(peer: Peer): bool =
peer.networkState(BeaconSync).useSyncV2()
proc setStatusMsg(peer: Peer, statusMsg: StatusMsg) =
debug "Peer status", peer, statusMsg
peer.state(BeaconSync).statusMsg = statusMsg
peer.state(BeaconSync).statusLastTime = Moment.now()
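# Validates a peer's status: irrelevant peers are disconnected, while peers
# passing the handshake for the first time are handed to the peer pool.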
proc handleStatus(peer: Peer,
state: BeaconSyncNetworkState,
theirStatus: StatusMsg): Future[bool] {.async, gcsafe.} =
let
res = checkStatusMsg(state, theirStatus)
return if res.isErr():
debug "Irrelevant peer", peer, theirStatus, err = res.error()
await peer.disconnect(IrrelevantNetwork)
false
else:
peer.setStatusMsg(theirStatus)
if peer.connectionState == Connecting:
# Getting here means the handshake passed successfully, so we can
# add this peer to the PeerPool.
await peer.handlePeer()
true
proc updateStatus*(peer: Peer): Future[bool] {.async.} =
## Request `status` of remote peer ``peer``.
let
nstate = peer.networkState(BeaconSync)
ourStatus = getCurrentStatus(nstate)
let theirFut = awaitne peer.status(ourStatus, timeout = RESP_TIMEOUT)
if theirFut.failed():
return false
else:
let theirStatus = theirFut.read()
if theirStatus.isOk:
return await peer.handleStatus(nstate, theirStatus.get())
else:
return false
proc getHeadSlot*(peer: Peer): Slot =
## Returns head slot for specific peer ``peer``.
peer.state(BeaconSync).statusMsg.headSlot
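# Wires the sync protocol's shared network state to the DAG and the beacon
# clock at startup.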
proc initBeaconSync*(network: Eth2Node, dag: ChainDAGRef,
getBeaconTime: GetBeaconTimeFn) =
var networkState = network.protocolState(BeaconSync)
networkState.dag = dag
networkState.getBeaconTime = getBeaconTime