# nimbus-eth1/nimbus/nimbus.nim

# Nimbus
# Copyright (c) 2018 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
import
  ../nimbus/vm_compile_info

import
  os, strutils, net, options,
  eth/keys, db/[storage_types, db_chain, select_backend],
  eth/common as eth_common, eth/p2p as eth_p2p,
  chronos, json_rpc/rpcserver, chronicles,
  eth/p2p/rlpx_protocols/les_protocol,
  ./p2p/blockchain_sync, eth/net/nat, eth/p2p/peer_pool,
  ./sync/protocol_eth65,
  config, genesis, rpc/[common, p2p, debug], p2p/chain,
  eth/trie/db, metrics, metrics/[chronos_httpserver, chronicles_support],
  graphql/ethapi,
  "."/[utils, conf_utils, sealer, constants]

## TODO:
## * No IPv6 support
## * No multiple bind addresses support
## * No database support
const
  nimbusClientId = "nimbus 0.1.0"

type
  NimbusState = enum
    Starting, Running, Stopping

  NimbusNode = ref object
    rpcServer*: RpcHttpServer
    ethNode*: EthereumNode
    state*: NimbusState
    graphqlServer*: GraphqlHttpServerRef
    wsRpcServer*: RpcWebSocketServer
    sealingEngine*: SealingEngineRef

proc start(nimbus: NimbusNode) =
  var conf = getConfiguration()

  ## logging
  setLogLevel(conf.debug.logLevel)
  if len(conf.debug.logFile) != 0:
    defaultChroniclesStream.output.outFile = nil # to avoid closing stdout
    discard defaultChroniclesStream.output.open(conf.debug.logFile, fmAppend)

  createDir(conf.dataDir)
  let trieDB = trieDB newChainDb(conf.dataDir)
  var chainDB = newBaseChainDB(trieDB,
    conf.prune == PruneMode.Full,
    conf.net.networkId
  )
  chainDB.populateProgress()
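
  # The second newBaseChainDB argument selects state-trie pruning: in
  # PruneMode.Full only recent state is kept, otherwise all historical
  # state is archived.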
  if canonicalHeadHashKey().toOpenArray notin trieDB:
    initializeEmptyDb(chainDB)
    doAssert(canonicalHeadHashKey().toOpenArray in trieDB)
  if conf.importFile.len > 0:
    # success or not, we quit after importing blocks
    if not importRlpBlock(conf.importFile, chainDB):
      quit(QuitFailure)
    else:
      quit(QuitSuccess)
  let res = conf.loadKeystoreFiles()
  if res.isErr:
    echo res.error()
    quit(QuitFailure)
  # metrics logging
  if conf.debug.logMetrics:
    # https://github.com/nim-lang/Nim/issues/17369
    var logMetrics: proc(udata: pointer) {.gcsafe, raises: [Defect].}
    logMetrics = proc(udata: pointer) =
      {.gcsafe.}:
        let registry = defaultRegistry
        info "metrics", registry
      discard setTimer(Moment.fromNow(conf.debug.logMetricsInterval.seconds), logMetrics)
    discard setTimer(Moment.fromNow(conf.debug.logMetricsInterval.seconds), logMetrics)
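
  # logMetrics is self-rescheduling: the closure re-arms its own timer on
  # every run and the call above arms the first tick, so metrics are logged
  # every logMetricsInterval seconds without a dedicated thread.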
  ## Creating P2P Server
  let keypair = conf.net.nodekey.toKeyPair()

  var address: Address
  address.ip = parseIpAddress("0.0.0.0")
  address.tcpPort = Port(conf.net.bindPort)
  address.udpPort = Port(conf.net.discPort)
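
  # 0.0.0.0 listens on all local interfaces; the externally advertised
  # address may be replaced below by an explicit externalIP or by NAT
  # traversal.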
  if conf.net.nat == NatNone:
    if conf.net.externalIP != "":
      # any required port redirection is assumed to be done by hand
      address.ip = parseIpAddress(conf.net.externalIP)
  else:
    # automated NAT traversal
    let extIP = getExternalIP(conf.net.nat)
    # This external IP only appears in the logs, so don't worry about dynamic
    # IPs. Don't remove it either, because the above call does initialisation
    # and discovery for NAT-related objects.
    if extIP.isSome:
      address.ip = extIP.get()
      let extPorts = redirectPorts(tcpPort = address.tcpPort,
                                   udpPort = address.udpPort,
                                   description = NIMBUS_NAME & " " & NIMBUS_VERSION)
      if extPorts.isSome:
        (address.tcpPort, address.udpPort) = extPorts.get()
  nimbus.ethNode = newEthereumNode(keypair, address, conf.net.networkId,
                                   nil, nimbusClientId,
                                   addAllCapabilities = false,
                                   minPeers = conf.net.maxPeers)
  # Add protocol capabilities based on protocol flags
  if ProtocolFlags.Eth in conf.net.protocols:
    nimbus.ethNode.addCapability eth
  if ProtocolFlags.Les in conf.net.protocols:
    nimbus.ethNode.addCapability les
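
  # The eth (full) and les (light client) capabilities are attached
  # independently, so a node can serve either or both depending on the
  # configured protocol flags.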
  # chainRef: some name to avoid module-name/field/function misunderstandings
  let chainRef = newChain(chainDB)
  nimbus.ethNode.chain = chainRef
  if conf.verifyFromOk:
    chainRef.extraValidation = 0 < conf.verifyFrom
    chainRef.verifyFrom = conf.verifyFrom
  ## Creating RPC Server
  if RpcFlags.Enabled in conf.rpc.flags:
    nimbus.rpcServer = newRpcHttpServer(conf.rpc.binds)
    setupCommonRpc(nimbus.ethNode, nimbus.rpcServer)

  # Enable RPC APIs based on RPC flags and protocol flags
  if RpcFlags.Eth in conf.rpc.flags and ProtocolFlags.Eth in conf.net.protocols:
    setupEthRpc(nimbus.ethNode, chainDB, nimbus.rpcServer)
  if RpcFlags.Debug in conf.rpc.flags:
    setupDebugRpc(chainDB, nimbus.rpcServer)
  # Creating Websocket RPC Server
  if RpcFlags.Enabled in conf.ws.flags:
    doAssert(conf.ws.binds.len > 0)
    nimbus.wsRpcServer = newRpcWebSocketServer(conf.ws.binds[0])
    setupCommonRpc(nimbus.ethNode, nimbus.wsRpcServer)

  # Enable Websocket RPC APIs based on RPC flags and protocol flags
  if RpcFlags.Eth in conf.ws.flags and ProtocolFlags.Eth in conf.net.protocols:
    setupEthRpc(nimbus.ethNode, chainDB, nimbus.wsRpcServer)
  if RpcFlags.Debug in conf.ws.flags:
    setupDebugRpc(chainDB, nimbus.wsRpcServer)
  ## Starting servers
  if RpcFlags.Enabled in conf.rpc.flags:
    nimbus.rpcServer.rpc("admin_quit") do() -> string:
      {.gcsafe.}:
        nimbus.state = Stopping
      result = "EXITING"
    nimbus.rpcServer.start()
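
  # Usage sketch, assuming a hypothetical HTTP-RPC bind of 127.0.0.1:8545
  # (the real address comes from conf.rpc.binds): the node can then be shut
  # down remotely with a plain JSON-RPC call, e.g.
  #   curl -H 'Content-Type: application/json' \
  #     -d '{"jsonrpc":"2.0","id":1,"method":"admin_quit","params":[]}' \
  #     http://127.0.0.1:8545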
  if conf.graphql.enabled:
    nimbus.graphqlServer = setupGraphqlHttpServer(conf, chainDB, nimbus.ethNode)
    nimbus.graphqlServer.start()
  if conf.engineSigner != ZERO_ADDRESS:
    let rs = validateSealer(chainRef)
    if rs.isErr:
      echo rs.error
      quit(QuitFailure)
    nimbus.sealingEngine = SealingEngineRef.new(chainRef)
    nimbus.sealingEngine.start()
  # metrics server
  if conf.net.metricsServer:
    let metricsAddress = "127.0.0.1"
    info "Starting metrics HTTP server", address = metricsAddress, port = conf.net.metricsServerPort
    startMetricsHttpServer(metricsAddress, Port(conf.net.metricsServerPort))
  # Connect directly to the static nodes
  for enode in conf.net.staticNodes:
    asyncCheck nimbus.ethNode.peerPool.connectToNode(newNode(enode))
  # Connect via discovery
  if conf.net.customBootNodes.len > 0:
    # override the default bootnodes from the public network
    waitFor nimbus.ethNode.connectToNetwork(conf.net.customBootNodes,
      enableDiscovery = NoDiscover notin conf.net.flags)
  else:
    waitFor nimbus.ethNode.connectToNetwork(conf.net.bootNodes,
      enableDiscovery = NoDiscover notin conf.net.flags)
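
  # Static nodes are dialled unconditionally above, while bootnodes also
  # seed peer discovery unless NoDiscover is set.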
  if ProtocolFlags.Eth in conf.net.protocols:
    # TODO: temp code until the CLI/RPC interface is fleshed out
    let status = waitFor nimbus.ethNode.fastBlockchainSync()
    if status != syncSuccess:
      debug "Block sync failed: ", status

  if nimbus.state == Starting:
    # it might have been set to "Stopping" with Ctrl+C
    nimbus.state = Running
proc stop*(nimbus: NimbusNode) {.async, gcsafe.} =
  trace "Graceful shutdown"
  var conf = getConfiguration()
  if RpcFlags.Enabled in conf.rpc.flags:
    nimbus.rpcServer.stop()
  if conf.graphql.enabled:
    await nimbus.graphqlServer.stop()
  if conf.engineSigner != ZERO_ADDRESS:
    await nimbus.sealingEngine.stop()
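
  # Only the services that start() actually launched are torn down here,
  # mirroring the same flag checks used at startup.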
proc process*(nimbus: NimbusNode) =
  # Main event loop
  while nimbus.state == Running:
    try:
      poll()
    except CatchableError as e:
      debug "Exception in poll()", exc = e.name, err = e.msg
      discard e # silence warning when chronicles not activated
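
  # poll() runs a single iteration of the chronos event loop, so the Running
  # flag is rechecked between iterations; a runForever() call would never
  # observe the state change made by admin_quit or Ctrl+C.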
  # Stop loop
  waitFor nimbus.stop()
when isMainModule:
  var nimbus = NimbusNode(state: Starting)

  ## Ctrl+C handling
  proc controlCHandler() {.noconv.} =
    when defined(windows):
      # workaround for https://github.com/nim-lang/Nim/issues/4057
      setupForeignThreadGc()
    nimbus.state = Stopping
    echo "\nCtrl+C pressed. Waiting for a graceful shutdown."
  setControlCHook(controlCHandler)
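
  # The hook only flips the state flag; the actual teardown happens on the
  # main thread once process() notices the change, hence the "graceful
  # shutdown" message.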
  var message: string
  ## Print Nimbus header
  echo NimbusHeader

  ## Show logs on stdout until we get the user's logging choice
  discard defaultChroniclesStream.output.open(stdout)
  ## Processing command line arguments
  if processArguments(message) != ConfigStatus.Success:
    echo message
    quit(QuitFailure)
  else:
    if len(message) > 0:
      echo message
      quit(QuitSuccess)
  nimbus.start()
  nimbus.process()