nimbus-eth1/nimbus/nimbus_execution_client.nim

309 lines
9.5 KiB
Nim
Raw Normal View History

2018-04-27 11:53:53 +03:00
# Nimbus
# Copyright (c) 2018-2024 Status Research & Development GmbH
2018-04-27 11:53:53 +03:00
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
import
../nimbus/compile_info
import
std/[os, osproc, strutils, net],
chronicles,
eth/net/nat,
metrics,
metrics/chronicles_support,
kzg4844/kzg,
stew/byteutils,
./rpc,
./version,
./constants,
./nimbus_desc,
./nimbus_import,
./core/block_import,
./core/eip4844,
./db/core_db/persistent,
./db/storage_types,
./sync/handlers,
./common/chain_config_hash
Speed up initial MPT root computation after import (#2788) When `nimbus import` runs, we end up with a database without MPT roots leading to long startup times the first time one is needed. Computing the state root is slow because the on-disk order based on VertexID sorting does not match the trie traversal order and therefore makes lookups inefficent. Here we introduce a helper that speeds up this computation by traversing the trie in on-disk order and computing the trie hashes bottom up instead - even though this leads to some redundant reads of nodes that we cannot yet compute, it's still a net win as leaves and "bottom" branches make up the majority of the database. This PR also addresses a few other sources of inefficiency largely due to the separation of AriKey and AriVtx into their own column families. Each column family is its own LSM tree that produces hundreds of SST filtes - with a limit of 512 open files, rocksdb must keep closing and opening files which leads to expensive metadata reads during random access. When rocksdb makes a lookup, it has to read several layers of files for each lookup. Ribbon filters to skip over files that don't have the requested data but when these filters are not in memory, reading them is slow - this happens in two cases: when opening a file and when the filter has been evicted from the LRU cache. Addressing the open file limit solves one source of inefficiency, but we must also increase the block cache size to deal with this problem. * rocksdb.max_open_files increased to 2048 * per-file size limits increased so that fewer files are created * WAL size increased to avoid partial flushes which lead to small files * rocksdb block cache increased All these increases of course lead to increased memory usage, but at least performance is acceptable - in the future, we'll need to explore options such as joining AriVtx and AriKey and/or reducing the row count (by grouping branch layers under a single vertexid). With this PR, the mainnet state root can be computed in ~8 hours (down from 2-3 days) - not great, but still better. Further, we write all keys to the database, also those that are less than 32 bytes - because the mpt path is part of the input, it is very rare that we actually hit a key like this (about 200k such entries on mainnet), so the code complexity is not worth the benefit really, in the current database layout / design.
2024-10-27 12:08:37 +01:00
from beacon_chain/nimbus_binary_common import setupFileLimits
when defined(evmc_enabled):
import transaction/evmc_dynamic_loader
2018-06-20 20:27:32 +03:00
## TODO:
## * No IPv6 support
## * No multiple bind addresses support
## * No database support
proc basicServices(nimbus: NimbusNode,
conf: NimbusConf,
2022-12-02 11:39:12 +07:00
com: CommonRef) =
nimbus.chainRef = ForkedChainRef.init(com)
# txPool must be informed of active head
# so it can know the latest account state
# e.g. sender nonce, etc
nimbus.txPool = TxPoolRef.new(nimbus.chainRef)
nimbus.beaconEngine = BeaconEngineRef.new(nimbus.txPool)
proc manageAccounts(nimbus: NimbusNode, conf: NimbusConf) =
if string(conf.keyStore).len > 0:
let res = nimbus.ctx.am.loadKeystores(string conf.keyStore)
if res.isErr:
fatal "Load keystore error", msg = res.error()
quit(QuitFailure)
if string(conf.importKey).len > 0:
let res = nimbus.ctx.am.importPrivateKey(string conf.importKey)
if res.isErr:
fatal "Import private key error", msg = res.error()
quit(QuitFailure)
proc setupP2P(nimbus: NimbusNode, conf: NimbusConf,
2024-10-28 22:17:07 +00:00
com: CommonRef) =
2018-06-20 20:27:32 +03:00
## Creating P2P Server
let kpres = nimbus.ctx.getNetKeys(conf.netKey, conf.dataDir.string)
if kpres.isErr:
fatal "Get network keys error", msg = kpres.error
quit(QuitFailure)
let keypair = kpres.get()
var address = enode.Address(
ip: conf.listenAddress,
tcpPort: conf.tcpPort,
udpPort: conf.udpPort
)
if conf.nat.hasExtIp:
# any required port redirection is assumed to be done by hand
address.ip = conf.nat.extIp
2019-04-17 03:56:28 +02:00
else:
# automated NAT traversal
let extIP = getExternalIP(conf.nat.nat)
2019-04-18 01:17:06 +02:00
# This external IP only appears in the logs, so don't worry about dynamic
# IPs. Don't remove it either, because the above call does initialisation
# and discovery for NAT-related objects.
2019-04-17 03:56:28 +02:00
if extIP.isSome:
address.ip = extIP.get()
let extPorts = redirectPorts(tcpPort = address.tcpPort,
udpPort = address.udpPort,
description = NimbusName & " " & NimbusVersion)
2019-04-17 03:56:28 +02:00
if extPorts.isSome:
(address.tcpPort, address.udpPort) = extPorts.get()
2018-06-20 20:27:32 +03:00
let bootstrapNodes = conf.getBootNodes()
nimbus.ethNode = newEthereumNode(
keypair, address, conf.networkId, conf.agentString,
addAllCapabilities = false, minPeers = conf.maxPeers,
bootstrapNodes = bootstrapNodes,
bindUdpPort = conf.udpPort, bindTcpPort = conf.tcpPort,
bindIp = conf.listenAddress,
rng = nimbus.ctx.rng)
2024-10-28 22:17:07 +00:00
# Add protocol capabilities
nimbus.ethNode.addEthHandlerCapability(
nimbus.ethNode.peerPool, nimbus.chainRef, nimbus.txPool)
2018-06-20 20:27:32 +03:00
# Always initialise beacon syncer
nimbus.beaconSyncRef = BeaconSyncRef.init(
nimbus.ethNode, nimbus.chainRef, conf.maxPeers, conf.beaconChunkSize)
# Connect directly to the static nodes
let staticPeers = conf.getStaticPeers()
if staticPeers.len > 0:
nimbus.peerManager = PeerManagerRef.new(
nimbus.ethNode.peerPool,
conf.reconnectInterval,
conf.reconnectMaxRetry,
staticPeers
)
nimbus.peerManager.start()
# Start Eth node
if conf.maxPeers > 0:
nimbus.networkLoop = nimbus.ethNode.connectToNetwork(
enableDiscovery = conf.discovery != DiscoveryType.None,
waitForPeers = true)
proc setupMetrics(nimbus: NimbusNode, conf: NimbusConf) =
# metrics logging
if conf.logMetricsEnabled:
# https://github.com/nim-lang/Nim/issues/17369
var logMetrics: proc(udata: pointer) {.gcsafe, raises: [].}
logMetrics = proc(udata: pointer) =
{.gcsafe.}:
let registry = defaultRegistry
info "metrics", registry
discard setTimer(Moment.fromNow(conf.logMetricsInterval.seconds), logMetrics)
discard setTimer(Moment.fromNow(conf.logMetricsInterval.seconds), logMetrics)
# metrics server
if conf.metricsEnabled:
info "Starting metrics HTTP server", address = conf.metricsAddress, port = conf.metricsPort
let res = MetricsHttpServerRef.new($conf.metricsAddress, conf.metricsPort)
if res.isErr:
fatal "Failed to create metrics server", msg=res.error
quit(QuitFailure)
nimbus.metricsServer = res.get
waitFor nimbus.metricsServer.start()
proc preventLoadingDataDirForTheWrongNetwork(db: CoreDbRef; conf: NimbusConf) =
let
kvt = db.ctx.getKvt()
calculatedId = calcHash(conf.networkId, conf.networkParams)
dataDirIdBytes = kvt.get(dataDirIdKey().toOpenArray).valueOr:
# an empty database
info "Writing data dir ID", ID=calculatedId
kvt.put(dataDirIdKey().toOpenArray, calculatedId.data).isOkOr:
fatal "Cannot write data dir ID", ID=calculatedId
quit(QuitFailure)
return
if calculatedId.data != dataDirIdBytes:
fatal "Data dir already initialized with other network configuration",
get=dataDirIdBytes.toHex,
expected=calculatedId
quit(QuitFailure)
proc run(nimbus: NimbusNode, conf: NimbusConf) =
## logging
setLogLevel(conf.logLevel)
if conf.logFile.isSome:
let logFile = string conf.logFile.get()
defaultChroniclesStream.output.outFile = nil # to avoid closing stdout
discard defaultChroniclesStream.output.open(logFile, fmAppend)
Speed up initial MPT root computation after import (#2788) When `nimbus import` runs, we end up with a database without MPT roots leading to long startup times the first time one is needed. Computing the state root is slow because the on-disk order based on VertexID sorting does not match the trie traversal order and therefore makes lookups inefficent. Here we introduce a helper that speeds up this computation by traversing the trie in on-disk order and computing the trie hashes bottom up instead - even though this leads to some redundant reads of nodes that we cannot yet compute, it's still a net win as leaves and "bottom" branches make up the majority of the database. This PR also addresses a few other sources of inefficiency largely due to the separation of AriKey and AriVtx into their own column families. Each column family is its own LSM tree that produces hundreds of SST filtes - with a limit of 512 open files, rocksdb must keep closing and opening files which leads to expensive metadata reads during random access. When rocksdb makes a lookup, it has to read several layers of files for each lookup. Ribbon filters to skip over files that don't have the requested data but when these filters are not in memory, reading them is slow - this happens in two cases: when opening a file and when the filter has been evicted from the LRU cache. Addressing the open file limit solves one source of inefficiency, but we must also increase the block cache size to deal with this problem. * rocksdb.max_open_files increased to 2048 * per-file size limits increased so that fewer files are created * WAL size increased to avoid partial flushes which lead to small files * rocksdb block cache increased All these increases of course lead to increased memory usage, but at least performance is acceptable - in the future, we'll need to explore options such as joining AriVtx and AriKey and/or reducing the row count (by grouping branch layers under a single vertexid). With this PR, the mainnet state root can be computed in ~8 hours (down from 2-3 days) - not great, but still better. Further, we write all keys to the database, also those that are less than 32 bytes - because the mpt path is part of the input, it is very rare that we actually hit a key like this (about 200k such entries on mainnet), so the code complexity is not worth the benefit really, in the current database layout / design.
2024-10-27 12:08:37 +01:00
setupFileLimits()
info "Launching execution client",
version = FullVersionStr,
conf
when defined(evmc_enabled):
evmcSetLibraryPath(conf.evm)
EVMC: Option `--evm`, load third-party EVM as a shared library This patch adds: - Load and use a third-party EVM in a shared library, instead of Nimbus EVM. - New option `--evm` to specify which library to load. - The library and this loader conforms to the [EVMC] (https://evmc.ethereum.org/) 9.x specification. Any third-party EVM which is compatible with EVMC version 9.x and supports EVM1 contract code will be accepted. The operating system's shared library format applies. These are `.so*` files on Linux, `.dll` files on Windows and `.dylib` files on Mac. The alternative EVM can be selected in two ways: - Nimbus command line option `--evm:<path>`. - Environment variable `NIMBUS_EVM=<path>`. The reason for an environment variable is this allows all the test programs to run with a third-party EVM as well. Some don't parse command line options. There are some limitations to be aware of: - The third-party EVM must use EVMC version 9.x, no other major version. EVMC 9.x supports EIP-1559 / London fork and older transactions. - Nested `*CALL` and `CREATE*` operations don't use the third-party EVM yet. These call the built-in Nimbus EVM. This mixing of different EVMs between levels is explicitly allowed in specs, so there is no problem doing it. - The third-party EVM doesn't need to support precompiles, because those are nested calls, which use the built-in Nimbus EVM. - Third-party EVMs execute contracts correctly, but fail the final `rootHash` match. The reason is that some account state changes, which are correct, are currently inside the Nimbus EVM and need to be moved to EVMC host logic. *This is a known work in progress*. The EVM execution itself is fine. Test results using "evmone" third-party EVM: - [evmone](https://github.com/ethereum/evmone) has been tested. Only on Linux but it "should" work on Windows and Mac equally well. - [Version 0.8.1](https://github.com/ethereum/evmone/releases/tag/v0.8.1) was used because it is compatible with EVMC 9.x, which is required for the EIP-1559 / London fork, which Nimbus supports. Version 0.8.0 could be used but it looks like an important bug was fixed in 0.8.1. - evmone runs fine and the trace output looks good. The calls and arguments are the same as the built-in Nimbus EVM for tests that have been checked manually, except evmone skips some calls that can be safely skipped. - The final `rootHash` is incorrect, due to the *work in progress* mentioned above which is not part of the evmone execution. Due to this, it's possible to try evmone and verify expected behaviours, which also validates our own EVMC implementation, but it can't be used as a full substitute yet. Signed-off-by: Jamie Lokier <jamie@shareable.org>
2021-12-05 12:20:27 +01:00
# Trusted setup is needed for processing Cancun+ blocks
if conf.trustedSetupFile.isSome:
let fileName = conf.trustedSetupFile.get()
let res = loadTrustedSetup(fileName, 0)
if res.isErr:
fatal "Cannot load Kzg trusted setup from file", msg=res.error
quit(QuitFailure)
else:
let res = loadKzgTrustedSetup()
if res.isErr:
fatal "Cannot load baked in Kzg trusted setup", msg=res.error
quit(QuitFailure)
createDir(string conf.dataDir)
let coreDB =
# Resolve statically for database type
case conf.chainDbMode:
of Aristo,AriPrune:
AristoDbRocks.newCoreDbRef(
string conf.dataDir,
conf.dbOptions(noKeyCache = conf.cmd == NimbusCmd.`import`))
preventLoadingDataDirForTheWrongNetwork(coreDB, conf)
setupMetrics(nimbus, conf)
let taskpool =
try:
if conf.numThreads < 0:
fatal "The number of threads --num-threads cannot be negative."
quit QuitFailure
elif conf.numThreads == 0:
Taskpool.new(numThreads = min(countProcessors(), 16))
else:
Taskpool.new(numThreads = conf.numThreads)
except CatchableError as e:
fatal "Cannot start taskpool", err = e.msg
quit QuitFailure
info "Threadpool started", numThreads = taskpool.numThreads
Unified database frontend integration (#1670) * Nimbus folder environment update details: * Integrated `CoreDbRef` for the sources in the `nimbus` sub-folder. * The `nimbus` program does not compile yet as it needs the updates in the parallel `stateless` sub-folder. * Stateless environment update details: * Integrated `CoreDbRef` for the sources in the `stateless` sub-folder. * The `nimbus` program compiles now. * Premix environment update details: * Integrated `CoreDbRef` for the sources in the `premix` sub-folder. * Fluffy environment update details: * Integrated `CoreDbRef` for the sources in the `fluffy` sub-folder. * Tools environment update details: * Integrated `CoreDbRef` for the sources in the `tools` sub-folder. * Nodocker environment update details: * Integrated `CoreDbRef` for the sources in the `hive_integration/nodocker` sub-folder. * Tests environment update details: * Integrated `CoreDbRef` for the sources in the `tests` sub-folder. * The unit tests compile and run cleanly now. * Generalise `CoreDbRef` to any `select_backend` supported database why: Generalisation was just missed due to overcoming some compiler oddity which was tied to rocksdb for testing. * Suppress compiler warning for `newChainDB()` why: Warning was added to this function which must be wrapped so that any `CatchableError` is re-raised as `Defect`. * Split off persistent `CoreDbRef` constructor into separate file why: This allows to compile a memory only database version without linking the backend library. * Use memory `CoreDbRef` database by default detail: Persistent DB constructor needs to import `db/core_db/persistent why: Most tests use memory DB anyway. This avoids linking `-lrocksdb` or any other backend by default. * fix `toLegacyBackend()` availability check why: got garbled after memory/persistent split. * Clarify raw access to MPT for snap sync handler why: Logically, `kvt` is not the raw access for the hexary trie (although this holds for the legacy database)
2023-08-04 12:10:09 +01:00
let com = CommonRef.new(
db = coreDB,
taskpool = taskpool,
pruneHistory = (conf.chainDbMode == AriPrune),
networkId = conf.networkId,
params = conf.networkParams)
if conf.extraData.len > 32:
warn "ExtraData exceeds 32 bytes limit, truncate",
extraData=conf.extraData,
len=conf.extraData.len
if conf.gasLimit > GAS_LIMIT_MAXIMUM or
conf.gasLimit < GAS_LIMIT_MINIMUM:
warn "GasLimit not in expected range, truncate",
min=GAS_LIMIT_MINIMUM,
max=GAS_LIMIT_MAXIMUM,
get=conf.gasLimit
com.extraData = conf.extraData
com.gasLimit = conf.gasLimit
defer:
com.db.finish()
case conf.cmd
of NimbusCmd.`import`:
2022-12-02 11:39:12 +07:00
importBlocks(conf, com)
of NimbusCmd.`import-rlp`:
importRlpBlocks(conf, com)
else:
2022-12-02 11:39:12 +07:00
basicServices(nimbus, conf, com)
manageAccounts(nimbus, conf)
2024-10-28 22:17:07 +00:00
setupP2P(nimbus, conf, com)
setupRpc(nimbus, conf, com)
Beacon sync activation control update (#2782) * Clarifying/commenting FCU setup condition & small fixes, comments etc. * Update some logging * Reorg metrics updater and activation * Better `async` responsiveness why: Block import does not allow `async` task activation while executing. So allow potential switch after each imported block (rather than a group of 32 blocks.) * Handle resuming after previous sync followed by import why: In this case the ledger state is more recent than the saved sync state. So this is considered a pristine sync where any previous sync state is forgotten. This fixes some assert thrown because of inconsistent internal state at some point. * Provide option for clearing saved beacon sync state before starting syncer why: It would resume with the last state otherwise which might be undesired sometimes. Without RPC available, the syncer typically stops and terminates with the canonical head larger than the base/finalised head. The latter one will be saved as database/ledger state and the canonical head as syncer target. Resuming syncing here will repeat itself. So clearing the syncer state can prevent from starting the syncer unnecessarily avoiding useless actions. * Allow workers to request syncer shutdown from within why: In one-trick-pony mode (after resuming without RPC support) the syncer can be stopped from within soavoiding unnecessary polling. In that case, the syncer can (theoretically) be restarted externally with `startSync()`. * Terminate beacon sync after a single run target is reached why: Stops doing useless polling (typically when there is no RPC available) * Remove crufty comments * Tighten state reload condition when resuming why: Some pathological case might apply if the syncer is stopped while the distance between finalised block and head is very large and the FCU base becomes larger than the locked finalised state. * Verify that finalised number from CL is at least FCU base number why: The FCU base number is determined by the database, non zero if manually imported. The finalised number is passed via RPC by the CL node and will increase over time. Unless fully synced, this number will be pretty low. On the other hand, the FCU call `forkChoice()` will eventually fail if the `finalizedHash` argument refers to something outside the internal chain starting at the FCU base block. * Remove support for completing interrupted sync without RPC support why: Simplifies start/stop logic * Rmove unused import
2024-10-28 16:22:04 +00:00
if conf.maxPeers > 0 and conf.engineApiServerEnabled():
# Not starting syncer if there is definitely no way to run it. This
# avoids polling (i.e. waiting for instructions) and some logging.
Beacon sync activation control update (#2782) * Clarifying/commenting FCU setup condition & small fixes, comments etc. * Update some logging * Reorg metrics updater and activation * Better `async` responsiveness why: Block import does not allow `async` task activation while executing. So allow potential switch after each imported block (rather than a group of 32 blocks.) * Handle resuming after previous sync followed by import why: In this case the ledger state is more recent than the saved sync state. So this is considered a pristine sync where any previous sync state is forgotten. This fixes some assert thrown because of inconsistent internal state at some point. * Provide option for clearing saved beacon sync state before starting syncer why: It would resume with the last state otherwise which might be undesired sometimes. Without RPC available, the syncer typically stops and terminates with the canonical head larger than the base/finalised head. The latter one will be saved as database/ledger state and the canonical head as syncer target. Resuming syncing here will repeat itself. So clearing the syncer state can prevent from starting the syncer unnecessarily avoiding useless actions. * Allow workers to request syncer shutdown from within why: In one-trick-pony mode (after resuming without RPC support) the syncer can be stopped from within soavoiding unnecessary polling. In that case, the syncer can (theoretically) be restarted externally with `startSync()`. * Terminate beacon sync after a single run target is reached why: Stops doing useless polling (typically when there is no RPC available) * Remove crufty comments * Tighten state reload condition when resuming why: Some pathological case might apply if the syncer is stopped while the distance between finalised block and head is very large and the FCU base becomes larger than the locked finalised state. * Verify that finalised number from CL is at least FCU base number why: The FCU base number is determined by the database, non zero if manually imported. The finalised number is passed via RPC by the CL node and will increase over time. Unless fully synced, this number will be pretty low. On the other hand, the FCU call `forkChoice()` will eventually fail if the `finalizedHash` argument refers to something outside the internal chain starting at the FCU base block. * Remove support for completing interrupted sync without RPC support why: Simplifies start/stop logic * Rmove unused import
2024-10-28 16:22:04 +00:00
if not nimbus.beaconSyncRef.start():
nimbus.beaconSyncRef = BeaconSyncRef(nil)
if nimbus.state == NimbusState.Starting:
# it might have been set to "Stopping" with Ctrl+C
nimbus.state = NimbusState.Running
2018-06-20 20:27:32 +03:00
# Main event loop
while nimbus.state == NimbusState.Running:
try:
poll()
except CatchableError as e:
debug "Exception in poll()", exc = e.name, err = e.msg
discard e # silence warning when chronicles not activated
2018-06-20 20:27:32 +03:00
# Stop loop
waitFor nimbus.stop(conf)
2018-04-27 11:53:53 +03:00
when isMainModule:
var nimbus = NimbusNode(state: NimbusState.Starting, ctx: newEthContext())
2020-03-25 20:00:04 +02:00
## Ctrl+C handling
proc controlCHandler() {.noconv.} =
when defined(windows):
# workaround for https://github.com/nim-lang/Nim/issues/4057
setupForeignThreadGc()
nimbus.state = NimbusState.Stopping
2020-03-25 20:00:04 +02:00
echo "\nCtrl+C pressed. Waiting for a graceful shutdown."
setControlCHook(controlCHandler)
## Show logs on stdout until we get the user's logging choice
2019-04-17 03:56:28 +02:00
discard defaultChroniclesStream.output.open(stdout)
2018-06-20 20:27:32 +03:00
## Processing command line arguments
let conf = makeConfig()
nimbus.run(conf)