nimbus-eth1/nimbus/sync/flare.nim

# Nimbus
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
  pkg/[chronicles, chronos, eth/p2p, results],
  pkg/stew/[interval_set, sorted_set],
  ./flare/[worker, worker_desc],
  "."/[sync_desc, sync_sched, protocol]

logScope:
topics = "flare"
type
FlareSyncRef* = RunnerSyncRef[FlareCtxData,FlareBuddyData]
const
extraTraceMessages = false # or true
## Enable additional logging noise
# ------------------------------------------------------------------------------
# Private logging helpers
# ------------------------------------------------------------------------------

template traceMsg(f, info: static[string]; args: varargs[untyped]) =
  trace "Flare scheduler " & f & "() " & info, args

template traceMsgCtx(f, info: static[string]; c: FlareCtxRef) =
  when extraTraceMessages:
    block:
      let
        poolMode {.inject.} = c.poolMode
        daemon   {.inject.} = c.daemon
      f.traceMsg info, poolMode, daemon

template traceMsgBuddy(f, info: static[string]; b: FlareBuddyRef) =
  when extraTraceMessages:
    block:
      let
        peer     {.inject.} = b.peer
        runState {.inject.} = b.ctrl.state
        multiOk  {.inject.} = b.ctrl.multiOk
        poolMode {.inject.} = b.ctx.poolMode
        daemon   {.inject.} = b.ctx.daemon
      f.traceMsg info, peer, runState, multiOk, poolMode, daemon

template tracerFrameCtx(f: static[string]; c: FlareCtxRef; code: untyped) =
  f.traceMsgCtx "begin", c
  code
  f.traceMsgCtx "end", c

template tracerFrameBuddy(f: static[string]; b: FlareBuddyRef; code: untyped) =
  f.traceMsgBuddy "begin", b
  code
  f.traceMsgBuddy "end", b
# ------------------------------------------------------------------------------
# Virtual methods/interface, `mixin` functions
# ------------------------------------------------------------------------------

proc runSetup(ctx: FlareCtxRef): bool =
  tracerFrameCtx("runSetup", ctx):
    result = worker.setup(ctx)

proc runRelease(ctx: FlareCtxRef) =
  tracerFrameCtx("runRelease", ctx):
    worker.release(ctx)

proc runDaemon(ctx: FlareCtxRef) {.async.} =
  tracerFrameCtx("runDaemon", ctx):
    await worker.runDaemon(ctx)

proc runStart(buddy: FlareBuddyRef): bool =
  tracerFrameBuddy("runStart", buddy):
    result = worker.start(buddy)

proc runStop(buddy: FlareBuddyRef) =
  tracerFrameBuddy("runStop", buddy):
    worker.stop(buddy)

proc runPool(buddy: FlareBuddyRef; last: bool; laps: int): bool =
  tracerFrameBuddy("runPool", buddy):
    result = worker.runPool(buddy, last, laps)

proc runSingle(buddy: FlareBuddyRef) {.async.} =
  tracerFrameBuddy("runSingle", buddy):
    await worker.runSingle(buddy)

proc runMulti(buddy: FlareBuddyRef) {.async.} =
  tracerFrameBuddy("runMulti", buddy):
    await worker.runMulti(buddy)

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc init*(
    T: type FlareSyncRef;
    ethNode: EthereumNode;
    chain: ForkedChainRef;
    maxPeers: int;
    chunkSize: int;
      ): T =
  var desc = T()
  desc.initSync(ethNode, chain, maxPeers)
  desc.ctx.pool.nBodiesBatch = chunkSize
  desc

proc start*(ctx: FlareSyncRef) =
  ## Beacon sync always begins in stop mode
  doAssert ctx.startSync()      # Initialize subsystems

proc stop*(ctx: FlareSyncRef) =
  ctx.stopSync()
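
# Usage sketch (illustration only): how a host application might wire up this
# module. `node` and `chain` are placeholders for the `EthereumNode` and
# `ForkedChainRef` instances the application already owns; `maxPeers` and
# `chunkSize` are whatever limits its configuration provides.
#
#   let flareSync = FlareSyncRef.init(node, chain, maxPeers, chunkSize)
#   flareSync.start()        # initialise subsystems (asserts on success)
#   # ... run the node ...
#   flareSync.stop()
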
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------