# Nimbus
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
  pkg/[chronicles, chronos, eth/p2p, results],
  pkg/stew/[interval_set, sorted_set],
  ./flare/[worker, worker_desc],
  "."/[sync_desc, sync_sched, protocol]

logScope:
  topics = "flare"

type
  FlareSyncRef* = RunnerSyncRef[FlareCtxData,FlareBuddyData]

const
  extraTraceMessages = false # or true
    ## Enable additional logging noise

# ------------------------------------------------------------------------------
# Private logging helpers
# ------------------------------------------------------------------------------

template traceMsg(f, info: static[string]; args: varargs[untyped]) =
  trace "Flare scheduler " & f & "() " & info, args

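# Example: with `f = "runStart"` and `info = "begin"` the template above
# produces a trace line reading `Flare scheduler runStart() begin`, together
# with whatever key/value arguments were passed in `args`.
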
template traceMsgCtx(f, info: static[string]; c: FlareCtxRef) =
  when extraTraceMessages:
    block:
      let
        poolMode {.inject.} = c.poolMode
        daemon   {.inject.} = c.daemon
      f.traceMsg info, poolMode, daemon

template traceMsgBuddy(f, info: static[string]; b: FlareBuddyRef) =
  when extraTraceMessages:
    block:
      let
        peer     {.inject.} = b.peer
        runState {.inject.} = b.ctrl.state
        multiOk  {.inject.} = b.ctrl.multiOk
        poolMode {.inject.} = b.ctx.poolMode
        daemon   {.inject.} = b.ctx.daemon
      f.traceMsg info, peer, runState, multiOk, poolMode, daemon

template tracerFrameCtx(f: static[string]; c: FlareCtxRef; code: untyped) =
  f.traceMsgCtx "begin", c
  code
  f.traceMsgCtx "end", c

template tracerFrameBuddy(f: static[string]; b: FlareBuddyRef; code: untyped) =
  f.traceMsgBuddy "begin", b
  code
  f.traceMsgBuddy "end", b

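# Example: a handler body wrapped as
#
#   tracerFrameBuddy("runStart", buddy):
#     result = worker.start(buddy)
#
# gets bracketed by a "begin" and an "end" trace message while
# `extraTraceMessages` is enabled; with tracing disabled the wrappers reduce
# to the plain handler code.
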
# ------------------------------------------------------------------------------
# Virtual methods/interface, `mixin` functions
# ------------------------------------------------------------------------------
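
# The `runXxx()` handlers below implement the scheduler interface picked up as
# `mixin` functions by `sync_sched` (imported above); each one merely wraps
# the corresponding `worker` call in a trace frame.
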
proc runSetup(ctx: FlareCtxRef): bool =
  tracerFrameCtx("runSetup", ctx):
    result = worker.setup(ctx)

proc runRelease(ctx: FlareCtxRef) =
  tracerFrameCtx("runRelease", ctx):
    worker.release(ctx)

proc runDaemon(ctx: FlareCtxRef) {.async.} =
  tracerFrameCtx("runDaemon", ctx):
    await worker.runDaemon(ctx)

proc runStart(buddy: FlareBuddyRef): bool =
  tracerFrameBuddy("runStart", buddy):
    result = worker.start(buddy)

proc runStop(buddy: FlareBuddyRef) =
  tracerFrameBuddy("runStop", buddy):
    worker.stop(buddy)

proc runPool(buddy: FlareBuddyRef; last: bool; laps: int): bool =
  tracerFrameBuddy("runPool", buddy):
    result = worker.runPool(buddy, last, laps)

proc runSingle(buddy: FlareBuddyRef) {.async.} =
  tracerFrameBuddy("runSingle", buddy):
    await worker.runSingle(buddy)

proc runMulti(buddy: FlareBuddyRef) {.async.} =
  tracerFrameBuddy("runMulti", buddy):
    await worker.runMulti(buddy)

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc init*(
    T: type FlareSyncRef;
    ethNode: EthereumNode;
    chain: ForkedChainRef;
    maxPeers: int;
    chunkSize: int;
      ): T =
  var desc = T()
  desc.initSync(ethNode, chain, maxPeers)
  desc.ctx.pool.nBodiesBatch = chunkSize
  desc

proc start*(ctx: FlareSyncRef) =
  ## Beacon Sync always begins in stop mode.
  doAssert ctx.startSync()      # Initialize subsystems

proc stop*(ctx: FlareSyncRef) =
  ctx.stopSync()

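# Typical wiring, as an illustrative sketch only (the `node` and `chain`
# handles and the numbers are placeholders supplied by the host application):
#
#   let sync = FlareSyncRef.init(node, chain, maxPeers = 10, chunkSize = 1000)
#   sync.start()
#   # ... run the node ...
#   sync.stop()
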
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------