nimbus-eth1/nimbus/sync/beacon/worker_config.nim

# Nimbus
# Copyright (c) 2021-2024 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at
#     https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at
#     https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises:[].}

import
  pkg/chronos

const
  enableTicker* = true
    ## Log regular status updates similar to metrics. Great for debugging.

  runsThisManyPeersOnly* = 8
    ## Set to `1` for running a single peer only at a time. Great for
    ## debugging.
    ##
    ## Otherwise, this setting limits the number of peers accepted by the
    ## `runStart()` peer initialiser. When testing with an unlimited number of
    ## peers with some double digit number of connected peers, the observed
    ## response times when fetching headers seemed to degrade considerably
    ## into seconds (rather than ms.) This will be further looked at to be
    ## confirmed or rejected as insignificant.
    ##
    ## FIXME: This setting has priority over the `maxPeers` setting of the
    ##        `BeaconSyncRef.init()` initialiser. This might be harmonised at
    ##        a later stage.

  # ----------------------

  metricsUpdateInterval* = chronos.seconds(10)
    ## Wait at least this time before the next metrics update.

  daemonWaitInterval* = chronos.seconds(10)
    ## Some waiting time at the end of the daemon task which always lingers
    ## in the background.

  workerIdleWaitInterval* = chronos.seconds(10)
    ## Sleep some time in multi-mode if there is nothing to do.

  asyncThreadSwitchTimeSlot* = chronos.nanoseconds(10)
    ## Nano-sleep to allow a pseudo/async thread switch.

  # ----------------------

  nFetchHeadersRequest* = 1_024
    ## Number of headers that will be requested with a single `eth/xx` message.
    ##
    ## On `Geth`, responses to larger requests are all truncated to 1024 header
    ## entries (see `Geth` constant `maxHeadersServe`.)

  fetchHeadersReqThresholdZombie* = chronos.seconds(2)
    ## Response time allowance for a header fetch request (see
    ## `fetchHeadersReqThresholdCount` for how it is applied.)
  fetchHeadersReqThresholdCount* = 3
    ## If the response time for the set of headers exceeds
    ## `fetchHeadersReqThresholdZombie` for more than
    ## `fetchHeadersReqThresholdCount` times in a row, then this peer will be
    ## banned for a while.

  fetchHeadersReqMinResponsePC* = 10
    ## Some peers only returned one header at a time. If these peers sit on a
    ## farm, they might collectively slow down the download process. So this
    ## constant sets a percentage of minimum headers needed to return so that
    ## the peer is not treated as a slow responder (see above for slow
    ## responder count.)

  nFetchHeadersBatch* = 8 * nFetchHeadersRequest
    ## Length of the request/stage batch. Several headers are consecutively
    ## fetched and stashed together as a single record on the staged queue.

  headersStagedQueueLengthLwm* = 32
    ## Limit the number of records in the staged headers queue.
    ##
    ## Queue entries start accumulating if one peer stalls while fetching the
    ## top chain so leaving a gap. This gap must be filled first before
    ## inserting the queue into a contiguous chain of headers.
    ##
    ## This low-water mark triggers the system to do some **magic** to mitigate
    ## the above problem. Currently the **magic** is to let (pseudo) threads
    ## terminate and then restart all over again.

  headersStagedQueueLengthHwm* = 48
    ## High-water mark. If this size is exceeded, the staged queue is flushed
    ## and resized to `headersStagedQueueLengthLwm-1` entries. Then the
    ## contents are re-fetched from scratch.

  # ----------------------

  nFetchBodiesRequest* = 128
    ## Similar to `nFetchHeadersRequest`

  fetchBodiesReqThresholdZombie* = chronos.seconds(2)
    ## Similar to `fetchHeadersReqThresholdZombie`
  fetchBodiesReqThresholdCount* = 3
    ## Similar to `fetchHeadersReqThresholdCount`

  fetchBodiesReqMinResponsePC* = 10
    ## Similar to `fetchHeadersReqMinResponsePC`

  nFetchBodiesBatchDefault* = 6 * nFetchBodiesRequest
    ## Similar to `nFetchHeadersBatch`
    ##
    ## This value can be overridden with a smaller value which must be at
    ## least `nFetchBodiesRequest`.

  blocksStagedQueueLenMaxDefault* = 16
    ## Maximum number of staged header + bodies blocks records to be filled. If
    ## this size is reached, the process stops staging with the exception of
    ## the lowest blocks (in case there is a gap.)
    ##
    ## This value might be adjusted with a larger value if
    ## `nFetchBodiesBatchDefault` is overridden with a smaller value.
    ##
    ## Some cursory measurements on `MainNet` suggest an average maximum block
    ## size ~25KiB (i.e. header + body) at block height ~4.5MiB.
    ## NOTE(review): a block height is a count, so `MiB` presumably means
    ## ~4.5 million here -- confirm.
    ##
    ## There will be as many as `nFetchBodiesBatchDefault` blocks (or the
    ## value overriding it) on a single staged blocks record. And there will
    ## be at most `blocksStagedQueueLenMaxDefault+1` records (or the adjusted
    ## value plus one) on the staged blocks queue. (The `+1` is exceptional,
    ## appears when the least entry block number is too high and so leaves a
    ## gap to the ledger state block number.)

  # ----------------------

static:
  # Compile-time sanity checks for the configuration constants of this
  # module. A violated condition aborts compilation.

  # At least one peer must be allowed to run.
  doAssert runsThisManyPeersOnly > 0

  # Headers: a request must be non-empty and fit into a single batch, and
  # the staged queue water marks must be positive and strictly ordered.
  doAssert nFetchHeadersRequest > 0
  doAssert nFetchHeadersBatch >= nFetchHeadersRequest
  doAssert headersStagedQueueLengthLwm > 0
  doAssert headersStagedQueueLengthHwm > headersStagedQueueLengthLwm

  # Bodies: a request must be non-empty and fit into the default batch, and
  # the default staged blocks queue limit must be positive.
  doAssert nFetchBodiesRequest > 0
  doAssert nFetchBodiesBatchDefault >= nFetchBodiesRequest
  doAssert blocksStagedQueueLenMaxDefault > 0

# End
Flare sync (#2627) * Cosmetics, small fixes, add stashed headers verifier * Remove direct `Era1` support why: Era1 is indirectly supported by using the import tool before syncing. * Clarify database persistent save function. why: Function relied on the last saved state block number which was wrong. It now relies on the tx-level. If it is 0, then data are saved directly. Otherwise the task that owns the tx will do it. * Extracted configuration constants into separate file * Enable single peer mode for debugging * Fix peer losing issue in multi-mode details: Running concurrent download peers was previously programmed as running a batch downloading and storing ~8k headers and then leaving the `async` function to be restarted by a scheduler. This was unfortunate because of occasionally occurring long waiting times for restart. While the time gap until restarting were typically observed a few millisecs, there were always a few outliers which well exceed several seconds. This seemed to let remote peers run into timeouts. * Prefix function names `unprocXxx()` and `stagedYyy()` by `headers` why: There will be other `unproc` and `staged` modules. * Remove cruft, update logging * Fix accounting issue details: When staging after fetching headers from the network, there was an off by 1 error occurring when the result was by one smaller than requested. Also, a whole range was mis-accounted when a peer was terminating connection immediately after responding. * Fix slow/error header accounting when fetching why: Originally set for detecting slow headers in a row, the counter was wrongly extended to general errors. * Ban peers for a while that respond with too few headers continuously why: Some peers only returned one header at a time. If these peers sit on a farm, they might collectively slow down the download process. * Update RPC beacon header updater why: Old function hook has slightly changed its meaning since it was used for snap sync. 
Also, the old hook is used by other functions already. * Limit number of peers or set to single peer mode details: Merge several concepts, single peer mode being one of it. * Some code clean up, fixings for removing of compiler warnings * De-noise header fetch related sources why: Header download looks relatively stable, so general debugging is not needed, anymore. This is the equivalent of removing the scaffold from the part of the building where work has completed. * More clean up and code prettification for headers stuff * Implement body fetch and block import details: Available headers are used stage blocks by combining existing headers with newly fetched blocks. Then these blocks are imported/executed via `persistBlocks()`. * Logger cosmetics and cleanup * Remove staged block queue debugging details: Feature still available, just not executed anymore * Docu, logging update * Update/simplify `runDaemon()` * Re-calibrate block body requests and soft config for import blocks batch why: * For fetching, larger fetch requests are mostly truncated anyway on MainNet. * For executing, smaller batch sizes reduce the memory needed for the price of longer execution times. * Update metrics counters * Docu update * Some fixes, formatting updates, etc. * Update `borrowed` type: uint -. uint64 also: Always convert to `uint64` rather than `uint` where appropriate 2024-09-27 15:07:42 +00:00			`# Nimbus`
			`# Copyright (c) 2021-2024 Status Research & Development GmbH`
			`# Licensed and distributed under either of`
			`# * MIT license (license terms in the root directory or at`
			`# https://opensource.org/licenses/MIT).`
			`# * Apache v2 license (license terms in the root directory or at`
			`# https://www.apache.org/licenses/LICENSE-2.0).`
			`# at your option. This file may not be copied, modified, or distributed`
			`# except according to those terms.`

			`{.push raises:[].}`

			`import`
			`pkg/chronos`

			`const`
			`enableTicker* = true`
			`## Log regular status updates similar to metrics. Great for debugging.`

			`runsThisManyPeersOnly* = 8`
			## Set to `1` for running a single peer only at a time. Great for debugging.
			`##`
			`## Otherwise, this setting limits the number of peers accepted by the`
			## `runStart()` peer initialiser. When testing with an unlimited number of
			`## peers with some double digit number of connected peers, the observed`
			`## response times when fetching headers seemed to degrade considerable into`
			`## seconds (rather than ms.) This will be further looked at to be confirmed`
			`## or rejected as insignificant.`
			`##`
			## FIXME: This setting has priority over the `maxPeers` setting of the
Rename flare as beacon (#2680) * Remove `--sync-mode` option from nimbus config why: Currently there is only one sync mode available. * Rename `flare` -> `beacon`, but not base module folder and nim source why: The name `flare` was used do designate an alternative `beacon` mode that. Leaving the base folder and source as-is for a moment, makes it easier to read change diffs. * Rename `flare` base module folder and nim source: `flare` -> `beacon` 2024-10-02 11:31:33 +00:00			## `BeaconSyncRef.init()` initaliser. This might be harmonised at
Flare sync (#2627) * Cosmetics, small fixes, add stashed headers verifier * Remove direct `Era1` support why: Era1 is indirectly supported by using the import tool before syncing. * Clarify database persistent save function. why: Function relied on the last saved state block number which was wrong. It now relies on the tx-level. If it is 0, then data are saved directly. Otherwise the task that owns the tx will do it. * Extracted configuration constants into separate file * Enable single peer mode for debugging * Fix peer losing issue in multi-mode details: Running concurrent download peers was previously programmed as running a batch downloading and storing ~8k headers and then leaving the `async` function to be restarted by a scheduler. This was unfortunate because of occasionally occurring long waiting times for restart. While the time gap until restarting were typically observed a few millisecs, there were always a few outliers which well exceed several seconds. This seemed to let remote peers run into timeouts. * Prefix function names `unprocXxx()` and `stagedYyy()` by `headers` why: There will be other `unproc` and `staged` modules. * Remove cruft, update logging * Fix accounting issue details: When staging after fetching headers from the network, there was an off by 1 error occurring when the result was by one smaller than requested. Also, a whole range was mis-accounted when a peer was terminating connection immediately after responding. * Fix slow/error header accounting when fetching why: Originally set for detecting slow headers in a row, the counter was wrongly extended to general errors. * Ban peers for a while that respond with too few headers continuously why: Some peers only returned one header at a time. If these peers sit on a farm, they might collectively slow down the download process. * Update RPC beacon header updater why: Old function hook has slightly changed its meaning since it was used for snap sync. 
Also, the old hook is used by other functions already. * Limit number of peers or set to single peer mode details: Merge several concepts, single peer mode being one of it. * Some code clean up, fixings for removing of compiler warnings * De-noise header fetch related sources why: Header download looks relatively stable, so general debugging is not needed, anymore. This is the equivalent of removing the scaffold from the part of the building where work has completed. * More clean up and code prettification for headers stuff * Implement body fetch and block import details: Available headers are used stage blocks by combining existing headers with newly fetched blocks. Then these blocks are imported/executed via `persistBlocks()`. * Logger cosmetics and cleanup * Remove staged block queue debugging details: Feature still available, just not executed anymore * Docu, logging update * Update/simplify `runDaemon()` * Re-calibrate block body requests and soft config for import blocks batch why: * For fetching, larger fetch requests are mostly truncated anyway on MainNet. * For executing, smaller batch sizes reduce the memory needed for the price of longer execution times. * Update metrics counters * Docu update * Some fixes, formatting updates, etc. * Update `borrowed` type: uint -. uint64 also: Always convert to `uint64` rather than `uint` where appropriate 2024-09-27 15:07:42 +00:00			`## a later stage.`

			`# ----------------------`

			`metricsUpdateInterval* = chronos.seconds(10)`
			`## Wait at least this time before next update`

			`daemonWaitInterval* = chronos.seconds(10)`
			`## Some waiting time at the end of the daemon task which always lingers`
			`## in the background.`

			`workerIdleWaitInterval* = chronos.seconds(10)`
			`## Sleep some time in multi-mode if there is nothing to do`

			`asyncThreadSwitchTimeSlot* = chronos.nanoseconds(10)`
			`## Nano-sleep to allows pseudo/async thread switch`

			`# ----------------------`

			`nFetchHeadersRequest* = 1_024`
			## Number of headers that will be requested with a single `eth/xx` message.
			`##`
			## On `Geth`, responses to larger requests are all truncted to 1024 header
			## entries (see `Geth` constant `maxHeadersServe`.)

			`fetchHeadersReqThresholdZombie* = chronos.seconds(2)`
			`fetchHeadersReqThresholdCount* = 3`
			`## Response time allowance. If the response time for the set of headers`
			## exceeds this threshold for more than `fetchHeadersReqThresholdCount`
			`## times in a row, then this peer will be banned for a while.`

			`fetchHeadersReqMinResponsePC* = 10`
			`## Some peers only returned one header at a time. If these peers sit on a`
			`## farm, they might collectively slow down the download process. So this`
			`## constant sets a percentage of minimum headers needed to return so that`
			`## the peers is not treated as a slow responder (see above for slow`
			`## responder count.)`

			`nFetchHeadersBatch* = 8 * nFetchHeadersRequest`
			`## Length of the request/stage batch. Several headers are consecutively`
			`## fetched and stashed together as a single record on the staged queue.`

			`headersStagedQueueLengthLwm* = 32`
			`## Limit the number of records in the staged headers queue.`
			`##`
			`## Queue entries start accumulating if one peer stalls while fetching the`
			`## top chain so leaving a gap. This gap must be filled first before`
			`## inserting the queue into a contiguous chain of headers.`
			`##`
			`## This low-water mark tryggers the system to do some magic to mitigate`
			`## the above problem. Currently the magic is to let (pseudo) threads`
			`## terminate and then restart all over again.`

			`headersStagedQueueLengthHwm* = 48`
			`## If this size is exceeded, the staged queue is flushed and resized to`
			## `headersStagedQueueLengthLwm-1` entries. Then contents is re-fetched
			`## from scratch.`

			`# ----------------------`

			`nFetchBodiesRequest* = 128`
			## Similar to `nFetchHeadersRequest`

			`fetchBodiesReqThresholdZombie* = chronos.seconds(2)`
			`fetchBodiesReqThresholdCount* = 3`
			## Similar to `fetchHeadersReqThreshold*`

			`fetchBodiesReqMinResponsePC* = 10`
			## Similar to `fetchHeadersReqMinResponsePC`

			`nFetchBodiesBatchDefault* = 6 * nFetchBodiesRequest`
			## Similar to `nFetchHeadersBatch`
			`##`
			`## This value can be overridden with a smaller value which must be at`
			## least `nFetchBodiesRequest`.

			`blocksStagedQueueLenMaxDefault* = 16`
			`## Maximum number of staged header + bodies blocks records to be filled. If`
			`## this size is reached, the process stops with staging with the exception`
			`## of the lowest blockes (in case there is a gap.)`
			`##`
			`## This value might be adjusted with a larger value if`
			## `nFetchBodiesBatchDefault` is overridden with a smaller value.
			`##`
			## Some cursory measurements on `MainNet` suggest an average maximum block
			`## size ~25KiB (i.e. header + body) at block height ~4.5MiB. There will be`
			## as many as `nFetchBodiesBatch` blocks on a single staged blocks record.
			## And there will be at most `blocksStagedQueueLengthMax+1` records on the
			## staged blocks queue. (The `+1` is exceptional, appears when the least
			`## entry block number is too high and so leaves a gap to the ledger state`
			`## block number.)`

			`# ----------------------`

			`static:`
			`doAssert 0 < runsThisManyPeersOnly`

			`doAssert 0 < nFetchHeadersRequest`
			`doAssert nFetchHeadersRequest <= nFetchHeadersBatch`
			`doAssert 0 < headersStagedQueueLengthLwm`
			`doAssert headersStagedQueueLengthLwm < headersStagedQueueLengthHwm`

			`doAssert 0 < nFetchBodiesRequest`
			`doAssert nFetchBodiesRequest <= nFetchBodiesBatchDefault`
			`doAssert 0 < blocksStagedQueueLenMaxDefault`

			`# End`