nimbus-eth1/nimbus/sync/snap/constants.nim

# Nimbus
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
  std/sets,
  eth/[common, trie/nibbles]

const
  EmptyBlob* = seq[byte].default
    ## Useful shortcut: the default (i.e. empty) byte sequence.

  EmptyBlobSet* = HashSet[Blob].default
    ## Useful shortcut: the default (i.e. empty) hash set of `Blob` items.

  EmptyBlobSeq* = seq[Blob].default
    ## Useful shortcut: the default (i.e. empty) sequence of `Blob` items.

  EmptyNibbleSeq* = EmptyBlob.initNibbleRange
    ## Useful shortcut: the nibble range initialised from `EmptyBlob`.

  # ---------

  pivotTableLruEntriesMax* = 50
    ## Max depth of the pivot table. On overflow, the oldest entry will be
    ## removed.

  pivotBlockDistanceMin* = 128
    ## The minimal depth of two block headers needed to activate a new state
    ## root pivot.
    ##
    ## Effects on assembling the state via `snap/1` protocol:
    ##
    ## * A small value of this constant increases the propensity to update the
    ##   pivot header more often. This is so because each new peer negotiates
    ##   a pivot block number at least the current one.
    ##
    ## * A large value keeps the current pivot more stable but some experiments
    ##   suggest that the `snap/1` protocol is answered only for later block
    ##   numbers (aka pivot blocks.) So a large value tends to keep the pivot
    ##   farther away from the chain head.
    ##
    ##   Note that 128 is the magic distance for snapshots used by *Geth*.

  # --------------

  fetchRequestBytesLimit* = 2 * 1024 * 1024
    ## Soft bytes limit to request in `snap/1` protocol calls (2 MiB.)

  fetchRequestTrieNodesMax* = 1024
    ## Informal maximal number of trie nodes to fetch at once in `snap/1`
    ## protocol calls. This is not an official limit but found with several
    ## implementations (e.g. Geth.)
    ##
    ## Restricting the fetch list length early allows to better parallelise
    ## healing.

  fetchRequestStorageSlotsMax* = 2 * 1024
    ## Maximal number of storage tries to fetch with a single request message.
    ## This value is also the base for `storageSlotsFetchFailedFullMax`.

  # --------------

  fetchRequestContractsMax* = 1024
    ## Maximal number of contract codes to fetch with a single request message.

  # --------------

  saveAccountsProcessedChunksMax* = 1000
    ## Recovery data are stored if the processed ranges list contains no more
    ## than this many range *chunks*.
    ##
    ## If the range set is too much fragmented, no data will be saved and
    ## restart has to perform from scratch or an earlier checkpoint.

  saveStorageSlotsMax* = 20_000
    ## Recovery data are stored if the outstanding storage slots to process do
    ## not amount to more than this many entries.
    ##
    ## If there are too many dangling nodes, no data will be saved and restart
    ## has to perform from scratch or an earlier checkpoint.
    ##
    ## Checked at compile time to be larger than `storageSlotsQuPrioThresh`.

  saveContactsMax* = 10_000
    ## Similar to `saveStorageSlotsMax`.
    ##
    ## Checked at compile time to be larger than `contractsQuPrioThresh`.
    ##
    ## NOTE(review): the name reads `Contacts` but presumably refers to
    ## contracts (it is paired with `contractsQuPrioThresh`.) Renaming would
    ## change the public interface, so it is left as is -- confirm.

  # --------------

  storageSlotsFetchFailedFullMax* = fetchRequestStorageSlotsMax + 100
    ## Maximal number of failures when fetching full range storage slots.
    ## These failed slot ranges are only called for once in the same cycle.
    ##
    ## The limit is derived from `fetchRequestStorageSlotsMax` plus a margin
    ## of 100.

  storageSlotsFetchFailedPartialMax* = 300
    ## Same as `storageSlotsFetchFailedFullMax` but for partial range storage
    ## slots.

  storageSlotsTrieInheritPerusalMax* = 30_000
    ## Maximal number of nodes to visit in order to find out whether this
    ## storage slots trie is complete. This allows to *inherit* the full trie
    ## for an existing root node if the trie is small enough.

  storageSlotsQuPrioThresh* = 5_000
    ## For a new worker, prioritise processing the storage slots queue over
    ## processing accounts if the queue has more than this many items.
    ##
    ## For a running worker processing accounts, stop processing accounts
    ## and switch to processing the storage slots queue if the queue has
    ## more than this many items.
    ##
    ## Checked at compile time to be smaller than `saveStorageSlotsMax`.

  # --------------

  contractsQuPrioThresh* = 2_000
    ## Similar to `storageSlotsQuPrioThresh` (presumably applied to the
    ## contracts queue, going by the name -- confirm against the callers.)
    ##
    ## Checked at compile time to be smaller than `saveContactsMax`.

  # --------------

  healAccountsCoverageTrigger* = 1.01
    ## Apply accounts healing if the global snap download coverage factor
    ## exceeds this setting. The global coverage factor is derived by merging
    ## all account ranges retrieved for all pivot state roots (see
    ## `coveredAccounts` in the object `CtxData`.) Note that a coverage factor
    ## greater than 100% is not exact but rather a lower bound estimate.

  healAccountsInspectionPlanBLevel* = 4
    ## Search this level deep for missing nodes if `hexaryEnvelopeDecompose()`
    ## only produces existing nodes.

  healAccountsInspectionPlanBRetryMax* = 2
    ## Retry inspection with depth level argument starting at
    ## `healAccountsInspectionPlanBLevel-1` and counting down at most this
    ## many times until there is at least one dangling node found and the
    ## depth level argument remains positive. The cumulative depth of the
    ## iterated search is
    ## ::
    ##      b        1
    ##      Σ ν  =  --- (b - a + 1) (a + b)
    ##      a        2
    ## for
    ## ::
    ##      b = healAccountsInspectionPlanBLevel
    ##      a = b - healAccountsInspectionPlanBRetryMax
    ##

  healAccountsInspectionPlanBRetryNapMSecs* = 2
    ## Sleep between inspection retries to allow thread switch. If this
    ## constant is set `0`, `1`ns wait is used.

  # --------------

  healStorageSlotsInspectionPlanBLevel* = 5
    ## Similar to `healAccountsInspectionPlanBLevel`

  healStorageSlotsInspectionPlanBRetryMax* = 99 # 5 + 4 + .. + 1 => 15
    ## Similar to `healAccountsInspectionPlanBRetryMax`
    ##
    ## NOTE(review): this value is larger than
    ## `healStorageSlotsInspectionPlanBLevel`, so the retry countdown is
    ## presumably bounded by the depth level staying positive rather than by
    ## this constant (hence the cumulative depth of 15 noted above) -- confirm.

  healStorageSlotsInspectionPlanBRetryNapMSecs* = 2
    ## Similar to `healAccountsInspectionPlanBRetryNapMSecs`

  healStorageSlotsBatchMax* = 32
    ## Maximal number of storage tries to heal in a single batch run. Only
    ## this many items will be removed from the batch queue. These items will
    ## then be processed one by one.

  healStorageSlotsFailedMax* = 300
    ## NOTE(review): the comment here used to read "Ditto for partial range
    ## storage slots" although no failure limit precedes it in this group.
    ## Presumably this is the maximal number of failures accepted when healing
    ## storage slots (compare `storageSlotsFetchFailedPartialMax`, which has
    ## the same value) -- confirm against the callers.

  # --------------

  comErrorsTimeoutMax* = 3
    ## Maximal number of non-responses accepted in a row. If there are more
    ## than `comErrorsTimeoutMax` consecutive errors, the worker will be
    ## degraded as zombie.

  comErrorsTimeoutSleepMSecs* = 5000
    ## Wait/suspend for this many milliseconds (unit as per the `MSecs` name
    ## suffix) after a timeout error if there are not more than
    ## `comErrorsTimeoutMax` errors in a row (maybe some other network or
    ## no-data errors mixed in.) Set 0 to disable.


  comErrorsNetworkMax* = 5
    ## Similar to `comErrorsTimeoutMax` but for network errors.

  comErrorsNetworkSleepMSecs* = 5000
    ## Similar to `comErrorsTimeoutSleepMSecs` but for network errors.
    ## Set 0 to disable.

  comErrorsNoDataMax* = 3
    ## Similar to `comErrorsTimeoutMax` but for missing data errors.

  comErrorsNoDataSleepMSecs* = 0
    ## Similar to `comErrorsTimeoutSleepMSecs` but for missing data errors.
    ## Set 0 to disable (which is the current setting.)

static:
  # Compile-time sanity checks for the constants defined in this module.

  # Each queue priority threshold must stay below the matching checkpoint
  # save limit.
  doAssert saveStorageSlotsMax > storageSlotsQuPrioThresh
  doAssert saveContactsMax > contractsQuPrioThresh

  # The failure limits must not be negative.
  doAssert storageSlotsFetchFailedFullMax >= 0
  doAssert storageSlotsFetchFailedPartialMax >= 0

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								# Nimbus
 								# Copyright (c) 2021 Status Research & Development GmbH
 								# Licensed under either of
 								#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
 								#    http://www.apache.org/licenses/LICENSE-2.0)
 								#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
 								#    http://opensource.org/licenses/MIT)
 								# at your option. This file may not be copied, modified, or distributed
 								# except according to those terms.
-												Silence compiler gossip after nim upgrade (#1454)

* Silence some compiler gossip -- part 1, tx_pool

details:
  Mostly removing redundant imports and `Defect` tracer after switch
  to nim 1.6

* Silence some compiler gossip -- part 2, clique

details:
  Mostly removing redundant imports and `Defect` tracer after switch
  to nim 1.6

* Silence some compiler gossip -- part 3, misc core

details:
  Mostly removing redundant imports and `Defect` tracer after switch
  to nim 1.6

* Silence some compiler gossip -- part 4, sync

details:
  Mostly removing redundant imports and `Defect` tracer after switch
  to nim 1.6

* Clique update

why:
  Missing exception annotation
											
										
										
											2023-01-30 22:10:23 +00:00
+								{.push raises: [].}
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Update snap server client test scenario (#1518)

* Redesign snap1 message GetTrieNodes argument prototypes

why:
  A list of sub-objects `seq[SnapTriePath]` is more intuitive to work with
  than an opaque definition `seq[seq[Blob]]` because the inner
  `SnapTriePath` object has a dedicated inner structure (for how to
  interpret `seq[Blob]`.)

* Collect some public constants into `constants.nim` file

* Reorg `hexary_paths.nim`

why:
+ Collecting nodes following a partial path properly ending at an
  extension node failed to collect this last node.
+ Merged the nodes collecting algorithm for persistent and in-memory
  into a single generic function `hexary_paths.rootPathExtend()`

info:
  Extracted common tasks to `hexary_nodes_helper.nim`

* Implement `StorageRanges` message handler for snap/1 protocol
											
										
										
											2023-03-22 20:11:49 +00:00
+								import
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This led to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss was made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  std/sets,
-												Update snap client account healing (#1521)

* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not be inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leads to de-fragmentation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
											
										
										
											2023-03-25 10:44:48 +00:00
+								  eth/[common, trie/nibbles]
-												Update snap server client test scenario (#1518)

* Redesign snap1 message GetTrieNodes argument prototypes

why:
  A list of sub-objects `seq[SnapTriePath]` is more intuitive to work with
  than an opaque definition `seq[seq[Blob]]` because the inner
  `SnapTriePath` object has a dedicated inner structure (for how to
  interpret `seq[Blob]`.)

* Collect some public constants into `constants.nim` file

* Reorg `hexary_paths.nim`

why:
+ Collecting nodes following a partial path properly ending at an
  extension node failed to collect this last node.
+ Merged the nodes collecting algorithm for persistent and in-memory
  into a single generic function `hexary_paths.rootPathExtend()`

info:
  Extracted common tasks to `hexary_nodes_helper.nim`

* Implement `StorageRanges` message handler for snap/1 protocol
											
										
										
											2023-03-22 20:11:49 +00:00
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								const
-												Update snap server client test scenario (#1518)

* Redesign snap1 message GetTrieNodes argument prototypes

why:
  A list of sub-objects `seq[SnapTriePath]` is more intuitive to work with
  than an opaque definition `seq[seq[Blob]]` because the inner
  `SnapTriePath` object has a dedicated inner structure (for how to
  interpret `seq[Blob]`.)

* Collect some public constants into `constants.nim` file

* Reorg `hexary_paths.nim`

why:
+ Collecting nodes following a partial path properly ending at an
  extension node failed to collect this last node.
+ Merged the nodes collecting algorithm for persistent and in-memory
  into a single generic function `hexary_paths.rootPathExtend()`

info:
  Extracted common tasks to `hexary_nodes_helper.nim`

* Implement `StorageRanges` message handler for snap/1 protocol
											
										
										
											2023-03-22 20:11:49 +00:00
+								  EmptyBlob* = seq[byte].default
 								    ## Useful shortcut
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This led to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss was made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  EmptyBlobSet* = HashSet[Blob].default
 								    ## Useful shortcut
-												Update snap client account healing (#1521)

* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not be inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leads to de-fragmentation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
											
										
										
											2023-03-25 10:44:48 +00:00
+								  EmptyBlobSeq* = seq[Blob].default
 								    ## Useful shortcut
-												Update snap server client test scenario (#1518)

* Redesign snap1 message GetTrieNodes argument prototypes

why:
  A list of sub-objects `seq[SnapTriePath]` is more intuitive to work with
  than an opaque definition `seq[seq[Blob]]` because the inner
  `SnapTriePath` object has a dedicated inner structure (for how to
  interpret `seq[Blob]`.)

* Collect some public constants into `constants.nim` file

* Reorg `hexary_paths.nim`

why:
+ Collecting nodes following a partial path properly ending at an
  extension node failed to collect this last node.
+ Merged the nodes collecting algorithm for persistent and in-memory
  into a single generic function `hexary_paths.rootPathExtend()`

info:
  Extracted common tasks to `hexary_nodes_helper.nim`

* Implement `StorageRanges` message handler for snap/1 protocol
											
										
										
											2023-03-22 20:11:49 +00:00
+								  EmptyNibbleSeq* = EmptyBlob.initNibbleRange
 								    ## Useful shortcut
 								  # ---------
-												Snap sync swap in other pivots (#1363)

* Provide index to reconstruct missing storage slots

why;
  Pivots will not be changed anymore once they are officially archived. The
  account of the archived pivots are ready to be swapped into the active
  pivot. This leaves open how to treat storage slots not fetched yet.

  Solution: when mothballing, an `account->storage-root` index is
  compiled that can be used when swapping in accounts.

* Implement swap-in from earlier pivots

details;
  When most accounts are covered by the current and previous pivot
  sessions, swapping in the accounts and storage slots (i.e. registering
  account ranges done) from earlier pivots takes place if there is a
  common sub-trie.

* Throttle pivot change when healing state has been reached

why:
  There is a hope to complete the current pivot, so pivot update can be
  throttled. This is achieved by setting another minimum block number
  distance for the pivot headers. This feature is still experimental
											
										
										
											2022-12-12 22:00:24 +00:00
+								  pivotTableLruEntriesMax* = 50
 								    ## Max depth of pivot table. On overflow, the oldest one will be removed.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								  pivotBlockDistanceMin* = 128
 								    ## The minimal depth of two block headers needed to activate a new state
 								    ## root pivot.
 								    ##
 								    ## Effects on assembling the state via `snap/1` protocol:
 								    ##
 								    ## * A small value of this constant increases the propensity to update the
 								    ##   pivot header more often. This is so because each new peer negoiates a
 								    ##   pivot block number at least the current one.
 								    ##
 								    ## * A large value keeps the current pivot more stable but some experiments
 								    ##   suggest that the `snap/1` protocol is answered only for later block
 								    ##   numbers (aka pivot blocks.) So a large value tends to keep the pivot
 								    ##   farther away from the chain head.
 								    ##
 								    ##   Note that 128 is the magic distance for snapshots used by *Geth*.
 								  # --------------
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  fetchRequestBytesLimit* = 2 * 1024 * 1024
 								    ## Soft bytes limit to request in `snap/1` protocol calls.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  fetchRequestTrieNodesMax* = 1024
 								    ## Informal maximal number of trie nodes to fetch at once in `snap/1`
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` returned a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								    ## protocol calls. This is not an official limit but found with several
 								    ## implementations (e.g. Geth.)
 								    ##
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								    ## Restricting the fetch list length early allows to better parallelise
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` return a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								    ## healing.
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  fetchRequestStorageSlotsMax* = 2 * 1024
 								    ## Maximal number of storage tries to fetch with a single request message.
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` return a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  # --------------
-												Update snap sync ready to succeed at lab test (#1556)

* Extract RocksDB timing tests from snap unit tests as separate module

why:
  Declutter, make space for more snap related unit tests.

* Renamed `undumpNextGroup()` => `undumpBlocks()`

why:
  Source file name is called `undump_blocks.nim` which should be sort
  of in sync with the method name(s).

* Implement snap/1 server method `getByteCodes()`

* Implement snap/1 client method `getByteCodes()`

* Implement faculty for handling contract code fetching via snap/1

* Provide persistent storage for contract code records

* Implement contract code snap sync fetch & store

* Code massage, cosmetics

* Unit tests for verifying snap sync snapshot dump

details:
  Use `undump_kvp.dumpAllDb()` to dump any database.
											
										
										
											2023-04-21 21:11:04 +00:00
+								  fetchRequestContractsMax* = 1024
 								    ## Maximal number of contract codes to fetch with a single request message.
 								  # --------------
 								  saveAccountsProcessedChunksMax* = 1000
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` return a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								    ## Recovery data are stored if the processed ranges list contains no more
-												Snap sync swap in other pivots (#1363)

* Provide index to reconstruct missing storage slots

why:
  Pivots will not be changed anymore once they are officially archived. The
  account of the archived pivots are ready to be swapped into the active
  pivot. This leaves open how to treat storage slots not fetched yet.

  Solution: when mothballing, an `account->storage-root` index is
  compiled that can be used when swapping in accounts.

* Implement swap-in from earlier pivots

details:
  When most accounts are covered by the current and previous pivot
  sessions, swapping in the accounts and storage slots (i.e. registering
  account ranges done) from earlier pivots takes place if there is a
  common sub-trie.

* Throttle pivot change when healing state has been reached

why:
  There is a hope to complete the current pivot, so pivot update can be
  throttled. This is achieved by setting another minimum block number
  distance for the pivot headers. This feature is still experimental
											
										
										
											2022-12-12 22:00:24 +00:00
+								    ## than this many range *chunks*.
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
+								    ##
-												Snap sync swap in other pivots (#1363)

* Provide index to reconstruct missing storage slots

why:
  Pivots will not be changed anymore once they are officially archived. The
  account of the archived pivots are ready to be swapped into the active
  pivot. This leaves open how to treat storage slots not fetched yet.

  Solution: when mothballing, an `account->storage-root` index is
  compiled that can be used when swapping in accounts.

* Implement swap-in from earlier pivots

details:
  When most accounts are covered by the current and previous pivot
  sessions, swapping in the accounts and storage slots (i.e. registering
  account ranges done) from earlier pivots takes place if there is a
  common sub-trie.

* Throttle pivot change when healing state has been reached

why:
  There is a hope to complete the current pivot, so pivot update can be
  throttled. This is achieved by setting another minimum block number
  distance for the pivot headers. This feature is still experimental
											
										
										
											2022-12-12 22:00:24 +00:00
+								    ## If the range set is too much fragmented, no data will be saved and
 								    ## restart has to perform from scratch or an earlier checkpoint.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Update snap sync ready to succeed at lab test (#1556)

* Extract RocksDB timing tests from snap unit tests as separate module

why:
  Declutter, make space for more snap related unit tests.

* Renamed `undumpNextGroup()` => `undumpBlocks()`

why:
  Source file name is called `undump_blocks.nim` which should be sort
  of in sync with the method name(s).

* Implement snap/1 server method `getByteCodes()`

* Implement snap/1 client method `getByteCodes()`

* Implement faculty for handling contract code fetching via snap/1

* Provide persistent storage for contract code records

* Implement contract code snap sync fetch & store

* Code massage, cosmetics

* Unit tests for verifying snap sync snapshot dump

details:
  Use `undump_kvp.dumpAllDb()` to dump any database.
											
										
										
											2023-04-21 21:11:04 +00:00
+								  saveStorageSlotsMax* = 20_000
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` return a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								    ## Recovery data are stored if the outstanding storage slots to process do
 								    ## not amount to more than this many entries.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								    ##
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` return a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								    ## If there are too many dangling nodes, no data will be saved and restart
-												Snap sync swap in other pivots (#1363)

* Provide index to reconstruct missing storage slots

why:
  Pivots will not be changed anymore once they are officially archived. The
  account of the archived pivots are ready to be swapped into the active
  pivot. This leaves open how to treat storage slots not fetched yet.

  Solution: when mothballing, an `account->storage-root` index is
  compiled that can be used when swapping in accounts.

* Implement swap-in from earlier pivots

details:
  When most accounts are covered by the current and previous pivot
  sessions, swapping in the accounts and storage slots (i.e. registering
  account ranges done) from earlier pivots takes place if there is a
  common sub-trie.

* Throttle pivot change when healing state has been reached

why:
  There is a hope to complete the current pivot, so pivot update can be
  throttled. This is achieved by setting another minimum block number
  distance for the pivot headers. This feature is still experimental
											
										
										
											2022-12-12 22:00:24 +00:00
+								    ## has to perform from scratch or an earlier checkpoint.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Update snap sync ready to succeed at lab test (#1556)

* Extract RocksDB timing tests from snap unit tests as separate module

why:
  Declutter, make space for more snap related unit tests.

* Renamed `undumpNextGroup()` => `undumpBlocks()`

why:
  Source file name is called `undump_blocks.nim` which should be sort
  of in sync with the method name(s).

* Implement snap/1 server method `getByteCodes()`

* Implement snap/1 client method `getByteCodes()`

* Implement faculty for handling contract code fetching via snap/1

* Provide persistent storage for contract code records

* Implement contract code snap sync fetch & store

* Code massage, cosmetics

* Unit tests for verifying snap sync snapshot dump

details:
  Use `undump_kvp.dumpAllDb()` to dump any database.
											
										
										
											2023-04-21 21:11:04 +00:00
+								  saveContactsMax* = 10_000
 								    ## Similar to `saveStorageSlotsMax`
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This led to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details:
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  # --------------
 								  storageSlotsFetchFailedFullMax* = fetchRequestStorageSlotsMax + 100
 								    ## Maximal number of failures when fetching full range storage slots.
 								    ## These failed slot ranges are only called for once in the same cycle.
 								  storageSlotsFetchFailedPartialMax* = 300
 								    ## Ditto for partial range storage slots.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  storageSlotsTrieInheritPerusalMax* = 30_000
 								    ## Maximal number of nodes to visit in order to find out whether this
 								    ## storage slots trie is complete. This allows to *inherit* the full trie
 								    ## for an existing root node if the trie is small enough.
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as a checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								  storageSlotsQuPrioThresh* = 5_000
-												Prep for full sync after snap make 6 (#1291)

* Update log ticker, using time interval rather than ticker count

why:
  Counting and logging ticker occurrences is inherently imprecise. So
  time intervals are used.

* Use separate storage tables for snap sync data

* Left boundary proof update

why:
  Was not properly implemented, yet.

* Capture pivot in peer worker (aka buddy) tasks

why:
  The pivot environment is linked to the `buddy` descriptor. While
  there is a task switch, the pivot may change. So it is passed on as
  function argument `env` rather than retrieved from the buddy at
  the start of a sub-function.

* Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart`

* Remove obsolete account range returned from `GetAccountRange` message

why:
  Handler returned the wrong right value of the range. This range was
  for convenience, only.

* Prioritise storage slots if the queue becomes large

why:
  Currently, accounts processing is prioritised up until all accounts
  are downloaded. The new prioritisation has two thresholds for
  + start processing storage slots with a new worker
  + stop account processing and switch to storage processing

also:
  Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim`

* Generalise left boundary proof for accounts or storage slots.

why:
  Detailed explanation how this works is documented with
  `snapdb_accounts.importAccounts()`.

  Instead of enforcing a left boundary proof (which is still the default),
  the importer functions return a list of `holes` (aka node paths) found in
  the argument ranges of leaf nodes. This in turn is used by the book
   keeping software for data download.

* Forgot to pass on variable in function wrapper

also:
  + Start healing not before 99% accounts covered (previously 95%)
  + Logging updated/prettified
											
										
										
											2022-11-08 18:56:04 +00:00
+								    ## For a new worker, prioritise processing the storage slots queue over
 								    ## processing accounts if the queue has more than this many items.
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
+								    ##
-												Prep for full sync after snap make 6 (#1291)

* Update log ticker, using time interval rather than ticker count

why:
  Counting and logging ticker occurrences is inherently imprecise. So
  time intervals are used.

* Use separate storage tables for snap sync data

* Left boundary proof update

why:
  Was not properly implemented, yet.

* Capture pivot in peer worker (aka buddy) tasks

why:
  The pivot environment is linked to the `buddy` descriptor. While
  there is a task switch, the pivot may change. So it is passed on as
  function argument `env` rather than retrieved from the buddy at
  the start of a sub-function.

* Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart`

* Remove obsolete account range returned from `GetAccountRange` message

why:
  Handler returned the wrong right value of the range. This range was
  for convenience, only.

* Prioritise storage slots if the queue becomes large

why:
  Currently, accounts processing is prioritised up until all accounts
  are downloaded. The new prioritisation has two thresholds for
  + start processing storage slots with a new worker
  + stop account processing and switch to storage processing

also:
  Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim`

* Generalise left boundary proof for accounts or storage slots.

why:
  Detailed explanation how this works is documented with
  `snapdb_accounts.importAccounts()`.

  Instead of enforcing a left boundary proof (which is still the default),
  the importer functions return a list of `holes` (aka node paths) found in
  the argument ranges of leaf nodes. This in turn is used by the book
   keeping software for data download.

* Forgot to pass on variable in function wrapper

also:
  + Start healing not before 99% accounts covered (previously 95%)
  + Logging updated/prettified
											
										
										
											2022-11-08 18:56:04 +00:00
+								    ## For a running worker processing accounts, stop processing accounts
 								    ## and switch to processing the storage slots queue if the queue has
 								    ## more than this many items.
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Snap sync can start on saved checkpoint (#1327)

* Stop negotiating pivot if peer repeatedly replies w/useless answers

why:
  There is some fringe condition where a peer replies with legit but
  useless empty headers repeatedly. This goes on until somebody stops.
  We stop now.

* Rename `missingNodes` => `sickSubTries`

why:
  These (probably missing) nodes represent in reality fully or partially
  missing sub-tries. The top nodes may even exist, e.g. as a shallow
  sub-trie.

also:
  Keep track of account healing on/off by bool variable `accountsHealing`
  controlled in `pivot_helper.execSnapSyncAction()`

* Add `nimbus` option argument `snapCtx` for starting snap recovery (if any)

also:
+ Trigger the recovery (or similar) process from inside the global peer
  worker initialisation `worker.setup()` and not by the `snap.start()`
  function.
+ Have `runPool()` returned a `bool` code to indicate early stop to
  scheduler.

* Can import partial snap sync checkpoint at start

details:
 + Modified what is stored with the checkpoint in `snapdb_pivot.nim`
 + Will be loaded within `runDaemon()` if activated

* Forgot to import total coverage range

why:
  Only the top (or latest) pivot needs coverage but the total coverage
  is the list of all ranges for all pivots -- simply forgotten.
											
										
										
											2022-11-25 14:56:42 +00:00
+								  # --------------
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Update snap sync ready to succeed at lab test (#1556)

* Extract RocksDB timing tests from snap unit tests as separate module

why:
  Declutter, make space for more snap related unit tests.

* Renamed `undumpNextGroup()` => `undumpBlocks()`

why:
  Source file name is called `undump_blocks.nim` which should be sort
  of in sync with the method name(s).

* Implement snap/1 server method `getByteCodes()`

* Implement snap/1 client method `getByteCodes()`

* Implement faculty for handling contract code fetching via snap/1

* Provide persistent storage for contract code records

* Implement contract code snap sync fetch & store

* Code massage, cosmetics

* Unit tests for verifying snap sync snapshot dump

details:
  Use `undump_kvp.dumpAllDb()` to dump any database.
											
										
										
											2023-04-21 21:11:04 +00:00
+								  contractsQuPrioThresh* = 2_000
 								    ## Similar to `storageSlotsQuPrioThresh`
 								  # --------------
-												Code reorg 4 snap sync suite (#1560)

* Rename `playXXX` => `passXXX`

why:
  Better purpose match

* Code massage, log message updates

* Moved `ticker.nim` to `misc` folder to be used the same by full and snap sync

why:
  Simplifies maintenance

* Move `worker/pivot*` => `worker/pass/pass_snap/*`

why:
  better for maintenance

* Moved helper source file => `pass/pass_snap/helper`

* Renamed ComError => GetError, `worker/com/` => `worker/get/`

* Keep ticker enable flag in worker descriptor

why:
  This allows to pass this flag with the descriptor and not an extra
  function argument when calling the setup function.

* Extracted setup/release code from `worker.nim` => `pass/pass_init.nim`
											
										
										
											2023-04-24 20:24:07 +00:00
+								  healAccountsCoverageTrigger* = 1.01
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								    ## Apply accounts healing if the global snap download coverage factor
 								    ## exceeds this setting. The global coverage factor is derived by merging
 								    ## all account ranges retrieved for all pivot state roots (see
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								    ## `coveredAccounts` in the object `CtxData`.) Note that a coverage factor
 								    ## greater than 100% is not exact but rather a lower bound estimate.
 								  healAccountsInspectionPlanBLevel* = 4
 								    ## Search this level deep for missing nodes if `hexaryEnvelopeDecompose()`
 								    ## only produces existing nodes.
-												Snap sync interval complement method to speed up trie perusal (#1328)

* Add quick hexary trie inspector, called `dismantle()`

why:
+ Full hexary trie perusal is slow if running down leaf nodes
+ For known range of leaf nodes, work out the UInt126-complement of
  partial sub-trie paths (for existing nodes). The result should cover
  no (or only a few) sub-tries with leaf nodes.

* Extract common healing methods => `sub_tries_helper.nim`

details:
  Also apply quick hexary trie inspection tool `dismantle()`
  Replace `inspectAccountsTrie()` wrapper by `hexaryInspectTrie()`

* Re-arrange task dispatching in main peer worker

* Refactor accounts and storage slots downloaders

* Rename `HexaryDbError` => `HexaryError`
											
										
										
											2022-11-28 09:03:23 +00:00
-												Update snap client account healing (#1521)

* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not be inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leads to de-fragmentation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
											
										
										
											2023-03-25 10:44:48 +00:00
+								  healAccountsInspectionPlanBRetryMax* = 2
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This lead to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								    ## Retry inspection with depth level argument starting at
 								    ## `healAccountsInspectionPlanBLevel-1` and counting down at most this
 								    ## many times until there is at least one dangling node found and the
 								    ## depth level argument remains positive. The cumulative depth of the
 								    ## iterated search is
 								    ## ::
 								    ##      b        1
 								    ##      Σ ν  =  --- (b - a + 1) (a + b)
 								    ##      a        2
 								    ## for
 								    ## ::
 								    ##      b = healAccountsInspectionPlanBLevel
 								    ##      a = b - healAccountsInspectionPlanBRetryMax
 								    ##
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Update snap client account healing (#1521)

* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not be inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leads to de-fragmentation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
											
										
										
											2023-03-25 10:44:48 +00:00
+								  healAccountsInspectionPlanBRetryNapMSecs* = 2
 								    ## Sleep between inspection retries to allow thread switch. If this constant
 								    ## is set to `0`, a `1`ns wait is used.
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This lead to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  # --------------
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This lead to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  healStorageSlotsInspectionPlanBLevel* = 5
-												Snap sync refactor healing (#1397)

* Simplify accounts healing threshold management

why:
  Was over-engineered.

details:
  Previously, healing was based on recursive hexary trie perusal.

  Due to "cheap" envelope decomposition of a range complement for the
  hexary trie, the cost of running extra laps have become time-affordable
  again and a simple trigger mechanism for healing will do.

* Control number of dangling result nodes in `hexaryInspectTrie()`

also:
+ Returns number of visited nodes available for logging so the maximum
  number of nodes can be tuned accordingly.
+ Some code and docu update

* Update names of constants

why:
  Declutter, more systematic naming

* Re-implemented `worker_desc.merge()` for storage slots

why:
  Provided as proper queue management in `storage_queue_helper`.

details:
+ Several append modes (replaces `merge()`)
+ Added third queue to record entries currently fetched by a worker. So
  another parallel running worker can save the complete set of storage
  slots as checkpoint. This was previously lost.

* Refactor healing

why:
  Simplify and remove deep hexary trie perusal for finding completeness.

   Due to "cheap" envelope decomposition of a range complement for the
   hexary trie, the cost of running extra laps have become time-affordable
   again and a simple trigger mechanism for healing will do.

* Docu update

* Run a storage job only once in download loop

why:
  Download failure or rejection (i.e. missing data) lead to repeated
  fetch requests until peer disconnects, otherwise.
											
										
										
											2022-12-24 09:54:18 +00:00
+								    ## Similar to `healAccountsInspectionPlanBLevel`
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was unfortunately implemented which caused the ranges to be
  unnecessarily fractioned. Now the number of range interval typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This lead to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details;
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  healStorageSlotsInspectionPlanBRetryMax* = 99 # 5 + 4 + .. + 1 => 15
-												Update snap client account healing (#1521)

* Update nearby/neighbour leaf nodes finder

details:
  Update return error codes so that in the case that there is no more
  leaf node beyond the search direction, the particular error code
  `NearbyBeyondRange` is returned.

* Compile largest interval range containing only this leaf point

why:
  Will be needed in snap sync for adding single leaf nodes to the range
  of already allocated nodes.

* Reorg `hexary_inspect.nim`

why:
 Merged the nodes collecting algorithm for persistent and in-memory
 into a single generic function `hexary_inspect.inspectTrieImpl()`

* Update fetching accounts range failure handling in `rangeFetchAccounts()`

why:
  Rejected response leads now to fetching for another account range. Only
  repeated failures (or all done) terminate the algorithm.

* Update accounts healing

why:
+ Fixed looping over a bogus node response that could not be inserted into
  the database. As a solution, these nodes are locally registered and not
  asked for in this download cycle.
+ Sub-optimal handling of interval range for a healed account leaf node.
  Now the maximal range interval containing this node is registered as
  processed which leads to de-fragmentation of the processed (and
  unprocessed) range list(s). So *gap* ranges which are known not to
  cover any account leaf node are not asked for on the network, anymore.
+ Sporadically remove empty interval ranges (if any)

* Update logging, better variable names
											
										
										
											2023-03-25 10:44:48 +00:00
+								    ## Similar to `healAccountsInspectionPlanBRetryMax`
 								  healStorageSlotsInspectionPlanBRetryNapMSecs* = 2
 								    ## Similar to `healAccountsInspectionPlanBRetryNapMSecs`
-												Snap sync state save (#1302)

* Piecemeal trie inspection

details:
  Trie inspection will stop after maximum number of nodes visited.
  The inspection can be resumed using the returned state from the
  last session.

why:
  This feature allows for task switch between `piecemeal` sessions.

* Extract pivot helper code from `worker.nim` => `pivot_helper.nim`

* Accounts import will now return dangling paths from `proof` nodes

why:
  With proper bookkeeping, this can be used to start healing without
  analysing the probably full trie.

* Update `unprocessed` account range handling

why:
  More generally, the API of a pairs of unprocessed intervals favours
  the first set and not before that is exhausted the second set comes
  into play.

  This was implemented in an unfortunate way, which caused the ranges to
  be unnecessarily fractioned. Now the number of range intervals typically
  remains in the lower single digit numbers.

* Save sync state after end of downloading some accounts

details:
  restore/resume to be implemented later
											
										
										
											2022-11-16 23:51:06 +00:00
+								  healStorageSlotsBatchMax* = 32
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								    ## Maximal number of storage tries to heal in a single batch run. Only
 								    ## this many items will be removed from the batch queue. These items will
 								    ## then be processed one by one.
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This led to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details:
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss was made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  healStorageSlotsFailedMax* = 300
 								    ## Ditto for partial range storage slots.
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								  # --------------
-												Prep for full sync after snap make 6 (#1291)

* Update log ticker, using time interval rather than ticker count

why:
  Counting and logging ticker occurrences is inherently imprecise. So
  time intervals are used.

* Use separate storage tables for snap sync data

* Left boundary proof update

why:
  Was not properly implemented, yet.

* Capture pivot in peer worker (aka buddy) tasks

why:
  The pivot environment is linked to the `buddy` descriptor. While
  there is a task switch, the pivot may change. So it is passed on as
  function argument `env` rather than retrieved from the buddy at
  the start of a sub-function.

* Split queues `fetchStorage` into `fetchStorageFull` and `fetchStoragePart`

* Remove obsolete account range returned from `GetAccountRange` message

why:
  Handler returned the wrong right value of the range. This range was
  for convenience, only.

* Prioritise storage slots if the queue becomes large

why:
  Currently, accounts processing is prioritised up until all accounts
  are downloaded. The new prioritisation has two thresholds for
  + start processing storage slots with a new worker
  + stop account processing and switch to storage processing

also:
  Provide api for `SnapTodoRanges` pair of range sets in `worker_desc.nim`

* Generalise left boundary proof for accounts or storage slots.

why:
  Detailed explanation how this works is documented with
  `snapdb_accounts.importAccounts()`.

  Instead of enforcing a left boundary proof (which is still the default),
  the importer functions return a list of `holes` (aka node paths) found in
  the argument ranges of leaf nodes. This in turn is used by the book
   keeping software for data download.

* Forgot to pass on variable in function wrapper

also:
  + Start healing not before 99% accounts covered (previously 95%)
  + Logging updated/prettified
											
										
										
											2022-11-08 18:56:04 +00:00
+								  comErrorsTimeoutMax* = 3
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
+								    ## Maximal number of non-responses accepted in a row. If there are more than
 								    ## `comErrorsTimeoutMax` consecutive errors, the worker will be degraded
 								    ## as zombie.
 								  comErrorsTimeoutSleepMSecs* = 5000
 								    ## Wait/suspend for this many milliseconds after a timeout error if
 								    ## there are not more than `comErrorsTimeoutMax` errors in a row (maybe
 								    ## some other network or no-data errors mixed in.) Set 0 to disable.
 								  comErrorsNetworkMax* = 5
 								    ## Similar to `comErrorsTimeoutMax` but for network errors.
 								  comErrorsNetworkSleepMSecs* = 5000
 								    ## Similar to `comErrorsTimeoutSleepMSecs` but for network errors.
 								    ## Set 0 to disable.
 								  comErrorsNoDataMax* = 3
 								    ## Similar to `comErrorsTimeoutMax` but for missing data errors.
 								  comErrorsNoDataSleepMSecs* = 0
 								    ## Similar to `comErrorsTimeoutSleepMSecs` but for missing data errors.
 								    ## Set 0 to disable.
 								static:
-												Update snap sync ready to succeed at lab test (#1556)

* Extract RocksDB timing tests from snap unit tests as separate module

why:
  Declutter, make space for more snap related unit tests.

* Renamed `undumpNextGroup()` => `undumpBlocks()`

why:
  Source file name is called `undump_blocks.nim` which should be sort
  of in sync with the method name(s).

* Implement snap/1 server method `getByteCodes()`

* Implement snap/1 client method `getByteCodes()`

* Implement faculty for handling contract code fetching via snap/1

* Provide persistent storage for contract code records

* Implement contract code snap sync fetch & store

* Code massage, cosmetics

* Unit tests for verifying snap sync snapshot dump

details:
  Use `undump_kvp.dumpAllDb()` to dump any database.
											
										
										
											2023-04-21 21:11:04 +00:00
+								  doAssert storageSlotsQuPrioThresh < saveStorageSlotsMax
 								  doAssert contractsQuPrioThresh < saveContactsMax
-												Update snap client storage slots download and healing (#1529)

* Fix fringe condition for `GetStorageRanges` message handler

why:
  Receiving a proved empty range was not considered at all. This led to
  inconsistencies of the return value which led to subsequent errors.

* Update storage range bulk download

details:
  Mainly re-org of storage queue processing in `storage_queue_helper.nim`

* Update logging variables/messages

* Update storage slots healing

details:
  Mainly clean up after improved helper functions from the sources
  `find_missing_nodes.nim` and `storage_queue_helper.nim`.

* Simplify account fetch

why:
  Too much fuss was made tolerating some errors. There will be an overall
  strategy implemented where the concert of download and healing function
  is orchestrated.

* Add error resilience to the concert of download and healing.

why:
  The idea is that a peer might stop serving snap/1 accounts and storage
  slot downloads while still able to support fetching nodes for healing.
											
										
										
											2023-04-04 13:36:18 +00:00
+								  doAssert 0 <= storageSlotsFetchFailedFullMax
 								  doAssert 0 <= storageSlotsFetchFailedPartialMax
-												Prep for full sync after snap make 5 (#1286)

* Update docu and logging

* Extracted and updated constants from `worker_desc` into separate file

* Update and re-calibrate communication error handling

* Allow simplified pivot negotiation

why:
  This feature allows to turn off pivot negotiation so that peers agree
  on a pivot header.

  For snap sync with fast changing pivots this only throttles the sync
  process. The finally downloaded DB snapshot is typically a merged
  version of different pivot states augmented by a healing process.

* Re-model worker queues for accounts download & healing

why:
  Currently there is only one data fetch per download or healing task.
  This task is then repeated by the scheduler after a short time. In
  many cases, this short time seems enough for some peers to decide to
  terminate connection.

* Update main task batch `runMulti()`

details:
  The function `runMulti()` is activated in quasi-parallel mode by the
  scheduler. This function calls the download, healing and fast-sync
  functions.

  While in debug mode, after each set of jobs run by this function the
  database is analysed (by the `snapdb_check` module) and the result
  printed.
											
										
										
											2022-11-01 15:07:44 +00:00
 								# ------------------------------------------------------------------------------
 								# End
 								# ------------------------------------------------------------------------------