Misc snap sync updates (#1192)

* Bump nim-stew

why:
  Need fixed interval set

* Keep track of accumulated account ranges over all state roots

* Added comments and explanations to unit tests

* typo
This commit is contained in:
Jordan Hrycaj 2022-08-17 08:30:11 +01:00 committed by GitHub
parent c5101c16e9
commit f07945d37b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 2678 additions and 45 deletions

View File

@ -122,10 +122,19 @@ proc freeFactor*(lrs: LeafRangeSet): float =
if 0 < lrs.total:
((high(NodeTag) - lrs.total).u256 + 1).to(float) / (2.0^256)
elif lrs.chunks == 0:
1.0
1.0 # `total` represents the residue class `mod 2^256` from `0`..`(2^256-1)`
else:
0.0
proc fullFactor*(lrs: LeafRangeSet): float =
## Full factor, ie. `#items-contained / 2^256` to be used in statistics
if 0 < lrs.total:
lrs.total.u256.to(float) / (2.0^256)
elif lrs.chunks == 0:
0.0
else:
1.0 # `total` represents the residue class `mod 2^256` from `0`..`(2^256-1)`
# Printing & pretty printing
proc `$`*(nt: NodeTag): string =
if nt == high(NodeTag):

View File

@ -122,6 +122,7 @@ proc tickerUpdate*(ctx: SnapCtxRef): TickerStatsUpdater =
activeQueues: tabLen,
flushedQueues: ctx.data.pivotCount.int64 - tabLen,
accounts: meanStdDev(aSum, aSqSum, count),
accCoverage: ctx.data.coveredAccounts.fullFactor,
fillFactor: meanStdDev(uSum, uSqSum, count),
bulkStore: ctx.data.accountsDb.dbImportStats)
@ -132,6 +133,7 @@ proc tickerUpdate*(ctx: SnapCtxRef): TickerStatsUpdater =
proc setup*(ctx: SnapCtxRef; tickerOK: bool): bool =
## Global set up
ctx.data.accountRangeMax = high(UInt256) div ctx.buddiesMax.u256
ctx.data.coveredAccounts = LeafRangeSet.init()
ctx.data.accountsDb =
if ctx.data.dbBackend.isNil: AccountsDbRef.init(ctx.chain.getTrieDB)
else: AccountsDbRef.init(ctx.data.dbBackend)
@ -218,7 +220,8 @@ proc runMulti*(buddy: SnapBuddyRef) {.async.} =
ctx = buddy.ctx
peer = buddy.peer
if buddy.data.pivotHeader.isNone:
if buddy.data.pivotHeader.isNone or
buddy.data.pivotHeader.get.blockNumber == 0:
await buddy.pivotExec()

View File

@ -149,17 +149,22 @@ proc fetchAccounts*(buddy: SnapBuddyRef): Future[bool] {.async.} =
rc = ctx.data.accountsDb.importAccounts(
peer, stateRoot, iv.minPt, dd.data, storeData = true)
if rc.isErr:
buddy.putUnprocessed(iv)
# Just try another peer
buddy.ctrl.zombie = true
# TODO: Prevent deadlock in case there is a problem with the approval
# data which is not in production state, yet.
trace "Import failed, restoring unprocessed accounts", peer, stateRoot,
range=dd.consumed, nAccounts, error=rc.error
# Just try another peer
buddy.ctrl.zombie = true
else:
# Statistics
env.nAccounts.inc(nAccounts)
# Register consumed intervals on the accumulator over all state roots
discard buddy.ctx.data.coveredAccounts.merge(dd.consumed)
# Register consumed and bulk-imported (well, not yet) accounts range
let rx = iv - dd.consumed
if rx.isOk:

View File

@ -156,7 +156,7 @@ proc getAccountRange*(
reqRange=iv, stateRoot
return err(AccountsMaxTooLarge)
dd.consumed = LeafRange.new(iv.minPt, accMaxPt)
dd.consumed = LeafRange.new(iv.minPt, max(iv.maxPt,accMaxPt))
trace trSnapRecvReceived & "AccountRange", peer,
nAccounts, nProof, accRange=dd.consumed, reqRange=iv, stateRoot
return ok(dd)

View File

@ -29,6 +29,7 @@ type
pivotBlock*: Option[BlockNumber]
accounts*: (float,float) ## mean and standard deviation
fillFactor*: (float,float) ## mean and standard deviation
accCoverage*: float ## as factor
activeQueues*: int
flushedQueues*: int64
bulkStore*: AccountLoadStats
@ -117,6 +118,7 @@ proc runLogTicker(t: TickerRef) {.gcsafe.} =
avUtilisation = ""
pivot = "n/a"
bulker = ""
accCoverage = "n/a"
let
flushed = data.flushedQueues
@ -130,13 +132,16 @@ proc runLogTicker(t: TickerRef) {.gcsafe.} =
avAccounts =
&"{(data.accounts[0]+0.5).int64}({(data.accounts[1]+0.5).int64})"
avUtilisation =
&"{data.fillFactor[0]*100.0:.2f}%({data.fillFactor[1]*100.0:.2f}%)"
&"{data.fillFactor[0]*100.0:.1f}%({data.fillFactor[1]*100.0:.1f}%)"
bulker =
"[" & data.bulkStore.size.toSeq.mapIt(it.toSI).join(",") & "," &
data.bulkStore.dura.toSeq.mapIt(it.pp).join(",") & "]"
accCoverage =
&"{(data.accCoverage*100.0):.1f}%"
info "Snap sync statistics",
tick, buddies, pivot, avAccounts, avUtilisation, flushed, bulker, mem
tick, buddies, pivot, avAccounts, avUtilisation, accCoverage,
flushed, bulker, mem
t.tick.inc
t.setLogTicker(Moment.fromNow(tickerLogInterval))

View File

@ -78,22 +78,12 @@ type
WorkerSeenBlocks = KeyedQueue[array[32,byte],BlockNumber]
## Temporary for pretty debugging, `BlockHash` keyed lru cache
WorkerTickerBase* = ref object of RootObj
## Stub object, to be inherited in file `ticker.nim`
WorkerFetchBase* = ref object of RootObj
## Stub object, to be inherited in file `fetch.nim`
WorkerFetchEnvBase* = ref object of RootObj
## Stub object, to be inherited in file `fetch.nim`
SnapPivotRef* = ref object
## Stub object, cache for particular snap data environment
## Per-state root cache for particular snap data environment
stateHeader*: BlockHeader ## Pivot state, containg state root
pivotAccount*: NodeTag ## Random account
availAccounts*: LeafRangeSet ## Accounts to fetch (organised as ranges)
nAccounts*: uint64 ## Number of accounts imported
# fetchEnv*: WorkerFetchEnvBase ## Opaque object reference
# ---
proofDumpOk*: bool
proofDumpInx*: int
@ -103,7 +93,7 @@ type
KeyedQueue[Hash256,SnapPivotRef]
BuddyData* = object
## Local descriptor data extension
## Per-worker local descriptor data extension
stats*: SnapBuddyStats ## Statistics counters
errors*: SnapBuddyErrors ## For error handling
pivotHeader*: Option[BlockHeader] ## For pivot state hunter
@ -113,6 +103,7 @@ type
## Globally shared data extension
seenBlock: WorkerSeenBlocks ## Temporary, debugging, pretty logs
rng*: ref HmacDrbgContext ## Random generator
coveredAccounts*: LeafRangeSet ## Derived from all available accounts
dbBackend*: ChainDB ## Low level DB driver access (if any)
ticker*: TickerRef ## Ticker, logger
pivotTable*: SnapPivotTable ## Per state root environment

View File

@ -28,7 +28,7 @@ import
../nimbus/sync/snap/worker/[accounts_db, db/hexary_desc, db/rocky_bulk_load],
../nimbus/utils/prettify,
./replay/[pp, undump],
./test_sync_snap/sample0
./test_sync_snap/[sample0, sample1]
const
baseDir = [".", "..", ".."/"..", $DirSep]
@ -292,18 +292,25 @@ proc accountsRunner(noisy = true; persistent = true; sample = accSample0) =
let dbBase = if persistent: AccountsDbRef.init(db.cdb[1])
else: AccountsDbRef.init(newMemoryDB())
desc = AccountsDbSessionRef.init(dbBase, root, peer)
# Load/accumulate `proofs` data from several samples
for w in testItemLst:
check desc.merge(w.data.proof) == OkHexDb
let base = testItemLst.mapIt(it.base).sortMerge
# Load/accumulate accounts (needs some unique sorting)
let lowerBound = testItemLst.mapIt(it.base).sortMerge
accounts = testItemLst.mapIt(it.data.accounts).sortMerge
check desc.merge(base, accounts) == OkHexDb
desc.assignPrettyKeys() # for debugging (if any)
check desc.merge(lowerBound, accounts) == OkHexDb
desc.assignPrettyKeys() # for debugging, make sure that state root ~ "$0"
# Build/complete hexary trie for accounts
check desc.interpolate() == OkHexDb
# Save/bulk-store hexary trie on disk
check desc.dbImports() == OkHexDb
noisy.say "***", "import stats=", desc.dbImportStats.pp
test &"Revisting {accounts.len} items stored items on BaseChainDb":
test &"Revisiting {accounts.len} items stored items on BaseChainDb":
for acc in accounts:
let
byChainDB = desc.getChainDbAccount(acc.accHash)
@ -315,7 +322,30 @@ proc accountsRunner(noisy = true; persistent = true; sample = accSample0) =
check byBulker.isOk
check byChainDB == byBulker
#noisy.say "***", "database dump\n ", desc.dumpProofsDB.join("\n ")
# Hexary trie memory database dump. These are key value pairs for
# ::
# Branch: ($1,b(<$2,$3,..,$17>,))
# Extension: ($18,e(832b5e..06e697,$19))
# Leaf: ($20,l(cc9b5d..1c3b4,f84401..f9e5129d[#70]))
#
# where keys are typically represented as `$<id>` or `¶<id>` or `ø`
# depending on whether a key is final (`$<id>`), temporary (`¶<id>`)
# or unset/missing (`ø`).
#
# The node types are indicated by a letter after the first key before
# the round brackets
# ::
# Branch: 'b', 'þ', or 'B'
# Extension: 'e', '€', or 'E'
# Leaf: 'l', 'ł', or 'L'
#
# Here a small letter indicates a `Static` node which was from the
# original `proofs` list, a capital letter indicates a `Mutable` node
# added on the fly which might need some change, and the decorated
# letters stand for `Locked` nodes which are like `Static` ones but
# added later (typically these nodes are updated `Mutable` nodes.)
#
noisy.say "***", "database dump\n ", desc.dumpProofsDB.join("\n ")
proc importRunner(noisy = true; persistent = true; capture = goerliCapture) =
@ -765,12 +795,27 @@ proc syncSnapMain*(noisy = defined(debug)) =
when isMainModule:
const
noisy = defined(debug) or true
snapTest0 = accSample0
# Some 20 `snap/1` reply equivalents
snapTest0 =
accSample0
# Only the first `snap/1` reply from the sample
snapTest1 = AccountsProofSample(
name: "test1",
root: snapTest0.root,
data: snapTest0.data[0..0])
# Ditto for sample1
snapTest2 = AccountsProofSample(
name: "test2",
root: sample1.snapRoot,
data: sample1.snapProofData)
snapTest3 = AccountsProofSample(
name: "test3",
root: snapTest2.root,
data: snapTest2.data[0..0])
bulkTest0 = goerliCapture
bulkTest1: CaptureSpecs = (
name: "full-goerli",
@ -788,30 +833,65 @@ when isMainModule:
file: "mainnet332160.txt.gz",
numBlocks: high(int))
when false: # or true:
import ../../nimbus-eth1-blobs/replay/sync_sample1 as sample1
const
snapTest2 = AccountsProofSample(
name: "test2",
root: sample1.snapRoot,
data: sample1.snapProofData)
snapTest3 = AccountsProofSample(
name: "test3",
root: snapTest2.root,
data: snapTest2.data[0..0])
#setTraceLevel()
setErrorLevel()
# The `accountsRunner()` tests a snap sync functionality for storing chain
# chain data directly rather than derive them by executing the EVM. Here,
# only accounts are considered.
#
# The `snap/1` protocol allows to fetch data for a certain account range. The
# following boundary conditions apply to the received data:
#
# * `State root`: All data are relative to the same state root.
#
# * `Accounts`: There is an accounts interval sorted in strictly increasing
# order. The accounts are required to be consecutive, i.e. without holes in
# between although this cannot be verified immediately.
#
# * `Lower bound`: There is a start value which might be lower than the first
# account hash. There must be no other account between this start value and
# the first account (not verifyable yet.) For all practicat purposes, this
# value is mostly ignored but carried through.
#
# * `Proof`: There is a list of hexary nodes which allow to build a partial
# Patricia-Merkle trie starting at the state root with all the account
# leaves. There are enough nodes that show that there is no account before
# the least account (which is currently ignored.)
#
# There are test data samples on the sub-directory `test_sync_snap`. These
# are complete replies for some (admittedly small) test requests from a `kiln`
# session.
#
# The `accountsRunner()` does three tests:
#
# 1. Run the `importAccounts()` function which is the all-in-one production
# function processing the data described above. The test applies it
# sequentially to about 20 data sets.
#
# 2. Test individual functional items which are hidden in test 1. while
# merging the sample data.
# * Load/accumulate `proofs` data from several samples
# * Load/accumulate accounts (needs some unique sorting)
# * Build/complete hexary trie for accounts
# * Save/bulk-store hexary trie on disk. If rocksdb is available, data
# are bulk stored via sst. An additional data set is stored in a table
# with key prefix 200 using transactional `put()` (for time comparison.)
# If there is no rocksdb, standard transactional `put()` is used, only
# (no key prefix 200 storage.)
#
# 3. Traverse trie nodes stored earlier. The accounts from test 2 are
# re-visited using the account hash as access path.
#
false.accountsRunner(persistent=true, snapTest0)
false.accountsRunner(persistent=false, snapTest0)
#noisy.accountsRunner(persistent=true, snapTest1)
noisy.accountsRunner(persistent=true, snapTest1)
false.accountsRunner(persistent=false, snapTest2)
#noisy.accountsRunner(persistent=true, snapTest3)
when declared(snapTest2):
noisy.accountsRunner(persistent=false, snapTest2)
#noisy.accountsRunner(persistent=true, snapTest3)
when true: # and false:
when true and false:
# ---- database storage timings -------
noisy.showElapsed("importRunner()"):

File diff suppressed because it is too large Load Diff

2
vendor/nim-stew vendored

@ -1 +1 @@
Subproject commit 49db5b27b9933165cf53287e7302b9d2a37a8d26
Subproject commit 1e86bd1ef38f78c601b07da7188e65785f2c0ed8