nimbus-eth2/beacon_chain/sync/sync_queue.nim

# beacon_chain
# Copyright (c) 2018-2023 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

import std/[heapqueue, tables, strutils, sequtils, math]
import stew/[results, base10], chronos, chronicles
import
  ../spec/datatypes/[base, phase0, altair],
  ../spec/[helpers, forks],
  ../networking/[peer_pool, eth2_network],
  ../gossip_processing/block_processor,
  ../consensus_object_pools/block_pools_types

export base, phase0, altair, merge, chronos, chronicles, results,
       block_pools_types, helpers

# Module-level chronicles scope: log statements in this file carry the
# "syncqueue" topic unless a proc-local `logScope` overrides `topics`.
logScope:
  topics = "syncqueue"

type
  # Callback returning a slot; stored in `SyncQueue.getSafeSlot`.
  GetSlotCallback* = proc(): Slot {.gcsafe, raises: [].}
  # Optional callback invoked by `push` right before a result is verified.
  ProcessingCallback* = proc() {.gcsafe, raises: [].}
  # Asynchronous verifier invoked by `push` for every block (with its optional
  # blobs) of a result; a `VerifierError` decides how the queue reacts.
  BlockVerifier* =  proc(signedBlock: ForkedSignedBeaconBlock,
                         blobs: Opt[BlobSidecars], maybeFinalized: bool):
      Future[Result[void, VerifierError]] {.gcsafe, raises: [].}

  # Direction in which a queue walks through slots.
  SyncQueueKind* {.pure.} = enum
    Forward, Backward

  # A request for `count` consecutive slots starting at `slot`.
  SyncRequest*[T] = object
    kind*: SyncQueueKind
    # Identifier assigned by `makePending`; key in `SyncQueue.pending`.
    index*: uint64
    # First slot covered by the request.
    slot*: Slot
    # Number of slots covered; zero marks an empty request (see `isEmpty`).
    count*: uint64
    # Caller supplied payload; logged as `peer` and used for scoring.
    item*: T

  # Response to a `SyncRequest`: the received blocks and, optionally, blobs.
  SyncResult*[T] = object
    request*: SyncRequest[T]
    data*: seq[ref ForkedSignedBeaconBlock]
    blobs*: Opt[seq[BlobSidecars]]

  # Range of slots `start..finish` for which `item` returned no blocks.
  GapItem*[T] = object
    start*: Slot
    finish*: Slot
    item*: T

  # Parked `push` call: `future` is completed by `wakeupWaiters`, `reset` is
  # set when the wakeup was caused by a queue reset.
  SyncWaiter* = ref object
    future: Future[void]
    reset: bool

  # Last rewind recorded by `getRewindPoint`: the failing slot and the number
  # of epochs the queue was rewound by.
  RewindPoint = object
    failSlot: Slot
    epochCount: uint64

  SyncQueue*[T] = ref object
    kind*: SyncQueueKind
    # Moved by `advanceInput`; `init` starts it at `start`.
    inpSlot*: Slot
    # Moved by `advanceOutput` once a result has been processed.
    outSlot*: Slot
    startSlot*: Slot
    # Updated through `updateLastSlot`.
    finalSlot*: Slot
    # Maximum number of slots in one request.
    chunkSize*: uint64
    # Backpressure limit; values `<= 0` disable it (see `notInRange`).
    queueSize*: int
    # Index handed to the next request by `makePending`.
    counter*: uint64
    # Requests handed out and not yet pushed back, keyed by `index`.
    pending*: Table[uint64, SyncRequest[T]]
    # Gaps collected by `processGap`, consumed by `rewardForGaps`.
    gapList*: seq[GapItem[T]]
    waiters: seq[SyncWaiter]
    getSafeSlot*: GetSlotCallback
    # Requests which have to be handed out again.
    debtsQueue: HeapQueue[SyncRequest[T]]
    # Sum of `count` over all requests in `debtsQueue`.
    debtsCount: uint64
    # Results waiting to be verified, ordered by `<` on `SyncResult`.
    readyQueue: HeapQueue[SyncResult[T]]
    rewind: Option[RewindPoint]
    blockVerifier: BlockVerifier
    # Logged as `sync_ident`.
    ident*: string

# Log `SyncQueueKind` values as the lower-cased enum name.
chronicles.formatIt SyncQueueKind: toLowerAscii($it)

template shortLog*[T](req: SyncRequest[T]): string =
  ## Compact ``slot:count@index`` rendering of ``req`` for log output.
  Base10.toString(req.slot.uint64) & ":" & Base10.toString(req.count) &
    "@" & Base10.toString(req.index)

# Logging a `SyncRequest` expands into three properties: the short form of
# the request under its own name, the short form of its `item` as `peer`, and
# the lower-cased request kind as `direction`.
chronicles.expandIt SyncRequest:
  `it` = shortLog(it)
  peer = shortLog(it.item)
  direction = toLowerAscii($it.kind)

proc getShortMap*[T](req: SyncRequest[T],
                     data: openArray[ref ForkedSignedBeaconBlock]): string =
  ## Renders the blocks in ``data`` as a placement map over the slots of
  ## ``req``: ``x`` marks a slot for which a block is present, ``.`` marks a
  ## slot without one.
  var
    map = newStringOfCap(req.count)
    cursor = req.slot
    nextIdx = 0
  for _ in 0 ..< req.count:
    if nextIdx >= len(data):
      # All blocks are consumed, the remaining slots are empty.
      map.add('.')
    else:
      var k = nextIdx
      while k < len(data):
        let blockSlot = data[k][].slot
        if cursor == blockSlot:
          map.add('x')
          nextIdx = k + 1
          break
        if cursor < blockSlot:
          map.add('.')
          break
        # Block is older than the current slot, look at the next one.
        inc(k)
    cursor = cursor + 1
  map

proc getShortMap*[T](req: SyncRequest[T],
                     data: openArray[ref BlobSidecar]): string =
  ## Renders the blob sidecars in ``data`` as a placement map over the slots
  ## of ``req``: one ``x`` per sidecar found for a slot, ``|`` closing a slot.
  var
    map = newStringOfCap(req.count * MAX_BLOBS_PER_BLOCK)
    cur: uint64 = 0
  for slot in req.slot ..< req.slot + req.count:
    if cur < lenu64(data) and
       slot == data[cur].signed_block_header.message.slot:
      # Consume the run of sidecars which belong to `slot`; the range is fixed
      # on loop entry, so `k` and `cur` advance in lockstep.
      for k in cur ..< cur + MAX_BLOBS_PER_BLOCK:
        if k < lenu64(data) and
           slot == data[k].signed_block_header.message.slot:
          inc(cur)
          map.add('x')
        else:
          map.add('|')
          break
    else:
      # Either no sidecars are left or the next one is for another slot.
      map.add('|')
  map

proc contains*[T](req: SyncRequest[T], slot: Slot): bool {.inline.} =
  ## Returns ``true`` if ``slot`` lies in ``[req.slot, req.slot + req.count)``.
  (req.slot <= slot) and (slot < req.slot + req.count)

proc cmp*[T](a, b: SyncRequest[T]): int =
  ## Three-way comparison of two requests by their starting slot.
  cmp(a.slot.uint64, b.slot.uint64)

proc checkResponse*[T](req: SyncRequest[T],
                       data: openArray[Slot]): bool =
  ## Returns ``true`` if the slots in ``data`` form a valid answer to ``req``:
  ## not more entries than requested, strictly increasing and all of them
  ## inside the requested range. An empty response is always accepted.
  if len(data) == 0:
    # Impossible to verify empty response.
    return true

  if uint64(len(data)) > req.count:
    # Response must not contain more blocks than were requested.
    return false

  var
    expected = req.slot
    consumed = 0

  # Walk the requested range slot by slot, matching response entries in order.
  for _ in 0'u64 ..< req.count:
    if consumed >= len(data):
      break
    if expected > data[consumed]:
      # Entry is before the current slot: out of range or out of order.
      return false
    if expected == data[consumed]:
      inc(consumed)
    expected += 1'u64

  # Leftover entries are beyond the requested range.
  consumed == len(data)

proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, start: Slot,
             finish: Slot, t2: typedesc[T]): SyncRequest[T] =
  ## Creates a request without item for the inclusive range
  ## ``start..finish``.
  SyncRequest[T](kind: kind, slot: start, count: finish - start + 1'u64)

proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, slot: Slot,
             count: uint64, item: T): SyncRequest[T] =
  ## Creates a request for ``count`` slots starting at ``slot`` and attaches
  ## ``item`` to it.
  result = SyncRequest[T](kind: kind, slot: slot, count: count, item: item)

proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, start: Slot,
             finish: Slot, item: T): SyncRequest[T] =
  ## Creates a request for the inclusive range ``start..finish`` and attaches
  ## ``item`` to it.
  SyncRequest[T](kind: kind, slot: start, count: finish - start + 1'u64,
                 item: item)

proc empty*[T](t: typedesc[SyncRequest], kind: SyncQueueKind,
               t2: typedesc[T]): SyncRequest[T] {.inline.} =
  ## Creates a request of the given ``kind`` which covers no slots.
  result = SyncRequest[T](kind: kind)
  result.count = 0'u64

proc setItem*[T](sr: var SyncRequest[T], item: T) =
  ## Replaces the item attached to request ``sr``.
  sr.item = item

proc isEmpty*[T](sr: SyncRequest[T]): bool {.inline.} =
  ## Returns ``true`` if request ``sr`` covers no slots.
  0'u64 == sr.count

proc init*[T](t1: typedesc[SyncQueue], t2: typedesc[T],
              queueKind: SyncQueueKind,
              start, final: Slot, chunkSize: uint64,
              getSafeSlotCb: GetSlotCallback,
              blockVerifier: BlockVerifier,
              syncQueueSize: int = -1,
              ident: string = "main"): SyncQueue[T] =
  ## Creates a new synchronization queue.
  ##
  ## ``start`` and ``final`` are the first and the last slot to process.
  ##
  ## ``chunkSize`` is the maximum number of slots in one request; it must not
  ## be zero.
  ##
  ## ``syncQueueSize`` is the maximum queue size for incoming data. With
  ## ``syncQueueSize > 0`` the queue applies backpressure; with
  ## ``syncQueueSize <= 0`` the queue size is unlimited (default).

  # SyncQueue is the core of the sync manager: it distributes requests to
  # peers and manages the responses received from them.
  #
  # Being an async data structure it has to handle backpressure and the order
  # of incoming responses, and it also has to deal with the "joker's" problem.
  #
  # Joker's problem
  #
  # According to the pre-v0.12.0 Ethereum consensus network specification
  # > Clients MUST respond with at least one block, if they have it and it
  # > exists in the range. Clients MAY limit the number of blocks in the
  # > response.
  # https://github.com/ethereum/consensus-specs/blob/v0.11.3/specs/phase0/p2p-interface.md#L590
  #
  # This rule allows very uncertain responses. Assume slots 10 to 12 are not
  # empty; a client which follows the specification may answer with any of
  # the following (X - block, `-` - empty space):
  #
  # 1.   X X X
  # 2.   - - X
  # 3.   - X -
  # 4.   - X X
  # 5.   X - -
  # 6.   X - X
  # 7.   X X -
  #
  # Answer `1` is fine, `block_processor` is able to process all 3 blocks.
  # Answers `2`, `3`, `4` and `6` make `block_processor` fail immediately with
  # a "parent is missing" error for the chunk.
  # Answers `5` and `7` are processed by `block_processor` without problems,
  # but the trouble starts right after the uncertain last slot: SyncQueue
  # keeps producing requests for the following blocks and every response from
  # that point on fails with "parent is missing". Such peers are called
  # "jokers", because they are joking with their responses.
  #
  # To fix the "joker" problem the queue performs a rollback to the first slot
  # of the latest finalized epoch.
  #
  # Note that as of spec v0.12.0, well-behaving clients are forbidden from
  # answering this way. It still makes sense to handle this case to increase
  # compatibility (e.g., with weak subjectivity nodes that are still
  # backfilling blocks).
  doAssert(chunkSize > 0'u64, "Chunk size should not be zero")
  result = SyncQueue[T](
    ident: ident,
    kind: queueKind,
    # Slot range and request sizing.
    startSlot: start,
    finalSlot: final,
    inpSlot: start,
    outSlot: start,
    chunkSize: chunkSize,
    queueSize: syncQueueSize,
    # Request bookkeeping.
    counter: 1'u64,
    pending: initTable[uint64, SyncRequest[T]](),
    debtsQueue: initHeapQueue[SyncRequest[T]](),
    waiters: newSeq[SyncWaiter](),
    # Callbacks.
    getSafeSlot: getSafeSlotCb,
    blockVerifier: blockVerifier
  )

proc `<`*[T](a, b: SyncRequest[T]): bool =
  ## Ordering used by the heap queues: for forward sync the request with the
  ## lower slot comes first, for backward sync the one with the higher slot.
  ## Both requests must be of the same kind.
  doAssert(a.kind == b.kind)
  result =
    case a.kind
    of SyncQueueKind.Forward: a.slot < b.slot
    of SyncQueueKind.Backward: a.slot > b.slot

proc `<`*[T](a, b: SyncResult[T]): bool =
  ## Orders results by the starting slot of their requests: ascending for
  ## forward sync, descending for backward sync. Both requests must be of the
  ## same kind.
  doAssert(a.request.kind == b.request.kind)
  let
    lhs = a.request.slot
    rhs = b.request.slot
  case a.request.kind
  of SyncQueueKind.Forward:
    lhs < rhs
  of SyncQueueKind.Backward:
    lhs > rhs

proc `==`*[T](a, b: SyncRequest[T]): bool =
  ## Two requests are equal when kind, starting slot and slot count match;
  ## ``index`` and ``item`` are not compared.
  a.kind == b.kind and a.slot == b.slot and a.count == b.count

proc lastSlot*[T](req: SyncRequest[T]): Slot =
  ## Returns the last slot covered by request ``req``.
  result = req.slot + req.count - 1'u64

proc makePending*[T](sq: SyncQueue[T], req: var SyncRequest[T]) =
  ## Assigns the next free index to ``req`` and registers the request in the
  ## pending table of queue ``sq``.
  let index = sq.counter
  req.index = index
  sq.pending[index] = req
  sq.counter = index + 1'u64

proc updateLastSlot*[T](sq: SyncQueue[T], last: Slot) {.inline.} =
  ## Sets the final slot of queue ``sq`` to ``last``.
  sq.finalSlot = last

proc wakeupWaiters[T](sq: SyncQueue[T], reset = false) =
  ## Completes the future of every registered waiter which is not finished
  ## yet. With ``reset = true`` every waiter is additionally flagged as reset.
  for waiter in sq.waiters:
    waiter.reset = waiter.reset or reset
    if waiter.future.finished():
      continue
    waiter.future.complete()

proc waitForChanges[T](sq: SyncQueue[T]): Future[bool] {.async.} =
  ## Registers a new waiter and suspends until `wakeupWaiters()` completes it.
  ## Returns ``true`` if the wakeup was caused by a reset. The waiter is
  ## removed from the list again even if waiting fails.
  let waiter = SyncWaiter(
    future: newFuture[void]("SyncQueue.waitForChanges"))
  sq.waiters.add(waiter)
  try:
    await waiter.future
    return waiter.reset
  finally:
    sq.waiters.delete(sq.waiters.find(waiter))

proc wakeupAndWaitWaiters[T](sq: SyncQueue[T]) {.async.} =
  ## Registers one more waiter, performs `wakeupWaiters(true)` and then waits
  ## for that own waiter, which was added last, to be resumed.
  let ownWaiter = sq.waitForChanges()
  sq.wakeupWaiters(true)
  discard await ownWaiter

proc clearAndWakeup*[T](sq: SyncQueue[T]) =
  ## Expires all pending requests and wakes up every waiter with the reset
  ## flag set.
  clear(sq.pending)
  wakeupWaiters(sq, true)

proc resetWait*[T](sq: SyncQueue[T], toSlot: Option[Slot]) {.async.} =
  ## Resets queue ``sq`` and all the waiters blocked on it.
  ##
  ## One more waiter is appended to the waiters sequence and
  ## `wakeupWaiters(true)` is called. Because that waiter is the last one in
  ## the sequence, it gets resumed only after all the other waiters were
  ## awakened and finished.

  # Dropping the pending list expires every request which is still in flight
  # (downloading, but not yet pushed to the SyncQueue). This has to happen
  # before the `await` below, otherwise a race is introduced.
  sq.pending.clear()

  # Pick the slot the queue is reset to, so that no blocks are missed. The
  # candidates are:
  # 1. Debts queue.
  # 2. Processing queue (`inpSlot`, `outSlot`).
  # 3. Requested slot `toSlot`.
  #
  # `outSlot` is the lowest slot added to `block_pool`, but `toSlot` can be
  # less than `outSlot`. `debtsQueue` only holds requests for slots which were
  # not added yet, so it can't be bigger than `outSlot`.
  let resetTo =
    if toSlot.isNone():
      sq.outSlot
    else:
      case sq.kind
      of SyncQueueKind.Forward:
        min(toSlot.get(), sq.outSlot)
      of SyncQueueKind.Backward:
        toSlot.get()

  sq.readyQueue.clear()
  sq.debtsQueue.clear()
  sq.debtsCount = 0
  sq.outSlot = resetTo
  sq.inpSlot = resetTo
  # Wake up all the waiters and wait for the last one.
  await sq.wakeupAndWaitWaiters()

proc isEmpty*[T](sr: SyncResult[T]): bool {.inline.} =
  ## Returns ``true`` if result ``sr`` carries no blocks at all (all the
  ## requested slots are empty).
  sr.data.len == 0

proc hasEndGap*[T](sr: SyncResult[T]): bool {.inline.} =
  ## Returns ``true`` if the last requested slot is not covered by the last
  ## block of result ``sr``; an empty result always has such a gap.
  let requestEnd = sr.request.slot + sr.request.count - 1'u64
  (len(sr.data) == 0) or (sr.data[^1][].slot != requestEnd)

proc getLastNonEmptySlot*[T](sr: SyncResult[T]): Slot {.inline.} =
  ## Returns the slot of the last block in result ``sr``. For a result
  ## without blocks the starting slot of the request is returned.
  if len(sr.data) > 0:
    sr.data[^1][].slot
  else:
    # Only empty slots in the response, fall back to the request slot.
    sr.request.slot

proc processGap[T](sq: SyncQueue[T], sr: SyncResult[T]) =
  ## Updates the gap list of ``sq`` from result ``sr``: an empty result adds
  ## the whole requested range, a result with a trailing gap adds the range
  ## after its last block, and a result without trailing gap clears the list.
  let requestEnd = sr.request.slot + sr.request.count - 1'u64
  if sr.isEmpty():
    sq.gapList.add(GapItem[T](start: sr.request.slot, finish: requestEnd,
                              item: sr.request.item))
  elif sr.hasEndGap():
    sq.gapList.add(GapItem[T](start: sr.getLastNonEmptySlot() + 1'u64,
                              finish: requestEnd, item: sr.request.item))
  else:
    sq.gapList.reset()

proc rewardForGaps[T](sq: SyncQueue[T], score: int) =
  ## Applies ``score`` to the item of every gap currently stored in ``sq``.
  ## A negative score is amplified for items which have more empty than good
  ## responses on record.
  mixin updateScore, getStats
  logScope:
    sync_ident = sq.ident
    direction = sq.kind
    topics = "syncman"

  for gap in sq.gapList:
    if score >= 0:
      gap.item.updateScore(score)
      continue

    let
      emptyCount = gap.item.getStats(SyncResponseKind.Empty)
      goodCount = gap.item.getStats(SyncResponseKind.Good)

    if emptyCount > goodCount:
      # Every surplus empty response increases the penalty by 25%, but not by
      # more than 200% in total.
      let
        weight = int(min(emptyCount - goodCount, 8'u64))
        newScore = score + score * weight div 4
      gap.item.updateScore(newScore)
      debug "Peer received gap penalty", peer = gap.item,
            penalty = newScore
    else:
      gap.item.updateScore(score)

proc toDebtsQueue[T](sq: SyncQueue[T], sr: SyncRequest[T]) =
  ## Schedules request ``sr`` to be handed out again and accounts its slots
  ## in the debts counter.
  sq.debtsCount += sr.count
  sq.debtsQueue.push(sr)

proc getRewindPoint*[T](sq: SyncQueue[T], failSlot: Slot,
                        safeSlot: Slot): Slot =
  ## Returns the slot queue ``sq`` should be rewound to after a failure at
  ## ``failSlot``. ``safeSlot`` is the slot the queue must not rewind past.
  ##
  ## Forward sync: the rewind distance is counted in epochs. It is 1 epoch
  ## for the first failure or a failure at a new slot, and doubles every time
  ## the failure repeats at the same slot, capped at the distance between the
  ## failure epoch and the epoch of ``safeSlot``. The chosen distance is
  ## stored in ``sq`` for the next call. If no rewind is possible a warning
  ## is logged and the first slot of the previous rewind target (or of the
  ## ``safeSlot`` epoch if there was none) is returned.
  ##
  ## Backward sync: always returns ``safeSlot``; a warning is logged when
  ## ``failSlot`` equals ``safeSlot``.
  logScope:
    sync_ident = sq.ident
    direction = sq.kind
    topics = "syncman"

  case sq.kind
  of SyncQueueKind.Forward:
    # Calculate the latest finalized epoch.
    let finalizedEpoch = epoch(safeSlot)

    # Calculate failure epoch.
    let failEpoch = epoch(failSlot)

    # Calculate exponential rewind point in number of epochs.
    let epochCount =
      if sq.rewind.isSome():
        let rewind = sq.rewind.get()
        if failSlot == rewind.failSlot:
          # `MissingParent` happened at same slot so we increase rewind point by
          # factor of 2.
          if failEpoch > finalizedEpoch:
            let rewindPoint = rewind.epochCount shl 1
            if rewindPoint < rewind.epochCount:
              # If exponential rewind point produces `uint64` overflow we will
              # make rewind to latest finalized epoch.
              failEpoch - finalizedEpoch
            else:
              if (failEpoch < rewindPoint) or
                 (failEpoch - rewindPoint < finalizedEpoch):
                # If exponential rewind point points to position which is far
                # behind latest finalized epoch.
                failEpoch - finalizedEpoch
              else:
                rewindPoint
          else:
            warn "Trying to rewind over the last finalized epoch",
                 finalized_slot = safeSlot, fail_slot = failSlot,
                 finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
                 rewind_epoch_count = rewind.epochCount
            0'u64
        else:
          # `MissingParent` happened at different slot so we going to rewind for
          # 1 epoch only.
          if (failEpoch < 1'u64) or (failEpoch - 1'u64 < finalizedEpoch):
            warn "Could not rewind further than the last finalized epoch",
                 finalized_slot = safeSlot, fail_slot = failSlot,
                 finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
                 rewind_epoch_count = rewind.epochCount
            0'u64
          else:
            1'u64
      else:
        # `MissingParent` happened first time.
        if (failEpoch < 1'u64) or (failEpoch - 1'u64 < finalizedEpoch):
          warn "Could not rewind further than the last finalized epoch",
               finalized_slot = safeSlot, fail_slot = failSlot,
               finalized_epoch = finalizedEpoch, fail_epoch = failEpoch
          0'u64
        else:
          1'u64

    if epochCount == 0'u64:
      warn "Unable to continue syncing, please restart the node",
           finalized_slot = safeSlot, fail_slot = failSlot,
           finalized_epoch = finalizedEpoch, fail_epoch = failEpoch
      # Calculate the rewind epoch, which will be equal to last rewind point or
      # finalizedEpoch
      let rewindEpoch =
        if sq.rewind.isNone():
          finalizedEpoch
        else:
          epoch(sq.rewind.get().failSlot) - sq.rewind.get().epochCount
      rewindEpoch.start_slot()
    else:
      # Calculate the rewind epoch, which should not be less than the latest
      # finalized epoch.
      let rewindEpoch = failEpoch - epochCount
      # Update and save new rewind point in SyncQueue.
      sq.rewind = some(RewindPoint(failSlot: failSlot, epochCount: epochCount))
      rewindEpoch.start_slot()
  of SyncQueueKind.Backward:
    # While we perform backward sync, the only possible slot we could rewind is
    # latest stored block.
    if failSlot == safeSlot:
      warn "Unable to continue syncing, please restart the node",
           safe_slot = safeSlot, fail_slot = failSlot
    safeSlot

# Logically a part of the `blocks` iterator below; kept outside of it because
# of https://github.com/nim-lang/Nim/issues/21242
func getOpt(blobs: Opt[seq[BlobSidecars]], i: int): Opt[BlobSidecars] =
  ## Returns the ``i``-th entry of ``blobs`` if blobs are present, and an
  ## empty ``Opt`` otherwise.
  if blobs.isNone:
    return Opt.none(BlobSidecars)
  Opt.some(blobs.get()[i])

iterator blocks[T](sq: SyncQueue[T],
                   sr: SyncResult[T]): (ref ForkedSignedBeaconBlock, Opt[BlobSidecars]) =
  ## Yields every block of ``sr`` together with its optional blobs, in the
  ## order the queue has to process them: first to last for forward sync,
  ## last to first for backward sync.
  case sq.kind
  of SyncQueueKind.Forward:
    for idx in 0 ..< len(sr.data):
      yield (sr.data[idx], sr.blobs.getOpt(idx))
  of SyncQueueKind.Backward:
    for idx in countdown(high(sr.data), 0):
      yield (sr.data[idx], sr.blobs.getOpt(idx))

proc advanceOutput*[T](sq: SyncQueue[T], number: uint64) =
  ## Moves the output slot of ``sq`` by ``number`` slots in sync direction.
  sq.outSlot =
    case sq.kind
    of SyncQueueKind.Forward: sq.outSlot + number
    of SyncQueueKind.Backward: sq.outSlot - number

proc advanceInput[T](sq: SyncQueue[T], number: uint64) =
  ## Moves the input slot of ``sq`` by ``number`` slots in sync direction.
  sq.inpSlot =
    case sq.kind
    of SyncQueueKind.Forward: sq.inpSlot + number
    of SyncQueueKind.Backward: sq.inpSlot - number

proc notInRange[T](sq: SyncQueue[T], sr: SyncRequest[T]): bool =
  ## Returns ``true`` if the result for ``sr`` has to wait because the request
  ## lies beyond the current output slot. Never the case for a queue without
  ## size limit.
  if sq.queueSize <= 0:
    return false
  case sq.kind
  of SyncQueueKind.Forward:
    sr.slot > sq.outSlot
  of SyncQueueKind.Backward:
    sr.lastSlot < sq.outSlot

func numAlreadyKnownSlots[T](sq: SyncQueue[T], sr: SyncRequest[T]): uint64 =
  ## Returns how many of the slots covered by ``sr`` the output slot of ``sq``
  ## has already passed, i.e. which are no longer relevant for sync
  ## progression.
  let
    cursor = sq.outSlot
    first = sr.slot
    last = sr.lastSlot
  case sq.kind
  of SyncQueueKind.Forward:
    if cursor <= first:
      # Nothing of the request was passed yet.
      0'u64
    elif cursor <= last:
      # The front part of the request was passed.
      cursor - first
    else:
      # The whole request was passed.
      sr.count
  of SyncQueueKind.Backward:
    if last <= cursor:
      # Nothing of the request was passed yet.
      0'u64
    elif first <= cursor:
      # The upper part of the request was passed.
      last - cursor
    else:
      # The whole request was passed.
      sr.count

proc push*[T](sq: SyncQueue[T], sr: SyncRequest[T],
              data: seq[ref ForkedSignedBeaconBlock],
              blobs: Opt[seq[BlobSidecars]],
              maybeFinalized: bool = false,
              processingCb: ProcessingCallback = nil) {.async.} =
  ## Push successful result to queue ``sq``.
  ##
  ## ``sr`` is the request which was answered, ``data`` and ``blobs`` are the
  ## received blocks and optional blob sidecars. ``maybeFinalized`` is passed
  ## on to the block verifier. ``processingCb``, if set, is called right
  ## before a result gets verified.
  ##
  ## Results of requests which are no longer pending are dropped. Otherwise
  ## the call blocks until the request is in range, then verifies the queued
  ## results block by block. Depending on the verifier's answers the output
  ## slot is advanced, the request is scheduled for retry, peers are scored
  ## and/or the queue is rewound.
  logScope:
    sync_ident = sq.ident
    topics = "syncman"

  mixin updateScore, updateStats, getStats

  if sr.index notin sq.pending:
    # If request `sr` not in our pending list, it only means that
    # SyncQueue.resetWait() happens and all pending requests are expired, so
    # we swallow `old` requests, and in such way sync-workers are able to get
    # proper new requests from SyncQueue.
    return

  sq.pending.del(sr.index)

  # This is backpressure handling algorithm, this algorithm is blocking
  # all pending `push` requests if `request.slot` not in range.
  while true:
    if sq.notInRange(sr):
      let reset = await sq.waitForChanges()
      if reset:
        # SyncQueue reset happens. We are exiting to wake up sync-worker.
        return
    else:
      let syncres = SyncResult[T](request: sr, data: data, blobs: blobs)
      sq.readyQueue.push(syncres)
      break

  while len(sq.readyQueue) > 0:
    let reqres =
      case sq.kind
      of SyncQueueKind.Forward:
        let minSlot = sq.readyQueue[0].request.slot
        if sq.outSlot < minSlot:
          none[SyncResult[T]]()
        else:
          some(sq.readyQueue.pop())
      of SyncQueueKind.Backward:
        let maxslot = sq.readyQueue[0].request.slot +
                      (sq.readyQueue[0].request.count - 1'u64)
        if sq.outSlot > maxslot:
          none[SyncResult[T]]()
        else:
          some(sq.readyQueue.pop())

    let item =
      if reqres.isSome():
        reqres.get()
      else:
        let rewindSlot = sq.getRewindPoint(sq.outSlot, sq.getSafeSlot())
        warn "Got incorrect sync result in queue, rewind happens",
             blocks_map = getShortMap(sq.readyQueue[0].request,
                                      sq.readyQueue[0].data),
             blocks_count = len(sq.readyQueue[0].data),
             output_slot = sq.outSlot, input_slot = sq.inpSlot,
             rewind_to_slot = rewindSlot, request = sq.readyQueue[0].request
        await sq.resetWait(some(rewindSlot))
        break

    if processingCb != nil:
      processingCb()

    # Validating received blocks one by one
    var
      hasInvalidBlock = false
      unviableBlock: Option[(Eth2Digest, Slot)]
      missingParentSlot: Option[Slot]
      goodBlock: Option[Slot]

      # TODO when https://github.com/nim-lang/Nim/issues/21306 is fixed in used
      # Nim versions, remove workaround and move `res` into for loop
      res: Result[void, VerifierError]

    for blk, blb in sq.blocks(item):
      res = await sq.blockVerifier(blk[], blb, maybeFinalized)

      if res.isOk():
        goodBlock = some(blk[].slot)
      else:
        case res.error()
        of VerifierError.MissingParent:
          missingParentSlot = some(blk[].slot)
          break
        of VerifierError.Duplicate:
          # Keep going, happens naturally
          discard
        of VerifierError.UnviableFork:
          # Keep going so as to register other unviable blocks with the
          # quarantine
          if unviableBlock.isNone:
            # Remember the first unviable block, so we can log it
            unviableBlock = some((blk[].root, blk[].slot))

        of VerifierError.Invalid:
          hasInvalidBlock = true

          let req = item.request
          notice "Received invalid sequence of blocks", request = req,
                  blocks_count = len(item.data),
                  blocks_map = getShortMap(req, item.data)
          req.item.updateScore(PeerScoreBadValues)
          break

    # When errors happen while processing blocks, we retry the same request
    # with, hopefully, a different peer
    let retryRequest =
      hasInvalidBlock or unviableBlock.isSome() or missingParentSlot.isSome()
    if not(retryRequest):
      # The already known slots have to be computed for the request which was
      # just processed. `item` comes from `readyQueue` and is not necessarily
      # the result of `sr`, the request this call was made for.
      let numSlotsAdvanced =
        item.request.count - sq.numAlreadyKnownSlots(item.request)
      sq.advanceOutput(numSlotsAdvanced)

      if goodBlock.isSome():
        # If there no error and response was not empty we should reward peer
        # with some bonus score - not for duplicate blocks though.
        item.request.item.updateScore(PeerScoreGoodValues)
        item.request.item.updateStats(SyncResponseKind.Good, 1'u64)

        # BlockProcessor reports good block, so we can reward all the peers
        # who sent us empty responses.
        sq.rewardForGaps(PeerScoreGoodValues)
        sq.gapList.reset()
      else:
        # Response was empty
        item.request.item.updateStats(SyncResponseKind.Empty, 1'u64)

      sq.processGap(item)

      if numSlotsAdvanced > 0:
        sq.wakeupWaiters()
    else:
      debug "Block pool rejected peer's response", request = item.request,
            blocks_map = getShortMap(item.request, item.data),
            blocks_count = len(item.data),
            ok = goodBlock.isSome(),
            unviable = unviableBlock.isSome(),
            missing_parent = missingParentSlot.isSome()
      # We need to move failed response to the debts queue.
      sq.toDebtsQueue(item.request)

      if unviableBlock.isSome():
        let req = item.request
        notice "Received blocks from an unviable fork", request = req,
              blockRoot = unviableBlock.get()[0],
              blockSlot = unviableBlock.get()[1],
              blocks_count = len(item.data),
              blocks_map = getShortMap(req, item.data)
        req.item.updateScore(PeerScoreUnviableFork)

      if missingParentSlot.isSome():
        var
          resetSlot: Option[Slot]
          failSlot = missingParentSlot.get()

        # If we got `VerifierError.MissingParent` it means that peer returns
        # chain of blocks with holes or `block_pool` is in incomplete state. We
        # going to rewind the SyncQueue some distance back (2ⁿ, where n∈[0,∞],
        # but no more than `finalized_epoch`).
        let
          req = item.request
          safeSlot = sq.getSafeSlot()
          gapsCount = len(sq.gapList)

        # We should penalize all the peers which responded with gaps.
        sq.rewardForGaps(PeerScoreMissingValues)
        sq.gapList.reset()

        case sq.kind
        of SyncQueueKind.Forward:
          if goodBlock.isSome():
            # `VerifierError.MissingParent` and `Success` present in response,
            # it means that we just need to request this range one more time.
            debug "Unexpected missing parent, but no rewind needed",
                  request = req, finalized_slot = safeSlot,
                  last_good_slot = goodBlock.get(),
                  missing_parent_slot = missingParentSlot.get(),
                  blocks_count = len(item.data),
                  blocks_map = getShortMap(req, item.data),
                  gaps_count = gapsCount
            req.item.updateScore(PeerScoreMissingValues)
          else:
            if safeSlot < req.slot:
              let rewindSlot = sq.getRewindPoint(failSlot, safeSlot)
              debug "Unexpected missing parent, rewind happens",
                   request = req, rewind_to_slot = rewindSlot,
                   rewind_point = sq.rewind, finalized_slot = safeSlot,
                   blocks_count = len(item.data),
                   blocks_map = getShortMap(req, item.data),
                   gaps_count = gapsCount
              resetSlot = some(rewindSlot)
            else:
              error "Unexpected missing parent at finalized epoch slot",
                  request = req, rewind_to_slot = safeSlot,
                  blocks_count = len(item.data),
                  blocks_map = getShortMap(req, item.data),
                  gaps_count = gapsCount
              req.item.updateScore(PeerScoreBadValues)
        of SyncQueueKind.Backward:
          if safeSlot > failSlot:
            let rewindSlot = sq.getRewindPoint(failSlot, safeSlot)
            # It's quite common peers give us fewer blocks than we ask for
            debug "Gap in block range response, rewinding", request = req,
                 rewind_to_slot = rewindSlot, rewind_fail_slot = failSlot,
                 finalized_slot = safeSlot, blocks_count = len(item.data),
                 blocks_map = getShortMap(req, item.data)
            resetSlot = some(rewindSlot)
            req.item.updateScore(PeerScoreMissingValues)
          else:
            error "Unexpected missing parent at safe slot", request = req,
                  to_slot = safeSlot, blocks_count = len(item.data),
                  blocks_map = getShortMap(req, item.data)
            req.item.updateScore(PeerScoreBadValues)

        if resetSlot.isSome():
          await sq.resetWait(resetSlot)
          case sq.kind
          of SyncQueueKind.Forward:
            debug "Rewind to slot has happened", reset_slot = resetSlot.get(),
                  queue_input_slot = sq.inpSlot, queue_output_slot = sq.outSlot,
                  rewind_point = sq.rewind, direction = sq.kind
          of SyncQueueKind.Backward:
            debug "Rewind to slot has happened", reset_slot = resetSlot.get(),
                  queue_input_slot = sq.inpSlot, queue_output_slot = sq.outSlot,
                  direction = sq.kind

      break

proc push*[T](sq: SyncQueue[T], sr: SyncRequest[T]) =
  ## Push failed request back to queue, so it gets handed out again.
  if sr.index in sq.pending:
    sq.pending.del(sr.index)
    sq.toDebtsQueue(sr)
  # A request which is not pending any more was expired by
  # SyncQueue.resetWait(). Such `old` requests are swallowed, so sync-workers
  # are able to get proper new requests from SyncQueue.

proc handlePotentialSafeSlotAdvancement[T](sq: SyncQueue[T]) =
  # Sync progress may have advanced to a newer `safeSlot`, either through a
  # response that started with good values and only had errors late, or
  # through an out-of-band mechanism, e.g., VC / REST.
  # In that case move the queue to the new `safeSlot`, so that data which is
  # considered immutable and no longer relevant is not requested again.
  let safeSlot = sq.getSafeSlot()

  func lagBehindSafeSlot(slot: Slot): uint64 =
    # Distance, in sync direction, by which `slot` trails `safeSlot`.
    let (leading, trailing) =
      case sq.kind
      of SyncQueueKind.Forward: (safeSlot, slot)
      of SyncQueueKind.Backward: (slot, safeSlot)
    if leading > trailing:
      leading - trailing
    else:
      0'u64

  let
    outLag = lagBehindSafeSlot(sq.outSlot)
    inpLag =
      if sq.kind == SyncQueueKind.Backward and
         sq.inpSlot == 0xFFFF_FFFF_FFFF_FFFF'u64:
        # Backward queue with this input slot value is left untouched.
        0'u64
      else:
        lagBehindSafeSlot(sq.inpSlot)

  if outLag == 0 and inpLag == 0:
    return

  debug "Sync progress advanced out-of-band",
    safeSlot, outSlot = sq.outSlot, inpSlot = sq.inpSlot
  if outLag != 0:
    sq.advanceOutput(outLag)
  if inpLag != 0:
    sq.advanceInput(inpLag)
  sq.wakeupWaiters()

func updateRequestForNewSafeSlot[T](sq: SyncQueue[T], sr: var SyncRequest[T]) =
  ## Trim `sr` so that it no longer covers slots the queue output has already
  ## moved past.
  ##
  ## A request may have been created before the latest `safeSlot` advancement.
  ## Depending on where `sq.outSlot` lies relative to the request's range the
  ## request is left untouched, shortened, or emptied (`count = 0`).
  let
    processedSlot = sq.outSlot
    firstSlot = sr.slot
    finalSlot = sr.lastSlot
  case sq.kind
  of SyncQueueKind.Forward:
    # Nothing to do while the output position is at or before the first slot.
    if processedSlot > firstSlot:
      if processedSlot <= finalSlot:
        # Leading part is done: move the start forward and shrink the count.
        let skipped = processedSlot - firstSlot
        sr.slot += skipped
        sr.count -= skipped
      else:
        # Output position is past the whole range.
        sr.count = 0
  of SyncQueueKind.Backward:
    # Nothing to do while the output position is at or after the last slot.
    if processedSlot < finalSlot:
      if processedSlot >= firstSlot:
        # Trailing part is done: keep the start, shrink the count.
        let skipped = finalSlot - processedSlot
        sr.count -= skipped
      else:
        # Output position is before the whole range.
        sr.count = 0

proc pop*[T](sq: SyncQueue[T], maxslot: Slot, item: T): SyncRequest[T] =
  ## Create new request according to current SyncQueue parameters.
  ##
  ## `maxslot` is the highest slot that may be requested (the comments below
  ## refer to it as the peer's latest slot) and `item` is attached to the
  ## returned request. Previously failed requests waiting in the debts queue
  ## are served first; only when none is usable a fresh request is cut from the
  ## queue's input position. `SyncRequest.empty` is returned whenever no
  ## request can be produced for the given `maxslot`.
  # Catch up with a possibly advanced safe slot before choosing a range.
  sq.handlePotentialSafeSlotAdvancement()
  while len(sq.debtsQueue) > 0:
    # Only the head of the debts heap is inspected; if `maxslot` does not
    # cover it completely, no request is handed out at all.
    if maxslot < sq.debtsQueue[0].slot:
      # Peer's latest slot is less than starting request's slot.
      return SyncRequest.empty(sq.kind, T)
    if maxslot < sq.debtsQueue[0].lastSlot():
      # Peer's latest slot is less than finishing request's slot.
      return SyncRequest.empty(sq.kind, T)
    var sr = sq.debtsQueue.pop()
    # Account for the removed request using its count before trimming.
    sq.debtsCount = sq.debtsCount - sr.count
    # Drop the part of the request that the output position already passed.
    sq.updateRequestForNewSafeSlot(sr)
    if sr.isEmpty:
      # Nothing left of this request, try the next debt.
      continue
    sr.setItem(item)
    sq.makePending(sr)
    return sr

  # No usable debt: cut a new request at the input position.
  case sq.kind
  of SyncQueueKind.Forward:
    if maxslot < sq.inpSlot:
      # Peer's latest slot is less than queue's input slot.
      return SyncRequest.empty(sq.kind, T)
    if sq.inpSlot > sq.finalSlot:
      # Queue's input slot is bigger than queue's final slot.
      return SyncRequest.empty(sq.kind, T)
    # Request at most `chunkSize` slots, bounded by both `maxslot` and
    # `finalSlot` (inclusive).
    let lastSlot = min(maxslot, sq.finalSlot)
    let count = min(sq.chunkSize, lastSlot + 1'u64 - sq.inpSlot)
    var sr = SyncRequest.init(sq.kind, sq.inpSlot, count, item)
    sq.advanceInput(count)
    sq.makePending(sr)
    sr
  of SyncQueueKind.Backward:
    if sq.inpSlot == 0xFFFF_FFFF_FFFF_FFFF'u64:
      # NOTE(review): the maximum uint64 value presumably marks an exhausted
      # backward input position (moved below slot 0) - confirm against
      # `advanceInput`.
      return SyncRequest.empty(sq.kind, T)
    if sq.inpSlot < sq.finalSlot:
      # Queue's input slot is already below queue's final slot.
      return SyncRequest.empty(sq.kind, T)
    let (slot, count) =
      block:
        # `baseSlot` is one past the input slot, so the request range is
        # [baseSlot - count, baseSlot).
        let baseSlot = sq.inpSlot + 1'u64
        if baseSlot - sq.finalSlot < sq.chunkSize:
          # Fewer than `chunkSize` slots remain down to `finalSlot`.
          let count = uint64(baseSlot - sq.finalSlot)
          (baseSlot - count, count)
        else:
          (baseSlot - sq.chunkSize, sq.chunkSize)
    if (maxslot + 1'u64) < slot + count:
      # Peer's latest slot is less than the highest slot of the new request.
      return SyncRequest.empty(sq.kind, T)
    var sr = SyncRequest.init(sq.kind, slot, count, item)
    sq.advanceInput(count)
    sq.makePending(sr)
    sr

proc debtLen*[T](sq: SyncQueue[T]): uint64 =
  ## Returns the current value of the queue's `debtsCount` counter.
  result = sq.debtsCount

proc pendingLen*[T](sq: SyncQueue[T]): uint64 =
  ## Returns the distance between the queue's input and output positions.
  let (leading, trailing) =
    case sq.kind
    # When moving forward `outSlot` will be <= of `inpSlot`.
    of SyncQueueKind.Forward: (sq.inpSlot, sq.outSlot)
    # When moving backward `outSlot` will be >= of `inpSlot`.
    of SyncQueueKind.Backward: (sq.outSlot, sq.inpSlot)
  leading - trailing

proc len*[T](sq: SyncQueue[T]): uint64 {.inline.} =
  ## Returns number of slots left in queue ``sq``, i.e. the inclusive span
  ## between the output position and the final slot, or zero when the output
  ## position has already moved past the final slot.
  let (upper, lower) =
    case sq.kind
    of SyncQueueKind.Forward: (sq.finalSlot, sq.outSlot)
    of SyncQueueKind.Backward: (sq.outSlot, sq.finalSlot)
  if upper >= lower:
    upper + 1'u64 - lower
  else:
    0'u64

proc total*[T](sq: SyncQueue[T]): uint64 {.inline.} =
  ## Returns total number of slots in queue ``sq``, i.e. the inclusive span
  ## between the start slot and the final slot, or zero when the two are
  ## ordered against the queue's direction.
  let (upper, lower) =
    case sq.kind
    of SyncQueueKind.Forward: (sq.finalSlot, sq.startSlot)
    of SyncQueueKind.Backward: (sq.startSlot, sq.finalSlot)
  if upper >= lower:
    upper + 1'u64 - lower
  else:
    0'u64

proc progress*[T](sq: SyncQueue[T]): uint64 =
  ## Number of useful slots synced so far: the queue's total span minus the
  ## slots still left, which accounts for slots that have become obsolete by
  ## time movements.
  let
    overall = sq.total()
    remaining = sq.len()
  overall - remaining
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								# beacon_chain
-												remove Nim 1.2-compatible `push raise`s and update copyright notice years (#4528)


											
										
										
											2023-01-20 14:14:37 +00:00
+								# Copyright (c) 2018-2023 Status Research & Development GmbH
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								# Licensed and distributed under either of
 								#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
 								#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
 								# at your option. This file may not be copied, modified, or distributed except according to those terms.
-												remove Nim 1.2-compatible `push raise`s and update copyright notice years (#4528)


											
										
										
											2023-01-20 14:14:37 +00:00
+								{.push raises: [].}
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												remove most std/options imports (#4778)


											
										
										
											2023-03-31 20:46:47 +00:00
+								import std/[heapqueue, tables, strutils, sequtils, math]
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								import stew/[results, base10], chronos, chronicles
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								import
-												complete switch to beacon_chain/specs/datatypes/bellatrix (#3295)


											
										
										
											2022-01-18 13:36:52 +00:00
+								  ../spec/datatypes/[base, phase0, altair],
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ../spec/[helpers, forks],
 								  ../networking/[peer_pool, eth2_network],
 								  ../gossip_processing/block_processor,
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								  ../consensus_object_pools/block_pools_types
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								export base, phase0, altair, merge, chronos, chronicles, results,
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								       block_pools_types, helpers
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								logScope:
 								  topics = "syncqueue"
 								type
-												remove `{.raises: [Defect].}` Nim 1.2 compatibility (#5352)

In Nim 1.6, `{.raises: [Defect].}` is no longer necessary. Remove it.
											
										
										
											2023-08-25 09:29:07 +00:00
+								  GetSlotCallback* = proc(): Slot {.gcsafe, raises: [].}
 								  ProcessingCallback* = proc() {.gcsafe, raises: [].}
-												Remove blockBlobsVerifier (#4829)

Having a separate 'blockBlobsVerifier' function for post-Deneb blocks
is no longer of any benefit. This commit removes it.
											
										
										
											2023-04-18 00:12:57 +00:00
+								  BlockVerifier* =  proc(signedBlock: ForkedSignedBeaconBlock,
-												Fix sync for blocks older than MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS (#4977)

When doing sync for blocks older than
MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS, we skip the blobs by range
request, but we then pass an empty blob sequence to
validation, which then fails.

To fix this: Use an Option[Blobsidecars] to allow expressing the
distinction between "empty blob sequence" and "blobs unavailable". Use
the latter for "old" blocks, and don't attempt to run blob validation.
											
										
										
											2023-05-19 16:25:11 +00:00
+								                         blobs: Opt[BlobSidecars], maybeFinalized: bool):
-												remove `{.raises: [Defect].}` Nim 1.2 compatibility (#5352)

In Nim 1.6, `{.raises: [Defect].}` is no longer necessary. Remove it.
											
										
										
											2023-08-25 09:29:07 +00:00
+								      Future[Result[void, VerifierError]] {.gcsafe, raises: [].}
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								  SyncQueueKind* {.pure.} = enum
 								    Forward, Backward
 								  SyncRequest*[T] = object
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								    kind*: SyncQueueKind
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    index*: uint64
 								    slot*: Slot
 								    count*: uint64
 								    item*: T
 								  SyncResult*[T] = object
 								    request*: SyncRequest[T]
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								    data*: seq[ref ForkedSignedBeaconBlock]
-												Update sync to use post-decoupling RPC (#4701)

* Update sync to use post-decoupling RPCs

blob_sidecars_by_range returns a flat list of sidecars, which must
then be grouped per-slot.

* Add test for groupBlobs

* createBlobs: convert proc to func
											
										
										
											2023-03-07 20:19:17 +00:00
+								    blobs*: Opt[seq[BlobSidecars]]
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								  GapItem*[T] = object
 								    start*: Slot
 								    finish*: Slot
 								    item*: T
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								  SyncWaiter* = ref object
 								    future: Future[void]
 								    reset: bool
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								  RewindPoint = object
 								    failSlot: Slot
 								    epochCount: uint64
 								  SyncQueue*[T] = ref object
 								    kind*: SyncQueueKind
 								    inpSlot*: Slot
 								    outSlot*: Slot
 								    startSlot*: Slot
 								    finalSlot*: Slot
 								    chunkSize*: uint64
 								    queueSize*: int
 								    counter*: uint64
 								    pending*: Table[uint64, SyncRequest[T]]
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								    gapList*: seq[GapItem[T]]
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    waiters: seq[SyncWaiter]
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    getSafeSlot*: GetSlotCallback
 								    debtsQueue: HeapQueue[SyncRequest[T]]
 								    debtsCount: uint64
 								    readyQueue: HeapQueue[SyncResult[T]]
 								    rewind: Option[RewindPoint]
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    blockVerifier: BlockVerifier
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								    ident*: string
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								chronicles.formatIt SyncQueueKind: toLowerAscii($it)
 								template shortLog*[T](req: SyncRequest[T]): string =
 								  Base10.toString(uint64(req.slot)) & ":" &
 								  Base10.toString(req.count) & "@" &
 								  Base10.toString(req.index)
 								chronicles.expandIt SyncRequest:
 								  `it` = shortLog(it)
 								  peer = shortLog(it.item)
 								  direction = toLowerAscii($it.kind)
-												Backfiller (#3263)

Backfilling is the process of downloading historical blocks via P2P that
are required to fulfill `GetBlocksByRange` duties - this happens during
both trusted node and finalized checkpoint syncs.

In particular, backfilling happens after syncing to head, such that
attestation work can start as soon as possible.

* Fix SyncQueue initialization procedure.
Remove usage of `awaitne`.
Add cancellation support.
Remove unneeded `sleepAsync()` if peer's head is older than needed.
Add `direction` field to all logs.
Fix syncmanager wedge issue.
Add proper resource cleaning procedure on backward sync finish.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-01-20 07:25:45 +00:00
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								proc getShortMap*[T](req: SyncRequest[T],
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								                     data: openArray[ref ForkedSignedBeaconBlock]): string =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## Returns all slot numbers in ``data`` as placement map.
 								  var res = newStringOfCap(req.count)
 								  var slider = req.slot
 								  var last = 0
 								  for i in 0 ..< req.count:
 								    if last < len(data):
 								      for k in last ..< len(data):
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								        if slider == data[k][].slot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          res.add('x')
 								          last = k + 1
 								          break
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								        elif slider < data[k][].slot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          res.add('.')
 								          break
 								    else:
 								      res.add('.')
-												sync: remove `step` from sync client implementation (#3678)

* sync: remove `step` from sync client implementation

Deprecated in the spec:
https://github.com/ethereum/consensus-specs/pull/2856 - future PR:s will
deprecate server support as well.
											
										
										
											2022-06-06 13:56:59 +00:00
+								    slider = slider + 1
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  res
-												EIP4844 sync (#4581)

* EIP4844 Sync

* Pass eip4844 fork epoch rather than cfg to syncmanager

* Fix sync

* Update test

* map->mapIt
											
										
										
											2023-02-11 20:48:35 +00:00
proc getShortMap*[T](req: SyncRequest[T],
                     data: openArray[ref BlobSidecar]): string =
  ## Returns the placement map of the blob sidecars in ``data`` over the slots
  ## covered by ``req``.
  ##
  ## Every requested slot contributes one ``x`` per sidecar consumed for that
  ## slot (at most ``MAX_BLOBS_PER_BLOCK``) followed by a ``|`` terminator, so
  ## the number of ``|`` characters always equals ``req.count``.
  ##
  ## ``data`` is walked front to back with a single cursor; a sidecar whose
  ## slot differs from the slot being rendered is never skipped over.
  var res = newStringOfCap(req.count * MAX_BLOBS_PER_BLOCK)
  var cur: uint64 = 0
  for slot in req.slot ..< req.slot + req.count:
    # Consume the run of sidecars at the cursor that belong to ``slot``.
    var taken = 0'u64
    while taken < MAX_BLOBS_PER_BLOCK and cur < lenu64(data) and
          slot == data[cur].signed_block_header.message.slot:
      res.add('x')
      inc(cur)
      inc(taken)
    # Always terminate the slot, including when it holds the maximum number
    # of sidecars; otherwise neighbouring slots run together in the map.
    res.add('|')
  res
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
proc contains*[T](req: SyncRequest[T], slot: Slot): bool {.inline.} =
  ## Returns ``true`` when ``slot`` lies in the half-open range
  ## ``[req.slot, req.slot + req.count)``.
  let upperBound = req.slot + req.count
  if slot < req.slot:
    false
  else:
    slot < upperBound
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc cmp*[T](a, b: SyncRequest[T]): int =
 								  ## Compares two requests by their ``slot`` field only, using the ordering
 								  ## of the underlying ``uint64`` values.
 								  let
 								    lhs = uint64(a.slot)
 								    rhs = uint64(b.slot)
 								  cmp(lhs, rhs)
 								proc checkResponse*[T](req: SyncRequest[T],
 								                       data: openArray[Slot]): bool =
 								  ## Checks that the slots listed in ``data`` are a plausible answer to
 								  ## ``req``.
 								  ##
 								  ## An empty ``data`` is accepted. Otherwise ``data`` must not hold more
 								  ## entries than ``req.count``, and every entry must match one of the
 								  ## requested slots while the requested range is walked upwards from
 								  ## ``req.slot``.
 								  let total = len(data)
 								  if total == 0:
 								    # Impossible to verify empty response.
 								    return true
 								  if uint64(total) > req.count:
 								    # Number of blocks in response should be less or equal to number of
 								    # requested blocks.
 								    return false
 								  var
 								    expected = req.slot
 								    checked = 0'u64
 								    matched = 0
 								  while (checked < req.count) and (matched < total):
 								    let current = data[matched]
 								    if expected > current:
 								      # The pending entry lies before the slot being examined, so it can
 								      # no longer be matched.
 								      return false
 								    if expected == current:
 								      inc(matched)
 								    expected += 1'u64
 								    checked += 1'u64
 								  # Valid only when every entry of ``data`` has been matched.
 								  matched == total
 								proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, start: Slot,
 								             finish: Slot, t2: typedesc[T]): SyncRequest[T] =
 								  ## Builds a request of the given ``kind`` that begins at ``start`` and
 								  ## counts ``finish - start + 1`` slots. Remaining fields keep their
 								  ## default values.
 								  SyncRequest[T](
 								    kind: kind,
 								    slot: start,
 								    count: finish - start + 1'u64
 								  )
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, slot: Slot,
 								             count: uint64, item: T): SyncRequest[T] =
 								  ## Builds a request of the given ``kind`` for ``count`` slots beginning at
 								  ## ``slot``, carrying ``item``.
 								  SyncRequest[T](
 								    kind: kind,
 								    slot: slot,
 								    count: count,
 								    item: item
 								  )
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc init[T](t1: typedesc[SyncRequest], kind: SyncQueueKind, start: Slot,
 								             finish: Slot, item: T): SyncRequest[T] =
 								  ## Builds a request of the given ``kind`` that begins at ``start``, counts
 								  ## ``finish - start + 1`` slots and carries ``item``.
 								  SyncRequest[T](
 								    kind: kind,
 								    slot: start,
 								    count: finish - start + 1'u64,
 								    item: item
 								  )
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc empty*[T](t: typedesc[SyncRequest], kind: SyncQueueKind,
 								               t2: typedesc[T]): SyncRequest[T] {.inline.} =
 								  ## Builds a request of the given ``kind`` whose ``count`` is zero.
 								  const noSlots = 0'u64
 								  SyncRequest[T](kind: kind, count: noSlots)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc setItem*[T](sr: var SyncRequest[T], item: T) =
 								  ## Stores ``item`` in the ``item`` field of ``sr``, replacing the previous
 								  ## value.
 								  sr.item = item
 								proc isEmpty*[T](sr: SyncRequest[T]): bool {.inline.} =
 								  ## Returns ``true`` when the request's ``count`` is zero.
 								  const zeroSlots = 0'u64
 								  sr.count == zeroSlots
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc init*[T](t1: typedesc[SyncQueue], t2: typedesc[T],
 								              queueKind: SyncQueueKind,
 								              start, final: Slot, chunkSize: uint64,
 								              getSafeSlotCb: GetSlotCallback,
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								              blockVerifier: BlockVerifier,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								              syncQueueSize: int = -1,
 								              ident: string = "main"): SyncQueue[T] =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## Create new synchronization queue with parameters
 								  ##
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  ## ``start`` and ``final`` are starting and final Slots.
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ##
 								  ## ``chunkSize`` maximum number of slots in one request.
 								  ##
 								  ## ``syncQueueSize`` maximum queue size for incoming data.
 								  ## If ``syncQueueSize > 0`` queue will help to keep backpressure under
 								  ## control. If ``syncQueueSize <= 0`` then queue size is unlimited (default).
 								  # SyncQueue is the core of sync manager, this data structure distributes
 								  # requests to peers and manages responses from peers.
 								  #
 								  # Because SyncQueue is async data structure it manages backpressure and
 								  # order of incoming responses and it also resolves "joker's" problem.
 								  #
 								  # Joker's problem
 								  #
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  # According to pre-v0.12.0 Ethereum consensus network specification
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  # > Clients MUST respond with at least one block, if they have it and it
 								  # > exists in the range. Clients MAY limit the number of blocks in the
 								  # > response.
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  # https://github.com/ethereum/consensus-specs/blob/v0.11.3/specs/phase0/p2p-interface.md#L590
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  #
 								  # Such a rule can lead to very uncertain responses. For example, suppose
 								  # slots 10 to 12 are all non-empty. A client which follows the specification
 								  # can answer with any response from this list (X - block, `-` - empty space):
 								  #
 								  # 1.   X X X
 								  # 2.   - - X
 								  # 3.   - X -
 								  # 4.   - X X
 								  # 5.   X - -
 								  # 6.   X - X
 								  # 7.   X X -
 								  #
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  # If peer answers with `1` everything will be fine and `block_processor`
 								  # will be able to process all 3 blocks.
 								  # In case of `2`, `3`, `4`, `6` - `block_processor` will fail immediately
 								  # with chunk and report "parent is missing" error.
 								  # But in case of `5` and `7` blocks will be processed by `block_processor`
 								  # without any problems, however it will start producing problems right from
 								  # this uncertain last slot. SyncQueue will start producing requests for next
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  # blocks, but all the responses from this point will fail with "parent is
 								  # missing" error. Lets call such peers "jokers", because they are joking
 								  # with responses.
 								  #
 								  # To fix the "joker" problem we are going to perform a rollback to the
 								  # latest finalized epoch's first slot.
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  #
 								  # Note that as of spec v0.12.0, well-behaving clients are forbidden from
 								  # answering this way. However, it still makes sense to attempt to handle
 								  # this case to increase compatibility (e.g., with weak subjectivity nodes
 								  # that are still backfilling blocks)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  doAssert(chunkSize > 0'u64, "Chunk size should not be zero")
 								  SyncQueue[T](
 								    kind: queueKind,
 								    startSlot: start,
 								    finalSlot: final,
 								    chunkSize: chunkSize,
 								    queueSize: syncQueueSize,
 								    getSafeSlot: getSafeSlotCb,
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    waiters: newSeq[SyncWaiter](),
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    counter: 1'u64,
 								    pending: initTable[uint64, SyncRequest[T]](),
 								    debtsQueue: initHeapQueue[SyncRequest[T]](),
 								    inpSlot: start,
 								    outSlot: start,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								    blockVerifier: blockVerifier,
 								    ident: ident
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  )
 								proc `<`*[T](a, b: SyncRequest[T]): bool =
 								  ## Ordering operator for two requests of the same queue kind.
 								  ##
 								  ## For ``Forward`` requests the one with the lower ``slot`` is the
 								  ## smaller; for ``Backward`` requests the one with the higher ``slot``
 								  ## is the smaller. Requests of different kinds fail the ``doAssert``.
 								  doAssert(a.kind == b.kind)
 								  result =
 								    case a.kind
 								    of SyncQueueKind.Forward: a.slot < b.slot
 								    of SyncQueueKind.Backward: b.slot < a.slot
 								proc `<`*[T](a, b: SyncResult[T]): bool =
 								  ## Ordering operator for two results, based on the originating requests.
 								  ##
 								  ## Only ``request.kind`` and ``request.slot`` take part in the
 								  ## comparison: lower slot first for ``Forward``, higher slot first for
 								  ## ``Backward``. Results of different kinds fail the ``doAssert``.
 								  let queueKind = a.request.kind
 								  doAssert(queueKind == b.request.kind)
 								  result =
 								    case queueKind
 								    of SyncQueueKind.Forward: a.request.slot < b.request.slot
 								    of SyncQueueKind.Backward: b.request.slot < a.request.slot
 								proc `==`*[T](a, b: SyncRequest[T]): bool =
-												sync: remove `step` from sync client implementation (#3678)

* sync: remove `step` from sync client implementation

Deprecated in the spec:
https://github.com/ethereum/consensus-specs/pull/2856 - future PR:s will
deprecate server support as well.
											
										
										
											2022-06-06 13:56:59 +00:00
+								  (a.kind == b.kind) and (a.slot == b.slot) and (a.count == b.count)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc lastSlot*[T](req: SyncRequest[T]): Slot =
 								  ## Returns the final slot covered by request ``req``, i.e.
 								  ## ``req.slot + req.count - 1``.
 								  let pastEnd = req.slot + req.count
 								  pastEnd - 1'u64
 								proc makePending*[T](sq: SyncQueue[T], req: var SyncRequest[T]) =
 								  ## Stamps ``req`` with the queue's current counter value, advances the
 								  ## counter and stores the stamped request in ``sq.pending`` under that
 								  ## index.
 								  let ticket = sq.counter
 								  req.index = ticket
 								  sq.counter += 1'u64
 								  sq.pending[ticket] = req
 								proc updateLastSlot*[T](sq: SyncQueue[T], last: Slot) {.inline.} =
 								  ## Update last slot stored in queue ``sq`` with value ``last``.
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								  sq.finalSlot = last
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								proc wakeupWaiters[T](sq: SyncQueue[T], reset = false) =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## Wakeup one or all blocked waiters.
 								  for item in sq.waiters:
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    if reset:
 								      item.reset = true
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    if not(item.future.finished()):
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								      item.future.complete()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								proc waitForChanges[T](sq: SyncQueue[T]): Future[bool] {.async.} =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## Create new waiter and wait for completion from `wakeupWaiters()`.
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								  var waitfut = newFuture[void]("SyncQueue.waitForChanges")
 								  let waititem = SyncWaiter(future: waitfut)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  sq.waiters.add(waititem)
 								  try:
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    await waitfut
 								    return waititem.reset
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  finally:
 								    sq.waiters.delete(sq.waiters.find(waititem))
 								proc wakeupAndWaitWaiters[T](sq: SyncQueue[T]) {.async.} =
-												update `sync_queue` docs w.r.t. joker's problem (#4317)

Explicitly mention in-line documentation within `sync_queue` relating to
older specification version to make rationale clearer.
											
										
										
											2022-11-11 14:36:02 +00:00
+								  ## This procedure will perform wakeupWaiters(true) and blocks until last
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## waiter will be awakened.
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								  var waitChanges = sq.waitForChanges()
 								  sq.wakeupWaiters(true)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  discard await waitChanges
-												Backfiller (#3263)

Backfilling is the process of downloading historical blocks via P2P that
are required to fulfill `GetBlocksByRange` duties - this happens during
both trusted node and finalized checkpoint syncs.

In particular, backfilling happens after syncing to head, such that
attestation work can start as soon as possible.

* Fix SyncQueue initialization procedure.
Remove usage of `awaitne`.
Add cancellation support.
Remove unneeded `sleepAsync()` if peer's head is older than needed.
Add `direction` field to all logs.
Fix syncmanager wedge issue.
Add proper resource cleaning procedure on backward sync finish.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-01-20 07:25:45 +00:00
+								proc clearAndWakeup*[T](sq: SyncQueue[T]) =
 								  ## Drops every entry of ``sq.pending`` and then wakes all current
 								  ## waiters with their ``reset`` flag set.
 								  clear(sq.pending)
 								  wakeupWaiters(sq, reset = true)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								proc resetWait*[T](sq: SyncQueue[T], toSlot: Option[Slot]) {.async.} =
 								  ## Perform reset of all the blocked waiters in SyncQueue.
 								  ##
 								  ## We add one more waiter to the waiters sequence and
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Some of the requests size of which are not equal to `chunkSize` could be processed in wrong order which could lead to sync process freezes.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								  ## call wakeupWaiters(true). Because our waiter is last in sequence of
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## waiters it will be resumed only after all waiters will be awakened and
 								  ## finished.
 								  # We clear the pending list so that all requests that are still in flight
 								  # (still downloading, but not yet pushed to the SyncQueue) will be
 								  # expired. It's important to perform this call first (before any await),
 								  # otherwise a race condition can be introduced.
 								  sq.pending.clear()
 								  # We calculate the minimal slot number to which we are able to reset
 								  # without missing any blocks. There are 3 sources:
 								  # 1. Debts queue.
 								  # 2. Processing queue (`inpSlot`, `outSlot`).
 								  # 3. Requested slot `toSlot`.
 								  #
 								  # Queue's `outSlot` is the lowest slot we added to `block_pool`, but
 								  # `toSlot` can be less than `outSlot`. `debtsQueue` holds only slot
 								  # requests that were not added yet, so it can't be bigger than `outSlot`.
 								  let minSlot =
 								    case sq.kind
 								    of SyncQueueKind.Forward:
 								      if toSlot.isSome():
 								        min(toSlot.get(), sq.outSlot)
 								      else:
 								        sq.outSlot
 								    of SyncQueueKind.Backward:
 								      if toSlot.isSome():
 								        toSlot.get()
 								      else:
 								        sq.outSlot
 								  sq.debtsQueue.clear()
 								  sq.debtsCount = 0
 								  sq.readyQueue.clear()
 								  sq.inpSlot = minSlot
 								  sq.outSlot = minSlot
 								  # We are going to wakeup all the waiters and wait for last one.
 								  await sq.wakeupAndWaitWaiters()
 								proc isEmpty*[T](sr: SyncResult[T]): bool {.inline.} =
 								  ## ``true`` when the response carries no blocks at all, i.e. every
 								  ## requested slot turned out to be empty.
 								  sr.data.len == 0
 								proc hasEndGap*[T](sr: SyncResult[T]): bool {.inline.} =
 								  ## Returns ``true`` if response chain of blocks has gap at the end.
 								  let lastslot = sr.request.slot + sr.request.count - 1'u64
 								  if len(sr.data) == 0:
 								    return true
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								  if sr.data[^1][].slot != lastslot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    return true
 								  return false
 								proc getLastNonEmptySlot*[T](sr: SyncResult[T]): Slot {.inline.} =
 								  ## Returns last non-empty slot from result ``sr``. If response has only
 								  ## empty slots, original request slot will be returned.
 								  if len(sr.data) == 0:
 								    # If response has only empty slots we going to use original request slot
 								    sr.request.slot
 								  else:
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								    sr.data[^1][].slot
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								proc processGap[T](sq: SyncQueue[T], sr: SyncResult[T]) =
 								  ## Updates `sq.gapList` according to the shape of the response `sr`:
 								  ## * an empty response records the whole requested slot range as a gap;
 								  ## * a response with a gap at its end records the range that follows the
 								  ##   last non-empty slot up to the end of the requested range;
 								  ## * any other response clears the accumulated gap list.
 								  ## Recorded gaps carry `sr.request.item` so they can be attributed later.
 								  if sr.isEmpty():
 								    # Nothing was returned: the gap spans the entire request.
 								    sq.gapList.add(
 								      GapItem[T](start: sr.request.slot,
 								                 finish: sr.request.slot + sr.request.count - 1'u64,
 								                 item: sr.request.item))
 								  elif sr.hasEndGap():
 								    # Only the tail of the requested range is missing.
 								    sq.gapList.add(
 								      GapItem[T](start: sr.getLastNonEmptySlot() + 1'u64,
 								                 finish: sr.request.slot + sr.request.count - 1'u64,
 								                 item: sr.request.item))
 								  else:
 								    sq.gapList.reset()
 								proc rewardForGaps[T](sq: SyncQueue[T], score: int) =
 								  ## Applies `score` to the `item` of every entry in `sq.gapList`.
 								  ##
 								  ## A non-negative `score` is passed through unchanged. A negative `score`
 								  ## is amplified when the item has more `Empty` than `Good` responses on
 								  ## record: every excess empty response adds 25% of `score`, with the
 								  ## excess capped at 8 (i.e. at most 200% on top of the base penalty).
 								  mixin updateScore, getStats
 								  logScope:
 								    sync_ident = sq.ident
 								    direction = sq.kind
 								    topics = "syncman"
 								  for entry in sq.gapList:
 								    if score >= 0:
 								      # Rewards are never scaled.
 								      entry.item.updateScore(score)
 								      continue
 								    let
 								      empties = entry.item.getStats(SyncResponseKind.Empty)
 								      goods = entry.item.getStats(SyncResponseKind.Good)
 								    if empties > goods:
 								      # Every empty response increases penalty by 25%, but not more than 200%.
 								      let
 								        excess = int(min(empties - goods, 8'u64))
 								        scaledScore = score + score * excess div 4
 								      entry.item.updateScore(scaledScore)
 								      debug "Peer received gap penalty", peer = entry.item,
 								            penalty = scaledScore
 								    else:
 								      entry.item.updateScore(score)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								proc toDebtsQueue[T](sq: SyncQueue[T], sr: SyncRequest[T]) =
 								  ## Pushes `sr` onto `sq.debtsQueue` and adds its slot count to
 								  ## `sq.debtsCount`.
 								  sq.debtsQueue.push(sr)
 								  let owedSlots = sq.debtsCount + sr.count
 								  sq.debtsCount = owedSlots
 								proc getRewindPoint*[T](sq: SyncQueue[T], failSlot: Slot,
 								                        safeSlot: Slot): Slot =
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								  logScope:
 								    sync_ident = sq.ident
 								    direction = sq.kind
 								    topics = "syncman"
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  case sq.kind
 								  of SyncQueueKind.Forward:
 								    # Calculate the latest finalized epoch.
-												time: spring cleaning (#3262)

Time in the beacon chain is expressed relative to the genesis time -
this PR creates a `beacon_time` module that collects helpers and
utilities for dealing with the time units - the new module does not deal with
actual wall time (that remains in `beacon_clock`).

Collecting the time related stuff in one place makes it easier to find,
avoids some circular imports and allows more easily identifying the code
actually needs wall time to operate.

* move genesis-time-related functionality into `spec/beacon_time`
* avoid using `chronos.Duration` for time differences - it does not
support negative values (such as when something happens earlier than it
should)
* saturate conversions between `FAR_FUTURE_XXX`, so as to avoid
overflows
* fix delay reporting in validator client so it uses the expected
deadline of the slot, not "closest wall slot"
* simplify looping over the slots of an epoch
* `compute_start_slot_at_epoch` -> `start_slot`
* `compute_epoch_at_slot` -> `epoch`

A follow-up PR will (likely) introduce saturating arithmetic for the
time units - this is merely code moves, renames and fixing of small
bugs.
											
										
										
											2022-01-11 10:01:54 +00:00
+								    let finalizedEpoch = epoch(safeSlot)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								    # Calculate failure epoch.
-												time: spring cleaning (#3262)

Time in the beacon chain is expressed relative to the genesis time -
this PR creates a `beacon_time` module that collects helpers and
utilities for dealing with the time units - the new module does not deal with
actual wall time (that remains in `beacon_clock`).

Collecting the time related stuff in one place makes it easier to find,
avoids some circular imports and allows more easily identifying the code
actually needs wall time to operate.

* move genesis-time-related functionality into `spec/beacon_time`
* avoid using `chronos.Duration` for time differences - it does not
support negative values (such as when something happens earlier than it
should)
* saturate conversions between `FAR_FUTURE_XXX`, so as to avoid
overflows
* fix delay reporting in validator client so it uses the expected
deadline of the slot, not "closest wall slot"
* simplify looping over the slots of an epoch
* `compute_start_slot_at_epoch` -> `start_slot`
* `compute_epoch_at_slot` -> `epoch`

A follow-up PR will (likely) introduce saturating arithmetic for the
time units - this is merely code moves, renames and fixing of small
bugs.
											
										
										
											2022-01-11 10:01:54 +00:00
+								    let failEpoch = epoch(failSlot)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								    # Calculate exponential rewind point in number of epochs.
 								    let epochCount =
 								      if sq.rewind.isSome():
 								        let rewind = sq.rewind.get()
 								        if failSlot == rewind.failSlot:
 								          # `MissingParent` happened at same slot so we increase rewind point by
 								          # factor of 2.
 								          if failEpoch > finalizedEpoch:
 								            let rewindPoint = rewind.epochCount shl 1
 								            if rewindPoint < rewind.epochCount:
 								              # If exponential rewind point produces `uint64` overflow we will
 								              # make rewind to latest finalized epoch.
 								              failEpoch - finalizedEpoch
 								            else:
 								              if (failEpoch < rewindPoint) or
 								                 (failEpoch - rewindPoint < finalizedEpoch):
 								                # If exponential rewind point points to position which is far
 								                # behind latest finalized epoch.
 								                failEpoch - finalizedEpoch
 								              else:
 								                rewindPoint
 								          else:
 								            warn "Trying to rewind over the last finalized epoch",
 								                 finalized_slot = safeSlot, fail_slot = failSlot,
 								                 finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
 								                 rewind_epoch_count = rewind.epochCount,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                 finalized_epoch = finalizedEpoch
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+'u64
 								        else:
 								          # `MissingParent` happened at different slot so we going to rewind for
 								          # 1 epoch only.
 								          if (failEpoch < 1'u64) or (failEpoch - 1'u64 < finalizedEpoch):
 								            warn "Сould not rewind further than the last finalized epoch",
 								                 finalized_slot = safeSlot, fail_slot = failSlot,
 								                 finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
 								                 rewind_epoch_count = rewind.epochCount,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                 finalized_epoch = finalizedEpoch
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+'u64
 								          else:
 'u64
 								      else:
 								        # `MissingParent` happened first time.
 								        if (failEpoch < 1'u64) or (failEpoch - 1'u64 < finalizedEpoch):
 								          warn "Сould not rewind further than the last finalized epoch",
 								               finalized_slot = safeSlot, fail_slot = failSlot,
 								               finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								               finalized_epoch = finalizedEpoch
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+'u64
 								        else:
 'u64
 								    if epochCount == 0'u64:
 								      warn "Unable to continue syncing, please restart the node",
 								           finalized_slot = safeSlot, fail_slot = failSlot,
 								           finalized_epoch = finalizedEpoch, fail_epoch = failEpoch,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								           finalized_epoch = finalizedEpoch
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								      # Calculate the rewind epoch, which will be equal to last rewind point or
 								      # finalizedEpoch
 								      let rewindEpoch =
 								        if sq.rewind.isNone():
 								          finalizedEpoch
 								        else:
-												time: spring cleaning (#3262)

Time in the beacon chain is expressed relative to the genesis time -
this PR creates a `beacon_time` module that collects helpers and
utilities for dealing with the time units - the new module does not deal with
actual wall time (that remains in `beacon_clock`).

Collecting the time related stuff in one place makes it easier to find,
avoids some circular imports and allows more easily identifying the code
actually needs wall time to operate.

* move genesis-time-related functionality into `spec/beacon_time`
* avoid using `chronos.Duration` for time differences - it does not
support negative values (such as when something happens earlier than it
should)
* saturate conversions between `FAR_FUTURE_XXX`, so as to avoid
overflows
* fix delay reporting in validator client so it uses the expected
deadline of the slot, not "closest wall slot"
* simplify looping over the slots of an epoch
* `compute_start_slot_at_epoch` -> `start_slot`
* `compute_epoch_at_slot` -> `epoch`

A follow-up PR will (likely) introduce saturating arithmetic for the
time units - this is merely code moves, renames and fixing of small
bugs.
											
										
										
											2022-01-11 10:01:54 +00:00
+								          epoch(sq.rewind.get().failSlot) - sq.rewind.get().epochCount
 								      rewindEpoch.start_slot()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    else:
 								      # Calculate the rewind epoch, which should not be less than the latest
 								      # finalized epoch.
 								      let rewindEpoch = failEpoch - epochCount
 								      # Update and save new rewind point in SyncQueue.
 								      sq.rewind = some(RewindPoint(failSlot: failSlot, epochCount: epochCount))
-												time: spring cleaning (#3262)

Time in the beacon chain is expressed relative to the genesis time -
this PR creates a `beacon_time` module that collects helpers and
utilities for dealing with the time units - the new module does not deal with
actual wall time (that remains in `beacon_clock`).

Collecting the time related stuff in one place makes it easier to find,
avoids some circular imports and allows more easily identifying the code
actually needs wall time to operate.

* move genesis-time-related functionality into `spec/beacon_time`
* avoid using `chronos.Duration` for time differences - it does not
support negative values (such as when something happens earlier than it
should)
* saturate conversions between `FAR_FUTURE_XXX`, so as to avoid
overflows
* fix delay reporting in validator client so it uses the expected
deadline of the slot, not "closest wall slot"
* simplify looping over the slots of an epoch
* `compute_start_slot_at_epoch` -> `start_slot`
* `compute_epoch_at_slot` -> `epoch`

A follow-up PR will (likely) introduce saturating arithmetic for the
time units - this is merely code moves, renames and fixing of small
bugs.
											
										
										
											2022-01-11 10:01:54 +00:00
+								      rewindEpoch.start_slot()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  of SyncQueueKind.Backward:
 								    # While we perform backward sync, the only possible slot we could rewind is
 								    # latest stored block.
 								    if failSlot == safeSlot:
 								      warn "Unable to continue syncing, please restart the node",
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								           safe_slot = safeSlot, fail_slot = failSlot
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    safeSlot
-												Sync: fix backfill blob iteration order (#4993)

* Sync: fix backfill blob iteration order

* Address review feedback

* Add comment clarifying reason for func top-level placement
											
										
										
											2023-06-05 14:42:27 +00:00
+								# This belongs inside the blocks iterator below, but can't be there due to
 								# https://github.com/nim-lang/Nim/issues/21242
 								func getOpt(blobs: Opt[seq[BlobSidecars]], i: int): Opt[BlobSidecars] =
 								  ## Returns the `i`-th element of `blobs` wrapped in `Opt.some`, or
 								  ## `Opt.none` when `blobs` itself holds no value.
 								  if not blobs.isSome:
 								    return Opt.none(BlobSidecars)
 								  Opt.some(blobs.get()[i])
 								iterator blocks[T](sq: SyncQueue[T],
 								                   sr: SyncResult[T]): (ref ForkedSignedBeaconBlock, Opt[BlobSidecars]) =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  case sq.kind
 								  of SyncQueueKind.Forward:
 								    for i in countup(0, len(sr.data) - 1):
-												Sync: fix backfill blob iteration order (#4993)

* Sync: fix backfill blob iteration order

* Address review feedback

* Add comment clarifying reason for func top-level placement
											
										
										
											2023-06-05 14:42:27 +00:00
+								      yield (sr.data[i], sr.blobs.getOpt(i))
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  of SyncQueueKind.Backward:
 								    for i in countdown(len(sr.data) - 1, 0):
-												Sync: fix backfill blob iteration order (#4993)

* Sync: fix backfill blob iteration order

* Address review feedback

* Add comment clarifying reason for func top-level placement
											
										
										
											2023-06-05 14:42:27 +00:00
+								      yield (sr.data[i], sr.blobs.getOpt(i))
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc advanceOutput*[T](sq: SyncQueue[T], number: uint64) =
 								  ## Moves `sq.outSlot` by `number` slots in the queue's direction:
 								  ## upwards for a `Forward` queue, downwards for a `Backward` one.
 								  let nextOutSlot =
 								    case sq.kind
 								    of SyncQueueKind.Forward:
 								      sq.outSlot + number
 								    of SyncQueueKind.Backward:
 								      sq.outSlot - number
 								  sq.outSlot = nextOutSlot
 								proc advanceInput[T](sq: SyncQueue[T], number: uint64) =
 								  ## Moves `sq.inpSlot` by `number` slots in the queue's direction:
 								  ## upwards for a `Forward` queue, downwards for a `Backward` one.
 								  let nextInpSlot =
 								    case sq.kind
 								    of SyncQueueKind.Forward:
 								      sq.inpSlot + number
 								    of SyncQueueKind.Backward:
 								      sq.inpSlot - number
 								  sq.inpSlot = nextInpSlot
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								proc notInRange[T](sq: SyncQueue[T], sr: SyncRequest[T]): bool =
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  case sq.kind
 								  of SyncQueueKind.Forward:
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    (sq.queueSize > 0) and (sr.slot > sq.outSlot)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  of SyncQueueKind.Backward:
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    (sq.queueSize > 0) and (sr.lastSlot < sq.outSlot)
 								func numAlreadyKnownSlots[T](sq: SyncQueue[T], sr: SyncRequest[T]): uint64 =
 								  ## Compute the number of slots covered by a given `SyncRequest` that are
 								  ## already known and, hence, no longer relevant for sync progression.
 								  ##
 								  ## The request range `sr.slot .. sr.lastSlot` is compared against
 								  ## `sq.outSlot`. The result is `sr.count` when the whole request lies
 								  ## behind `outSlot` (in the queue's direction), the size of the overlap
 								  ## when only part of it does, and zero when the whole request is still
 								  ## relevant.
 								  let
 								    outSlot = sq.outSlot
 								    lowSlot = sr.slot
 								    highSlot = sr.lastSlot
 								  case sq.kind
 								  of SyncQueueKind.Forward:
 								    if outSlot > highSlot:
 								      # Entire request is no longer relevant.
 								      sr.count
 								    elif outSlot > lowSlot:
 								      # Request is only partially relevant.
 								      outSlot - lowSlot
 								    else:
 								      # Entire request is still relevant.
 								      0'u64
 								  of SyncQueueKind.Backward:
 								    if lowSlot > outSlot:
 								      # Entire request is no longer relevant.
 								      sr.count
 								    elif highSlot > outSlot:
 								      # Request is only partially relevant.
 								      highSlot - outSlot
 								    else:
 								      # Entire request is still relevant.
 								      0'u64
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc push*[T](sq: SyncQueue[T], sr: SyncRequest[T],
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								              data: seq[ref ForkedSignedBeaconBlock],
-												Update sync to use post-decoupling RPC (#4701)

* Update sync to use post-decoupling RPCs

blob_sidecars_by_range returns a flat list of sidecars, which must
then be grouped per-slot.

* Add test for groupBlobs

* createBlobs: convert proc to func
											
										
										
											2023-03-07 20:19:17 +00:00
+								              blobs: Opt[seq[BlobSidecars]],
-												Skip execution payload verification for finalized blocks (#4591)

While syncing the finalized portion of the chain, the execution client
cannot efficiently sync and most of the time returns `SYNCING` - in this
PR, we use CL-verified optimistic sync as long as the block is claimed to
be finalized, only occasionally updating the EL with progress.

Although a peer might lie about what is finalized and what isn't,
eventually we'll call the execution client - thus, all a dishonest
client can do is delay execution verification slightly. Gossip blocks in
particular are never assumed to be finalized.
											
										
										
											2023-02-06 07:22:08 +00:00
+								              maybeFinalized: bool = false,
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								              processingCb: ProcessingCallback = nil) {.async.} =
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								  logScope:
 								    sync_ident = sq.ident
 								    topics = "syncman"
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  ## Push successful result to queue ``sq``.
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								  mixin updateScore, updateStats, getStats
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								  if sr.index notin sq.pending:
 								    # If request `sr` not in our pending list, it only means that
 								    # SyncQueue.resetWait() happens and all pending requests are expired, so
 								    # we swallow `old` requests, and in such way sync-workers are able to get
 								    # proper new requests from SyncQueue.
 								    return
 								  sq.pending.del(sr.index)
 								  # This is backpressure handling algorithm, this algorithm is blocking
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								  # all pending `push` requests if `request.slot` not in range.
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  while true:
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    if sq.notInRange(sr):
 								      let reset = await sq.waitForChanges()
 								      if reset:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        # SyncQueue reset happens. We are exiting to wake up sync-worker.
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								        return
 								    else:
-												EIP4844 sync (#4581)

* EIP4844 Sync

* Pass eip4844 fork epoch rather than cfg to syncmanager

* Fix sync

* Update test

* map->mapIt
											
										
										
											2023-02-11 20:48:35 +00:00
+								      let syncres = SyncResult[T](request: sr, data: data, blobs: blobs)
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								      sq.readyQueue.push(syncres)
 								      break
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								  while len(sq.readyQueue) > 0:
 								    let reqres =
 								      case sq.kind
 								      of SyncQueueKind.Forward:
 								        let minSlot = sq.readyQueue[0].request.slot
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								        if sq.outSlot < minSlot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          none[SyncResult[T]]()
 								        else:
 								          some(sq.readyQueue.pop())
 								      of SyncQueueKind.Backward:
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								        let maxslot = sq.readyQueue[0].request.slot +
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								                      (sq.readyQueue[0].request.count - 1'u64)
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								        if sq.outSlot > maxslot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          none[SyncResult[T]]()
 								        else:
 								          some(sq.readyQueue.pop())
 								    let item =
 								      if reqres.isSome():
 								        reqres.get()
 								      else:
 								        let rewindSlot = sq.getRewindPoint(sq.outSlot, sq.getSafeSlot())
 								        warn "Got incorrect sync result in queue, rewind happens",
 								             blocks_map = getShortMap(sq.readyQueue[0].request,
 								                                      sq.readyQueue[0].data),
 								             blocks_count = len(sq.readyQueue[0].data),
 								             output_slot = sq.outSlot, input_slot = sq.inpSlot,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								             rewind_to_slot = rewindSlot, request = sq.readyQueue[0].request
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        await sq.resetWait(some(rewindSlot))
 								        break
-												SyncManager cleanups for backfill support (#3189)

* SyncManager cleanups for backfill support

Cleanups, fixes and simplifications, in anticipation of backfill support
for the `SyncManager`:

* reformat sync progress indicator to show time left and % done more
prominently:
  * old: `sync="sPssPsssss:2:2.4229:00h57m (2706898)"`
  * new: `sync="14d12h31m (0.52%) 1.1378slots/s (wQQQQQDDQQ:1287520)"`
* reset average speed when going out of sync
* pass all block errors to sync manager, including duplicate/unviable
* penalize peers for reporting a head block that is outside of our
expected wall clock time (they're likely on a different network or
trying to disrupt sync)
* remove `SyncFailureKind` (unused)
* remove `inRange` (unused)
* add `Q` for sync queue requests that are in the `SyncQueue` but not
yet in the `BlockProcessor` queue
* update last slot in `SyncQueue` after getting peer status
* fix race condition between `wakeupWaiters` and `resetWait`, where
workers would not be correctly reset if block verification returned a
completed future without event loop
* log syncmanager direction

* Fix ordering issue.
Requests whose size is not equal to `chunkSize` could be processed in the wrong order, which could cause the sync process to freeze.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2021-12-16 14:57:16 +00:00
+								    if processingCb != nil:
 								      processingCb()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    # Validating received blocks one by one
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								    var
 								      hasInvalidBlock = false
 								      unviableBlock: Option[(Eth2Digest, Slot)]
 								      missingParentSlot: Option[Slot]
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								      goodBlock: Option[Slot]
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												update/clarify Nim issue link after retesting/re-isolating

											
										
										
											2023-01-27 10:41:19 +00:00
+								      # TODO when https://github.com/nim-lang/Nim/issues/21306 is fixed in used
 								      # Nim versions, remove workaround and move `res` into for loop
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and update its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								      res: Result[void, VerifierError]
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												EIP4844 sync (#4581)

* EIP4844 Sync

* Pass eip4844 fork epoch rather than cfg to syncmanager

* Fix sync

* Update test

* map->mapIt
											
										
										
											2023-02-11 20:48:35 +00:00
+								    var i=0
-												Sync: fix backfill blob iteration order (#4993)

* Sync: fix backfill blob iteration order

* Address review feedback

* Add comment clarifying reason for func top-level placement
											
										
										
											2023-06-05 14:42:27 +00:00
+								    for blk, blb in sq.blocks(item):
 								      res = await sq.blockVerifier(blk[], blb, maybeFinalized)
-												EIP4844 sync (#4581)

* EIP4844 Sync

* Pass eip4844 fork epoch rather than cfg to syncmanager

* Fix sync

* Update test

* map->mapIt
											
										
										
											2023-02-11 20:48:35 +00:00
+								      inc(i)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								      if res.isOk():
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								        goodBlock = some(blk[].slot)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								      else:
 								        case res.error()
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and update its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								        of VerifierError.MissingParent:
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								          missingParentSlot = some(blk[].slot)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								          break
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and updating its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								        of VerifierError.Duplicate:
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								          # Keep going, happens naturally
 								          discard
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and updating its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								        of VerifierError.UnviableFork:
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								          # Keep going so as to register other unviable blocks with the
 								          # quarantine
 								          if unviableBlock.isNone:
 								            # Remember the first unviable block, so we can log it
-												harden and speed up block sync (#3358)

* harden and speed up block sync

The `GetBlockBy*` server implementation currently reads SSZ bytes from
database, deserializes them into a Nim object then serializes them right
back to SSZ - here, we eliminate the deser/ser steps and send the bytes
straight to the network. Unfortunately, the snappy recoding must still
be done because of differences in framing.

Also, the quota system makes one giant request for quota right before
sending all blocks - this means that a 1024 block request will be
"paused" for a long time, then all blocks will be sent at once causing a
spike in database reads which potentially will see the reading client
time out before any block is sent.

Finally, on the reading side we make several copies of blocks as they
travel through various queues - this was not noticeable before but
becomes a problem in two cases: bellatrix blocks are up to 10mb (instead
of .. 30-40kb) and when backfilling, we process a lot more of them a lot
faster.

* fix status comparisons for nodes syncing from genesis (#3327 was a bit
too hard)
* don't hit database at all for post-altair slots in GetBlock v1
requests
											
										
										
											2022-02-07 17:20:10 +00:00
+								            unviableBlock = some((blk[].root, blk[].slot))
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and updating its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								        of VerifierError.Invalid:
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								          hasInvalidBlock = true
 								          let req = item.request
-												reduce received invalid sync block logging to notice; decimal TTD logging (#3839)


											
										
										
											2022-07-06 10:34:12 +00:00
+								          notice "Received invalid sequence of blocks", request = req,
 								                  blocks_count = len(item.data),
 								                  blocks_map = getShortMap(req, item.data)
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								          req.item.updateScore(PeerScoreBadValues)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          break
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								    # When errors happen while processing blocks, we retry the same request
 								    # with, hopefully, a different peer
 								    let retryRequest =
 								      hasInvalidBlock or unviableBlock.isSome() or missingParentSlot.isSome()
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								    if not(retryRequest):
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      let numSlotsAdvanced = item.request.count - sq.numAlreadyKnownSlots(sr)
 								      sq.advanceOutput(numSlotsAdvanced)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								      if goodBlock.isSome():
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        # If there no error and response was not empty we should reward peer
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								        # with some bonus score - not for duplicate blocks though.
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								        item.request.item.updateScore(PeerScoreGoodValues)
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								        item.request.item.updateStats(SyncResponseKind.Good, 1'u64)
 								        # BlockProcessor reports good block, so we can reward all the peers
 								        # who sent us empty responses.
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								        sq.rewardForGaps(PeerScoreGoodValues)
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								        sq.gapList.reset()
 								      else:
 								        # Response was empty
 								        item.request.item.updateStats(SyncResponseKind.Empty, 1'u64)
 								      sq.processGap(item)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      if numSlotsAdvanced > 0:
 								        sq.wakeupWaiters()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    else:
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								      debug "Block pool rejected peer's response", request = item.request,
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								            blocks_map = getShortMap(item.request, item.data),
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								            blocks_count = len(item.data),
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								            ok = goodBlock.isSome(),
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								            unviable = unviableBlock.isSome(),
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								            missing_parent = missingParentSlot.isSome()
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								      # We need to move failed response to the debts queue.
 								      sq.toDebtsQueue(item.request)
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								      if unviableBlock.isSome():
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								        let req = item.request
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								        notice "Received blocks from an unviable fork", request = req,
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								              blockRoot = unviableBlock.get()[0],
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								              blockSlot = unviableBlock.get()[1],
 								              blocks_count = len(item.data),
 								              blocks_map = getShortMap(req, item.data)
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								        req.item.updateScore(PeerScoreUnviableFork)
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								      if missingParentSlot.isSome():
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								        var
 								          resetSlot: Option[Slot]
 								          failSlot = missingParentSlot.get()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and update its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								        # If we got `VerifierError.MissingParent` it means that peer returns
 								        # chain of blocks with holes or `block_pool` is in incomplete state. We
 								        # going to rewind the SyncQueue some distance back (2ⁿ, where n∈[0,∞],
 								        # but no more than `finalized_epoch`).
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        let
 								          req = item.request
 								          safeSlot = sq.getSafeSlot()
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								          gapsCount = len(sq.gapList)
 								        # We should penalize all the peers which responded with gaps.
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								        sq.rewardForGaps(PeerScoreMissingValues)
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								        sq.gapList.reset()
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        case sq.kind
 								        of SyncQueueKind.Forward:
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								          if goodBlock.isSome():
-												rename `BlockError` -> `VerifierError` (#4310)

We currently use `BlockError` for both beacon blocks and LC objects.
In light of EIP4844, we will likely also use it for blob sidecars.
To avoid confusion, renaming it to a more generic `VerifierError`,
and update its documentation to be more generic.

To avoid long lines as a followup, also renaming the `block_processor`'s
`BlockProcessingCompleted.completed`->`ProcessingStatus.completed` and
`BlockProcessingCompleted.notCompleted`->`ProcessingStatus.notCompleted`
											
										
										
											2022-11-10 17:40:27 +00:00
+								            # `VerifierError.MissingParent` and `Success` present in response,
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								            # it means that we just need to request this range one more time.
 								            debug "Unexpected missing parent, but no rewind needed",
 								                  request = req, finalized_slot = safeSlot,
 								                  last_good_slot = goodBlock.get(),
 								                  missing_parent_slot = missingParentSlot.get(),
 								                  blocks_count = len(item.data),
 								                  blocks_map = getShortMap(req, item.data),
 								                  gaps_count = gapsCount
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								            req.item.updateScore(PeerScoreMissingValues)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          else:
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								            if safeSlot < req.slot:
 								              let rewindSlot = sq.getRewindPoint(failSlot, safeSlot)
 								              debug "Unexpected missing parent, rewind happens",
 								                   request = req, rewind_to_slot = rewindSlot,
 								                   rewind_point = sq.rewind, finalized_slot = safeSlot,
 								                   blocks_count = len(item.data),
 								                   blocks_map = getShortMap(req, item.data),
 								                   gaps_count = gapsCount
 								              resetSlot = some(rewindSlot)
 								            else:
 								              error "Unexpected missing parent at finalized epoch slot",
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                  request = req, rewind_to_slot = safeSlot,
 								                  blocks_count = len(item.data),
-												Sync gaps fix (#4090)


											
										
										
											2022-09-19 09:37:42 +00:00
+								                  blocks_map = getShortMap(req, item.data),
 								                  gaps_count = gapsCount
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								              req.item.updateScore(PeerScoreBadValues)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								        of SyncQueueKind.Backward:
-												consistent peer scoring for missing non-finalized parent (#3381)

When the sync queue processes results for a blocks by range request,
and the requested range contained some slots that are already finalized,
`BlockError.MissingParent` currently leads to `PeerScoreBadBlocks` even
when the error occurs on a non-finalized slot in the requested range.
This patch changes the scoring in that case to `PeerScoreMissingBlocks`
for consistency with range requests solely covering non-finalized slots,
and, likewise, rewinds the sync queue to the next `rewindSlot`.
											
										
										
											2022-09-16 19:45:53 +00:00
+								          if safeSlot > failSlot:
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								            let rewindSlot = sq.getRewindPoint(failSlot, safeSlot)
-												Backfiller (#3263)

Backfilling is the process of downloading historical blocks via P2P that
are required to fulfill `GetBlocksByRange` duties - this happens during
both trusted node and finalized checkpoint syncs.

In particular, backfilling happens after syncing to head, such that
attestation work can start as soon as possible.

* Fix SyncQueue initialization procedure.
Remove usage of `awaitne`.
Add cancellation support.
Remove unneeded `sleepAsync()` if peer's head is older than needed.
Add `direction` field to all logs.
Fix syncmanager wedge issue.
Add proper resource cleaning procedure on backward sync finish.

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-01-20 07:25:45 +00:00
+								            # It's quite common peers give us fewer blocks than we ask for
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								            debug "Gap in block range response, rewinding", request = req,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                 rewind_to_slot = rewindSlot, rewind_fail_slot = failSlot,
 								                 finalized_slot = safeSlot, blocks_count = len(item.data),
 								                 blocks_map = getShortMap(req, item.data)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								            resetSlot = some(rewindSlot)
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								            req.item.updateScore(PeerScoreMissingValues)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								          else:
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								            error "Unexpected missing parent at safe slot", request = req,
 								                  to_slot = safeSlot, blocks_count = len(item.data),
 								                  blocks_map = getShortMap(req, item.data)
-												rename `PeerScoreXyzBlocks` -> `PeerScoreXyzValues` (#4318)

The various `PeerScore` constants are used for both beacon blocks and
LC objects, and will likely also find use for EIP4844 blob sidecars.
Renaming them to use more generically applicable names not referring
to blocks explicitly anymore.
											
										
										
											2022-11-11 11:34:28 +00:00
+								            req.item.updateScore(PeerScoreBadValues)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								        if resetSlot.isSome():
 								          await sq.resetWait(resetSlot)
 								          case sq.kind
 								          of SyncQueueKind.Forward:
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								            debug "Rewind to slot has happened", reset_slot = resetSlot.get(),
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								                  queue_input_slot = sq.inpSlot, queue_output_slot = sq.outSlot,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                  rewind_point = sq.rewind, direction = sq.kind
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								          of SyncQueueKind.Backward:
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								            debug "Rewind to slot has happened", reset_slot = resetSlot.get(),
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
+								                  queue_input_slot = sq.inpSlot, queue_output_slot = sq.outSlot,
-												Refactor and optimize sync logs. (#3451)

* Refactor and optimize logs.

* Introduce shortLog(SyncRequest).

* Address review comment.

* make sync queue logs more consistent

Adds a few minor logging improvements:
- Fixes a typo (`was happened` -> `has happened`)
- Avoids passing `reset_slot` argument to log statement multiple times
- Uses same `rewind_to_slot` label when logging in both sync directions
- Consistent rewind point logging

Co-authored-by: cheatfate <eugene.kabanov@status.im>
											
										
										
											2022-03-03 08:05:33 +00:00
+								                  direction = sq.kind
-												Harden handling of unviable forks (#3312)

* Harden handling of unviable forks

In our current handling of unviable forks, we allow peers to send us
blocks that come from a different fork - this is not necessarily an
error as it can happen naturally, but it does open up the client to a
case where the same unviable fork keeps getting requested - rather than
allowing this to happen, we'll now give these peers a small negative
score - if it keeps happening, we'll disconnect them.

* keep track of unviable forks in quarantine, to avoid filling it with
known junk
* collect peer scores in single module
* descore peers when they send unviable blocks during sync
* don't give score for duplicate blocks
* increase quarantine size to a level that allows finality to happen
under optimal conditions - this helps avoid downloading the same blocks
over and over in case of an unviable fork
* increase initial score for new peers to make room for one more failure
before disconnection
* log and score invalid/unviable blocks in requestmanager too
* avoid ChainDAG dependency in quarantine
* reject gossip blocks with unviable parent
* continue processing unviable sync blocks in order to build unviable
dag

* docs

* Update beacon_chain/consensus_object_pools/block_pools_types.nim

* add unviable queue test
											
										
										
											2022-01-26 12:20:08 +00:00
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								      break
 								proc push*[T](sq: SyncQueue[T], sr: SyncRequest[T]) =
 								  ## Hand the failed request ``sr`` back to the queue so it can be retried.
 								  ##
 								  ## Only requests that are still registered in ``sq.pending`` are re-queued.
 								  ## A request that is missing from the pending table has been expired by
 								  ## `SyncQueue.resetWait()`; such stale requests are silently dropped so
 								  ## that sync workers pick up fresh requests from the queue instead.
 								  if sr.index in sq.pending:
 								    # Still pending: unregister it and move it to the debts queue.
 								    sq.pending.del(sr.index)
 								    sq.toDebtsQueue(sr)
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								proc handlePotentialSafeSlotAdvancement[T](sq: SyncQueue[T]) =
 								  # It may happen that sync progress advanced to a newer `safeSlot`, either
 								  # by a response that started with good values and only had errors late, or
 								  # through an out-of-band mechanism, e.g., VC / REST.
 								  # If that happens, advance to the new `safeSlot` to avoid repeating requests
 								  # for data that is considered immutable and no longer relevant.
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								  let safeSlot = sq.getSafeSlot()
 								  func numSlotsBehindSafeSlot(slot: Slot): uint64 =
 								    case sq.kind
 								    of SyncQueueKind.Forward:
 								      if safeSlot > slot:
 								        safeSlot - slot
 								      else:
 								        0
 								    of SyncQueueKind.Backward:
 								      if slot > safeSlot:
 								        slot - safeSlot
 								      else:
 								        0
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								  let
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								    numOutSlotsAdvanced = sq.outSlot.numSlotsBehindSafeSlot
 								    numInpSlotsAdvanced =
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      case sq.kind
 								      of SyncQueueKind.Forward:
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								        sq.inpSlot.numSlotsBehindSafeSlot
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      of SyncQueueKind.Backward:
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								        if sq.inpSlot == 0xFFFF_FFFF_FFFF_FFFF'u64:
 								          0'u64
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								        else:
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								          sq.inpSlot.numSlotsBehindSafeSlot
 								  if numOutSlotsAdvanced != 0 or numInpSlotsAdvanced != 0:
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    debug "Sync progress advanced out-of-band",
-												never request blocks before `safeSlot` in sync (#3512)

Follows up on https://github.com/status-im/nimbus-eth2/pull/3461 which
ensured that repeated `beaconBlocksByRange` requests get shrunk to
account for potential out-of-band advancements to `safeSlot`, with
similar logic for the initial request.
											
										
										
											2022-05-10 11:46:14 +00:00
+								      safeSlot, outSlot = sq.outSlot, inpSlot = sq.inpSlot
 								    if numOutSlotsAdvanced != 0:
 								      sq.advanceOutput(numOutSlotsAdvanced)
 								    if numInpSlotsAdvanced != 0:
 								      sq.advanceInput(numInpSlotsAdvanced)
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    sq.wakeupWaiters()
 								func updateRequestForNewSafeSlot[T](sq: SyncQueue[T], sr: var SyncRequest[T]) =
 								  # Requests may have originated before the latest `safeSlot` advancement.
 								  # Update it to not request any data prior to `safeSlot`.
 								  let
 								    outSlot = sq.outSlot
 								    lowSlot = sr.slot
 								    highSlot = sr.lastSlot
 								  case sq.kind
 								  of SyncQueueKind.Forward:
 								    if outSlot <= lowSlot:
 								      # Entire request is still relevant.
 								      discard
 								    elif outSlot <= highSlot:
 								      # Request is only partially relevant.
 								      let
 								        numSlotsDone = outSlot - lowSlot
-												sync: remove `step` from sync client implementation (#3678)

* sync: remove `step` from sync client implementation

Deprecated in the spec:
https://github.com/ethereum/consensus-specs/pull/2856 - future PRs will
deprecate server support as well.
											
										
										
											2022-06-06 13:56:59 +00:00
+								      sr.slot += numSlotsDone
 								      sr.count -= numSlotsDone
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    else:
 								      # Entire request is no longer relevant.
 								      sr.count = 0
 								  of SyncQueueKind.Backward:
 								    if outSlot >= highSlot:
 								      # Entire request is still relevant.
 								      discard
 								    elif outSlot >= lowSlot:
 								      # Request is only partially relevant.
 								      let
 								        numSlotsDone = highSlot - outSlot
-												sync: remove `step` from sync client implementation (#3678)

* sync: remove `step` from sync client implementation

Deprecated in the spec:
https://github.com/ethereum/consensus-specs/pull/2856 - future PRs will
deprecate server support as well.
											
										
										
											2022-06-06 13:56:59 +00:00
+								      sr.count -= numSlotsDone
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    else:
 								      # Entire request is no longer relevant.
 								      sr.count = 0
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								proc pop*[T](sq: SyncQueue[T], maxslot: Slot, item: T): SyncRequest[T] =
 								  ## Create new request according to current SyncQueue parameters.
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								  sq.handlePotentialSafeSlotAdvancement()
 								  while len(sq.debtsQueue) > 0:
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								    if maxslot < sq.debtsQueue[0].slot:
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								      # Peer's latest slot is less than starting request's slot.
 								      return SyncRequest.empty(sq.kind, T)
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								    if maxslot < sq.debtsQueue[0].lastSlot():
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								      # Peer's latest slot is less than finishing request's slot.
 								      return SyncRequest.empty(sq.kind, T)
 								    var sr = sq.debtsQueue.pop()
 								    sq.debtsCount = sq.debtsCount - sr.count
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    sq.updateRequestForNewSafeSlot(sr)
 								    if sr.isEmpty:
 								      continue
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    sr.setItem(item)
 								    sq.makePending(sr)
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								    return sr
 								  case sq.kind
 								  of SyncQueueKind.Forward:
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								    if maxslot < sq.inpSlot:
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      # Peer's latest slot is less than queue's input slot.
 								      return SyncRequest.empty(sq.kind, T)
 								    if sq.inpSlot > sq.finalSlot:
 								      # Queue's input slot is bigger than queue's final slot.
 								      return SyncRequest.empty(sq.kind, T)
 								    let lastSlot = min(maxslot, sq.finalSlot)
 								    let count = min(sq.chunkSize, lastSlot + 1'u64 - sq.inpSlot)
 								    var sr = SyncRequest.init(sq.kind, sq.inpSlot, count, item)
 								    sq.advanceInput(count)
 								    sq.makePending(sr)
 								    sr
 								  of SyncQueueKind.Backward:
 								    if sq.inpSlot == 0xFFFF_FFFF_FFFF_FFFF'u64:
 								      return SyncRequest.empty(sq.kind, T)
 								    if sq.inpSlot < sq.finalSlot:
 								      return SyncRequest.empty(sq.kind, T)
 								    let (slot, count) =
 								      block:
 								        let baseSlot = sq.inpSlot + 1'u64
 								        if baseSlot - sq.finalSlot < sq.chunkSize:
 								          let count = uint64(baseSlot - sq.finalSlot)
 								          (baseSlot - count, count)
 								        else:
 								          (baseSlot - sq.chunkSize, sq.chunkSize)
-												enable `styleCheck:usages` (#3573)

Some upstream repos still need fixes, but this gets us close enough that
style hints can be enabled by default.

In general, "canonical" spellings are preferred even if they violate
nep-1 - this applies in particular to spec-related stuff like
`genesis_validators_root` which appears throughout the codebase.

											
										
										
											2022-04-08 16:22:49 +00:00
+								    if (maxslot + 1'u64) < slot + count:
-												avoid re-requesting finalized blocks during sync (#3461)

When a `beaconBlocksByRange` response advances the `safeSlot`, but later
has errors, the sync queue keeps repeating that same request until it is
fulfilled without errors. Data up through `safeSlot` is considered to be
immutable, i.e., finalized, so re-requesting that data is not useful.
By advancing the sync progress in that scenario, those redundant query
portions can be avoided. Note, the finalized block _itself_ is always
requested, even in the initial request. This behaviour is kept same.
											
										
										
											2022-03-15 17:56:56 +00:00
+								      # Peer's latest slot is less than queue's input slot.
 								      return SyncRequest.empty(sq.kind, T)
 								    var sr = SyncRequest.init(sq.kind, slot, count, item)
 								    sq.advanceInput(count)
 								    sq.makePending(sr)
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								    sr
 								proc debtLen*[T](sq: SyncQueue[T]): uint64 =
 								  ## Returns the current value of ``sq.debtsCount``.
 								  ##
 								  ## NOTE(review): presumably the total number of slots covered by requests
 								  ## waiting in the debts queue - `pop` subtracts a request's `count` from it
 								  ## when taking that request off the debts queue; confirm against the code
 								  ## that adds to the counter.
 								  sq.debtsCount
 								proc pendingLen*[T](sq: SyncQueue[T]): uint64 =
 								  ## Returns the distance, in slots, between the queue's input slot and its
 								  ## output slot, measured in the queue's direction of travel.
 								  let (leading, trailing) =
 								    case sq.kind
 								    of SyncQueueKind.Forward:
 								      # Forward sync: `inpSlot` runs ahead, so `outSlot <= inpSlot`.
 								      (sq.inpSlot, sq.outSlot)
 								    of SyncQueueKind.Backward:
 								      # Backward sync: `inpSlot` runs below, so `outSlot >= inpSlot`.
 								      (sq.outSlot, sq.inpSlot)
 								  leading - trailing
 								proc len*[T](sq: SyncQueue[T]): uint64 {.inline.} =
 								  ## Returns number of slots left in queue ``sq``.
 								  case sq.kind
 								  of SyncQueueKind.Forward:
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								    if sq.finalSlot >= sq.outSlot:
 								      sq.finalSlot + 1'u64 - sq.outSlot
 								    else:
 								      0'u64
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  of SyncQueueKind.Backward:
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								    if sq.outSlot >= sq.finalSlot:
 								      sq.outSlot + 1'u64 - sq.finalSlot
 								    else:
 								      0'u64
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc total*[T](sq: SyncQueue[T]): uint64 {.inline.} =
 								  ## Returns total number of slots in queue ``sq``.
 								  case sq.kind
 								  of SyncQueueKind.Forward:
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								    if sq.finalSlot >= sq.startSlot:
 								      sq.finalSlot + 1'u64 - sq.startSlot
 								    else:
 								      0'u64
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
+								  of SyncQueueKind.Backward:
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								    if sq.startSlot >= sq.finalSlot:
 								      sq.startSlot + 1'u64 - sq.finalSlot
 								    else:
 								      0'u64
-												Backward sync support for SyncManager. (#3131)

* Unbundle SyncQueue from sync_manager.nim.
Unbundle Peer scores constants to peer_scores.nim.
Add Forward/Backward enum.

* Further improvements and tests.

* Adopt getRewindPoint() and fix MissingParent handler.

* Remove unused procedures.
Refactor `result` usage.
Fix resetWait().

* Add all the tests and fix the issue with rewind point.

* Fix get() issue.

* Fix flaky tests.

* test fixes

Co-authored-by: Jacek Sieka <jacek@status.im>
											
										
										
											2021-12-08 21:15:29 +00:00
 								proc progress*[T](sq: SyncQueue[T]): uint64 =
-												Backfill only up to MIN_EPOCHS_FOR_BLOCK_REQUESTS blocks (#4421)

When backfilling, we only need to download blocks that are newer than
MIN_EPOCHS_FOR_BLOCK_REQUESTS - the rest cannot reliably be fetched from
the network and does not have to be provided to others.

This change affects only trusted-node-synced clients - genesis sync
continues to work as before (because it needs to construct a state by
building it from genesis).

Those wishing to complete a backfill should do so with era files
instead.
											
										
										
											2022-12-23 07:42:55 +00:00
+								  ## How many useful slots we've synced so far, adjusting for how much has
 								  ## become obsolete by time movements
 								  sq.total - sq.len