Mirror of https://github.com/status-im/nimbus-eth2.git (synced 2025-02-22 11:18:25 +00:00)
Make syncing performance more steady. (#6722)
* Initial commit.
* Update tests.
* Update AllTests.
* Post-rebase update of AllTests.
* Improve log statements that were hidden because of logScope issues.
* Eliminate duplicate `direction` fields. Debug the blobs map.
* Fix blobs short-map generation.
* Fix map generation and add test helpers.
* Fix missing dependency.
* Adjust callback functionality for untrusted sync.
* Attempt to fix the defect-state issue with a new flag `setLastStateRoot`.
* Make the fix less intrusive.
* Fix forward declaration missing `=`.
* Revert the attempt to improve performance.
* Revert some of the changes.
* Remove spaces.
* Remove LFS fixes from the PR.
* Update AllTests.
* Update copyright year.
* Update AllTests.
* Post-rebase fixes.
This commit is contained in: parent 426856edcb, commit ba934af925
@@ -984,33 +984,28 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
 OK: 7/7 Fail: 0/7 Skip: 0/7
 ## SyncManager test suite
 ```diff
-+ Process all unviable blocks OK
 + [SyncManager] groupBlobs() test OK
-+ [SyncQueue#Backward] Async unordered push test OK
-+ [SyncQueue#Backward] Async unordered push with rewind test OK
-+ [SyncQueue#Backward] Good response with missing values towards end OK
-+ [SyncQueue#Backward] Handle out-of-band sync progress advancement OK
-+ [SyncQueue#Backward] Pass through established limits test OK
-+ [SyncQueue#Backward] Smoke test OK
-+ [SyncQueue#Backward] Start and finish slots equal OK
-+ [SyncQueue#Backward] Two full requests success/fail OK
++ [SyncQueue# & Backward] Combination of missing parent and good blocks [3 peers] test OK
++ [SyncQueue# & Backward] Failure request push test OK
++ [SyncQueue# & Backward] Invalid block [3 peers] test OK
++ [SyncQueue# & Backward] Smoke [3 peers] test OK
++ [SyncQueue# & Backward] Smoke [single peer] test OK
++ [SyncQueue# & Backward] Unviable block [3 peers] test OK
++ [SyncQueue# & Forward] Combination of missing parent and good blocks [3 peers] test OK
++ [SyncQueue# & Forward] Failure request push test OK
++ [SyncQueue# & Forward] Invalid block [3 peers] test OK
++ [SyncQueue# & Forward] Smoke [3 peers] test OK
++ [SyncQueue# & Forward] Smoke [single peer] test OK
++ [SyncQueue# & Forward] Unviable block [3 peers] test OK
++ [SyncQueue#Backward] Missing parent and exponential rewind [3 peers] test OK
 + [SyncQueue#Backward] getRewindPoint() test OK
-+ [SyncQueue#Forward] Async unordered push test OK
-+ [SyncQueue#Forward] Async unordered push with rewind test OK
-+ [SyncQueue#Forward] Good response with missing values towards end OK
-+ [SyncQueue#Forward] Handle out-of-band sync progress advancement OK
-+ [SyncQueue#Forward] Pass through established limits test OK
-+ [SyncQueue#Forward] Smoke test OK
-+ [SyncQueue#Forward] Start and finish slots equal OK
-+ [SyncQueue#Forward] Two full requests success/fail OK
++ [SyncQueue#Forward] Missing parent and exponential rewind [3 peers] test OK
 + [SyncQueue#Forward] getRewindPoint() test OK
 + [SyncQueue] checkBlobsResponse() test OK
 + [SyncQueue] checkResponse() test OK
-+ [SyncQueue] contains() test OK
-+ [SyncQueue] getLastNonEmptySlot() test OK
 + [SyncQueue] hasEndGap() test OK
 ```
-OK: 25/25 Fail: 0/25 Skip: 0/25
+OK: 20/20 Fail: 0/20 Skip: 0/20
 ## Type helpers
 ```diff
 + BeaconBlock OK
@@ -1154,4 +1149,4 @@ OK: 2/2 Fail: 0/2 Skip: 0/2
 OK: 9/9 Fail: 0/9 Skip: 0/9
 
 ---TOTAL---
-OK: 783/788 Fail: 0/788 Skip: 5/788
+OK: 778/783 Fail: 0/783 Skip: 5/783
@@ -520,10 +520,6 @@ proc addBackfillBlockData*(
 "database corrupt?", clearanceBlock = shortLog(clearanceBlock)
 return err(VerifierError.MissingParent)
 
-# dag.clearanceState.setStateRoot(trustedStateRoot)
-# TODO (cheatfate): This is last part of previous TODO comment, we should
-# set state's `root` to block's `state_root`.
-
 let proposerVerifyTick = Moment.now()
 
 if not(isNil(onStateUpdated)):
@@ -385,14 +385,11 @@ proc initFullNode(
 else:
 dag.tail.slot
 
-proc getUntrustedBackfillSlot(): Slot =
+func getUntrustedBackfillSlot(): Slot =
 if clist.tail.isSome():
 clist.tail.get().blck.slot
 else:
-getLocalWallSlot()
+dag.tail.slot
 
-func getUntrustedFrontfillSlot(): Slot =
-getFirstSlotAtFinalizedEpoch()
-
 func getFrontfillSlot(): Slot =
 max(dag.frontfill.get(BlockId()).slot, dag.horizon)
@@ -535,7 +532,7 @@ proc initFullNode(
 dag.cfg.DENEB_FORK_EPOCH, dag.cfg.MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS,
 SyncQueueKind.Backward, getLocalHeadSlot,
 getLocalWallSlot, getFirstSlotAtFinalizedEpoch, getUntrustedBackfillSlot,
-getUntrustedFrontfillSlot, isWithinWeakSubjectivityPeriod,
+getFrontfillSlot, isWithinWeakSubjectivityPeriod,
 clistPivotSlot, untrustedBlockVerifier, maxHeadAge = 0,
 shutdownEvent = node.shutdownEvent,
 flags = syncManagerFlags)
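The untrusted backfill callback above now falls back to `dag.tail.slot` once the clist tail is gone, instead of restarting from the wall slot. A minimal, self-contained sketch of that fallback, using `std/options` and stand-in names rather than the real ChainDAG/clist types:

```nim
import std/options

type Slot = uint64

# Stand-ins for the real clist/ChainDAG state; only the fallback logic from
# the hunk above is modelled here.
proc getUntrustedBackfillSlot(clistTail: Option[Slot], dagTailSlot: Slot): Slot =
  # While untrusted backfill still has a tail to extend, keep walking back
  # from it; otherwise fall back to the DAG tail instead of the wall slot.
  if clistTail.isSome():
    clistTail.get()
  else:
    dagTailSlot

when isMainModule:
  echo getUntrustedBackfillSlot(some(Slot(12345)), Slot(8192))  # -> 12345
  echo getUntrustedBackfillSlot(none(Slot), Slot(8192))         # -> 8192
```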
@@ -21,9 +21,6 @@ import
 export phase0, altair, merge, chronos, chronicles, results,
 helpers, peer_scores, sync_queue, forks, sync_protocol
 
-logScope:
-topics = "syncman"
-
 const
 SyncWorkersCount* = 10
 ## Number of sync workers to spawn
@@ -34,6 +31,12 @@ const
 StatusExpirationTime* = chronos.minutes(2)
 ## Time time it takes for the peer's status information to expire.
 
+ConcurrentRequestsCount* = 3
+## Number of requests performed by one peer in single syncing step
+
+RepeatingFailuresCount* = 2
+## Number of repeating errors before starting rewind process.
+
 WeakSubjectivityLogMessage* =
 "Database state missing or too old, cannot sync - resync the client " &
 "using a trusted node or allow lenient long-range syncing with the " &
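`ConcurrentRequestsCount` means a single peer can have several chunk requests in flight per syncing step rather than one. A simplified model of that batching using `std/asyncdispatch` (the real code uses chronos futures and `SyncQueue`; the job body here is a placeholder):

```nim
import std/asyncdispatch

const ConcurrentRequestsCount = 3

proc processChunk(rindex: int): Future[void] {.async.} =
  # Stand-in for "download blocks/blobs for one request and push to the queue".
  await sleepAsync(10 * (rindex + 1))
  echo "request ", rindex, " done"

proc syncStep() {.async.} =
  var jobs: seq[Future[void]]
  for rindex in 0 ..< ConcurrentRequestsCount:
    jobs.add processChunk(rindex)
  # The step only finishes once every request issued to this peer is handled.
  await all(jobs)

when isMainModule:
  waitFor syncStep()
```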
@@ -81,6 +84,8 @@ type
 direction: SyncQueueKind
 ident*: string
 flags: set[SyncManagerFlag]
+concurrentRequestsCount: int
+repeatingFailuresCount: int
 
 SyncMoment* = object
 stamp*: chronos.Moment
@@ -115,8 +120,10 @@ proc initQueue[A, B](man: SyncManager[A, B]) =
 of SyncQueueKind.Forward:
 man.queue = SyncQueue.init(A, man.direction, man.getFirstSlot(),
 man.getLastSlot(), man.chunkSize,
+man.concurrentRequestsCount,
+man.repeatingFailuresCount,
 man.getSafeSlot, man.blockVerifier,
-1, man.ident)
+man.ident)
 of SyncQueueKind.Backward:
 let
 firstSlot = man.getFirstSlot()
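Both tuning knobs are now handed to `SyncQueue.init`. A small stand-alone model of how `repeatingFailuresCount` is meant to behave: once the same range fails that many times in a row, a rewind is triggered (assumed semantics; the real rewind decision lives in sync_queue.nim):

```nim
const RepeatingFailuresCount = 2

type RangeState = object
  failures: int

proc onFailure(state: var RangeState): bool =
  ## Record one failed attempt; returns true once a rewind should start.
  inc state.failures
  state.failures >= RepeatingFailuresCount

when isMainModule:
  var state = RangeState()
  echo onFailure(state)   # false: first failure is tolerated
  echo onFailure(state)   # true: repeated failure triggers the rewind
```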
@@ -128,27 +135,34 @@ proc initQueue[A, B](man: SyncManager[A, B]) =
 else:
 firstSlot - 1'u64
 man.queue = SyncQueue.init(A, man.direction, startSlot, lastSlot,
-man.chunkSize, man.getSafeSlot,
-man.blockVerifier, 1, man.ident)
+man.chunkSize,
+man.concurrentRequestsCount,
+man.repeatingFailuresCount,
+man.getSafeSlot,
+man.blockVerifier, man.ident)
+
+proc newSyncManager*[A, B](
+pool: PeerPool[A, B],
+denebEpoch: Epoch,
+minEpochsForBlobSidecarsRequests: uint64,
+direction: SyncQueueKind,
+getLocalHeadSlotCb: GetSlotCallback,
+getLocalWallSlotCb: GetSlotCallback,
+getFinalizedSlotCb: GetSlotCallback,
+getBackfillSlotCb: GetSlotCallback,
+getFrontfillSlotCb: GetSlotCallback,
+weakSubjectivityPeriodCb: GetBoolCallback,
+progressPivot: Slot,
+blockVerifier: BlockVerifier,
+shutdownEvent: AsyncEvent,
+maxHeadAge = uint64(SLOTS_PER_EPOCH * 1),
+chunkSize = uint64(SLOTS_PER_EPOCH),
+flags: set[SyncManagerFlag] = {},
+concurrentRequestsCount = ConcurrentRequestsCount,
+repeatingFailuresCount = RepeatingFailuresCount,
+ident = "main"
+): SyncManager[A, B] =
 
-proc newSyncManager*[A, B](pool: PeerPool[A, B],
-denebEpoch: Epoch,
-minEpochsForBlobSidecarsRequests: uint64,
-direction: SyncQueueKind,
-getLocalHeadSlotCb: GetSlotCallback,
-getLocalWallSlotCb: GetSlotCallback,
-getFinalizedSlotCb: GetSlotCallback,
-getBackfillSlotCb: GetSlotCallback,
-getFrontfillSlotCb: GetSlotCallback,
-weakSubjectivityPeriodCb: GetBoolCallback,
-progressPivot: Slot,
-blockVerifier: BlockVerifier,
-shutdownEvent: AsyncEvent,
-maxHeadAge = uint64(SLOTS_PER_EPOCH * 1),
-chunkSize = uint64(SLOTS_PER_EPOCH),
-flags: set[SyncManagerFlag] = {},
-ident = "main"
-): SyncManager[A, B] =
 let (getFirstSlot, getLastSlot, getSafeSlot) = case direction
 of SyncQueueKind.Forward:
 (getLocalHeadSlotCb, getLocalWallSlotCb, getFinalizedSlotCb)
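Because the two new parameters default to the module constants, call sites written against the old `newSyncManager` signature keep compiling unchanged. A toy constructor illustrating the same pattern; the type and proc names here are stand-ins, not the real SyncManager:

```nim
const
  ConcurrentRequestsCount = 3
  RepeatingFailuresCount = 2

type MiniSyncManager = object
  chunkSize: uint64
  concurrentRequestsCount: int
  repeatingFailuresCount: int
  ident: string

proc newMiniSyncManager(
    chunkSize = 32'u64,
    concurrentRequestsCount = ConcurrentRequestsCount,
    repeatingFailuresCount = RepeatingFailuresCount,
    ident = "main"): MiniSyncManager =
  MiniSyncManager(
    chunkSize: chunkSize,
    concurrentRequestsCount: concurrentRequestsCount,
    repeatingFailuresCount: repeatingFailuresCount,
    ident: ident)

when isMainModule:
  let defaultMan = newMiniSyncManager()                           # old-style call
  let tunedMan = newMiniSyncManager(concurrentRequestsCount = 5)  # new knob
  echo defaultMan.concurrentRequestsCount, " ", tunedMan.concurrentRequestsCount
```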
@@ -173,7 +187,9 @@ proc newSyncManager*[A, B](pool: PeerPool[A, B],
 direction: direction,
 shutdownEvent: shutdownEvent,
 ident: ident,
-flags: flags
+flags: flags,
+concurrentRequestsCount: concurrentRequestsCount,
+repeatingFailuresCount: repeatingFailuresCount
 )
 res.initQueue()
 res
@@ -182,18 +198,15 @@ proc getBlocks[A, B](man: SyncManager[A, B], peer: A,
 req: SyncRequest[A]): Future[BeaconBlocksRes] {.
 async: (raises: [CancelledError], raw: true).} =
 mixin getScore, `==`
 
-logScope:
-peer_score = peer.getScore()
-peer_speed = peer.netKbps()
-sync_ident = man.ident
-direction = man.direction
-topics = "syncman"
-
 doAssert(not(req.isEmpty()), "Request must not be empty!")
-debug "Requesting blocks from peer", request = req
+debug "Requesting blocks from peer",
+request = req,
+peer_score = req.item.getScore(),
+peer_speed = req.item.netKbps(),
+sync_ident = man.ident,
+topics = "syncman"
 
-beaconBlocksByRange_v2(peer, req.slot, req.count, 1'u64)
+beaconBlocksByRange_v2(peer, req.data.slot, req.data.count, 1'u64)
 
 proc shouldGetBlobs[A, B](man: SyncManager[A, B], s: Slot): bool =
 let
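`getBlocks` now reads the slot range from a nested `data` field, hence the `req.data.slot` / `req.data.count` accesses above. A minimal model of that request shape and the inclusive last slot derived from it; `MiniSyncRequest` and `SyncRange` are illustrative stand-ins for the real types in sync_queue.nim:

```nim
type
  Slot = uint64
  SyncRange = object
    slot: Slot      # first requested slot
    count: uint64   # number of slots in the chunk
  MiniSyncRequest = object
    data: SyncRange

func lastSlot(req: MiniSyncRequest): Slot =
  # Inclusive end of the request, as used for the blob decision and the
  # maybeFinalized check elsewhere in the diff.
  req.data.slot + req.data.count - 1

when isMainModule:
  let req = MiniSyncRequest(data: SyncRange(slot: 1024, count: 32))
  echo req.data.slot, "..", req.lastSlot()   # 1024..1055
```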
@@ -204,23 +217,23 @@ proc shouldGetBlobs[A, B](man: SyncManager[A, B], s: Slot): bool =
 epoch >= wallEpoch - man.MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS)
 
 proc shouldGetBlobs[A, B](man: SyncManager[A, B], r: SyncRequest[A]): bool =
-man.shouldGetBlobs(r.slot) or man.shouldGetBlobs(r.slot + (r.count - 1))
+man.shouldGetBlobs(r.data.slot) or
+man.shouldGetBlobs(r.data.slot + (r.data.count - 1))
 
 proc getBlobSidecars[A, B](man: SyncManager[A, B], peer: A,
 req: SyncRequest[A]): Future[BlobSidecarsRes]
 {.async: (raises: [CancelledError], raw: true).} =
 mixin getScore, `==`
 
-logScope:
-peer_score = peer.getScore()
-peer_speed = peer.netKbps()
-sync_ident = man.ident
-direction = man.direction
-topics = "syncman"
-
 doAssert(not(req.isEmpty()), "Request must not be empty!")
-debug "Requesting blobs sidecars from peer", request = req
-blobSidecarsByRange(peer, req.slot, req.count)
+debug "Requesting blobs sidecars from peer",
+request = req,
+peer_score = req.item.getScore(),
+peer_speed = req.item.netKbps(),
+sync_ident = man.ident,
+topics = "syncman"
+
+blobSidecarsByRange(peer, req.data.slot, req.data.count)
 
 proc remainingSlots(man: SyncManager): uint64 =
 let
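`shouldGetBlobs` only asks for blob sidecars when the requested slots are past the Deneb fork and inside the sidecar retention window. A self-contained sketch of that window check with illustrative constants; the real code reads the epochs from the SyncManager configuration:

```nim
const SLOTS_PER_EPOCH = 32'u64

type
  Slot = uint64
  Epoch = uint64

func epoch(s: Slot): Epoch = s div SLOTS_PER_EPOCH

func shouldGetBlobs(s, wallSlot: Slot, denebEpoch: Epoch,
                    minEpochsForBlobRequests: uint64): bool =
  let
    e = epoch(s)
    wallEpoch = epoch(wallSlot)
  # Past Deneb, and within the retention window relative to the wall clock.
  e >= denebEpoch and
    (wallEpoch < minEpochsForBlobRequests or
     e >= wallEpoch - minEpochsForBlobRequests)

when isMainModule:
  echo shouldGetBlobs(320_000, 321_000, 5, 4096)   # true: inside the window
  echo shouldGetBlobs(1_000, 1_000_000, 5, 4096)   # false: far outside it
```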
@@ -238,8 +251,8 @@ proc remainingSlots(man: SyncManager): uint64 =
 0'u64
 
 func groupBlobs*(
-blocks: seq[ref ForkedSignedBeaconBlock],
-blobs: seq[ref BlobSidecar]
+blocks: openArray[ref ForkedSignedBeaconBlock],
+blobs: openArray[ref BlobSidecar]
 ): Result[seq[BlobSidecars], string] =
 var
 grouped = newSeq[BlobSidecars](len(blocks))
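`groupBlobs` now accepts `openArray` parameters, so callers can pass seqs or slices without copying. A simplified grouping-by-slot example with stand-in block/blob types; the real function additionally matches blobs against each block's KZG commitment count:

```nim
import std/sequtils

type
  Slot = uint64
  MiniBlock = object
    slot: Slot
  MiniBlob = object
    slot: Slot

func groupBlobs(blocks: openArray[MiniBlock],
                blobs: openArray[MiniBlob]): seq[seq[MiniBlob]] =
  # One bucket per block; blobs are assumed to arrive ordered by slot, which
  # the response checks earlier in the pipeline are meant to guarantee.
  result = newSeq[seq[MiniBlob]](blocks.len)
  var i = 0
  for bi in 0 ..< blocks.len:
    while i < blobs.len and blobs[i].slot == blocks[bi].slot:
      result[bi].add blobs[i]
      inc i

when isMainModule:
  let
    blocks = @[MiniBlock(slot: 10), MiniBlock(slot: 11), MiniBlock(slot: 12)]
    blobs = @[MiniBlob(slot: 10), MiniBlob(slot: 10), MiniBlob(slot: 12)]
  echo groupBlobs(blocks, blobs).mapIt(it.len)   # @[2, 0, 1]
```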
@@ -287,13 +300,12 @@ proc getSyncBlockData*[T](
 ): Future[SyncBlockDataRes] {.async: (raises: [CancelledError]).} =
 mixin getScore
 
-logScope:
-slot = slot
-peer_score = peer.getScore()
-peer_speed = peer.netKbps()
-topics = "syncman"
-
-debug "Requesting block from peer"
+debug "Requesting block from peer",
+slot = slot,
+peer = peer,
+peer_score = peer.getScore(),
+peer_speed = peer.netKbps(),
+topics = "syncman"
 
 let blocksRange =
 block:
@@ -312,7 +324,12 @@ proc getSyncBlockData*[T](
 return err("Incorrect number of blocks was returned by peer, " &
 $len(blocksRange))
 
-debug "Received block on request"
+debug "Received block on request",
+slot = slot,
+peer = peer,
+peer_score = peer.getScore(),
+peer_speed = peer.netKbps(),
+topics = "syncman"
 
 if blocksRange[0][].slot != slot:
 peer.updateScore(PeerScoreBadResponse)
@@ -349,7 +366,13 @@ proc getSyncBlockData*[T](
 peer.updateScore(PeerScoreBadResponse)
 return err("Incorrect number of received blobs in the requested range")
 
-debug "Received blobs on request", blobs_count = len(blobData)
+debug "Received blobs on request",
+slot = slot,
+blobs_count = len(blobData),
+peer = peer,
+peer_score = peer.getScore(),
+peer_speed = peer.netKbps(),
+topics = "syncman"
 
 let groupedBlobs = groupBlobs(blocksRange, blobData).valueOr:
 peer.updateScore(PeerScoreNoValues)
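Before grouping, the blob response is sanity-checked against the request. A sketch of the kind of check `checkBlobsResponse` performs, under the assumption that every returned slot must lie inside the requested range and the sequence must be non-decreasing:

```nim
type Slot = uint64

func checkBlobSlots(reqSlot: Slot, reqCount: uint64,
                    slots: openArray[Slot]): bool =
  let lastSlot = reqSlot + reqCount - 1
  var prev = reqSlot
  for s in slots:
    if s < reqSlot or s > lastSlot:
      return false          # blob outside the requested range
    if s < prev:
      return false          # slots went backwards
    prev = s
  true

when isMainModule:
  echo checkBlobSlots(100, 32, [Slot(100), Slot(100), Slot(101), Slot(130)])  # true
  echo checkBlobSlots(100, 32, [Slot(99)])                                    # false
```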
@ -365,84 +388,204 @@ proc getSyncBlockData*[T](
|
|||||||
|
|
||||||
ok(SyncBlockData(blocks: blocksRange, blobs: blobsRange))
|
ok(SyncBlockData(blocks: blocksRange, blobs: blobsRange))
|
||||||
|
|
||||||
proc syncStep[A, B](
|
proc getSyncBlockData[A, B](
|
||||||
man: SyncManager[A, B], index: int, peer: A
|
man: SyncManager[A, B],
|
||||||
) {.async: (raises: [CancelledError]).} =
|
index: int,
|
||||||
logScope:
|
sr: SyncRequest[A]
|
||||||
peer_score = peer.getScore()
|
): Future[SyncBlockDataRes] {.async: (raises: [CancelledError]).} =
|
||||||
peer_speed = peer.netKbps()
|
let
|
||||||
index = index
|
peer = sr.item
|
||||||
sync_ident = man.ident
|
blocks = (await man.getBlocks(peer, sr)).valueOr:
|
||||||
topics = "syncman"
|
peer.updateScore(PeerScoreNoValues)
|
||||||
|
return err("Failed to receive blocks on request, reason: " & $error)
|
||||||
|
blockSlots = mapIt(blocks, it[].slot)
|
||||||
|
|
||||||
var
|
debug "Received blocks on request",
|
||||||
|
request = sr,
|
||||||
|
peer_score = sr.item.getScore(),
|
||||||
|
peer_speed = sr.item.netKbps(),
|
||||||
|
index = index,
|
||||||
|
blocks_count = len(blocks),
|
||||||
|
blocks_map = getShortMap(sr, blocks.toSeq()),
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
|
checkResponse(sr, blockSlots).isOkOr:
|
||||||
|
peer.updateScore(PeerScoreBadResponse)
|
||||||
|
return err("Incorrect blocks sequence received, reason: " & $error)
|
||||||
|
|
||||||
|
let
|
||||||
|
shouldGetBlobs =
|
||||||
|
if not(man.shouldGetBlobs(sr)):
|
||||||
|
false
|
||||||
|
else:
|
||||||
|
var hasBlobs = false
|
||||||
|
for blck in blocks:
|
||||||
|
withBlck(blck[]):
|
||||||
|
when consensusFork >= ConsensusFork.Deneb:
|
||||||
|
if len(forkyBlck.message.body.blob_kzg_commitments) > 0:
|
||||||
|
hasBlobs = true
|
||||||
|
break
|
||||||
|
hasBlobs
|
||||||
|
blobs =
|
||||||
|
if shouldGetBlobs:
|
||||||
|
let
|
||||||
|
res = (await man.getBlobSidecars(peer, sr)).valueOr:
|
||||||
|
peer.updateScore(PeerScoreNoValues)
|
||||||
|
return err("Failed to receive blobs on request, reason: " & $error)
|
||||||
|
blobData = res.asSeq()
|
||||||
|
|
||||||
|
debug "Received blobs on request",
|
||||||
|
request = sr,
|
||||||
|
peer_score = sr.item.getScore(),
|
||||||
|
peer_speed = sr.item.netKbps(),
|
||||||
|
index = index,
|
||||||
|
blobs_count = len(blobData),
|
||||||
|
blobs_map = getShortMap(sr, blobData),
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
|
if len(blobData) > 0:
|
||||||
|
let blobSlots = mapIt(blobData, it[].signed_block_header.message.slot)
|
||||||
|
checkBlobsResponse(sr, blobSlots).isOkOr:
|
||||||
|
peer.updateScore(PeerScoreBadResponse)
|
||||||
|
return err("Incorrect blobs sequence received, reason: " & $error)
|
||||||
|
|
||||||
|
let groupedBlobs = groupBlobs(blocks.asSeq(), blobData).valueOr:
|
||||||
|
peer.updateScore(PeerScoreNoValues)
|
||||||
|
return err(
|
||||||
|
"Received blobs sequence is inconsistent, reason: " & error)
|
||||||
|
|
||||||
|
groupedBlobs.checkBlobs().isOkOr:
|
||||||
|
peer.updateScore(PeerScoreBadResponse)
|
||||||
|
return err("Received blobs verification failed, reason: " & error)
|
||||||
|
Opt.some(groupedBlobs)
|
||||||
|
else:
|
||||||
|
Opt.none(seq[BlobSidecars])
|
||||||
|
|
||||||
|
ok(SyncBlockData(blocks: blocks.asSeq(), blobs: blobs))
|
||||||
|
|
||||||
|
proc getOrUpdatePeerStatus[A, B](
|
||||||
|
man: SyncManager[A, B], index: int, peer: A
|
||||||
|
): Future[Result[Slot, string]] {.async: (raises: [CancelledError]).} =
|
||||||
|
let
|
||||||
headSlot = man.getLocalHeadSlot()
|
headSlot = man.getLocalHeadSlot()
|
||||||
wallSlot = man.getLocalWallSlot()
|
wallSlot = man.getLocalWallSlot()
|
||||||
peerSlot = peer.getHeadSlot()
|
peerSlot = peer.getHeadSlot()
|
||||||
|
|
||||||
block: # Check that peer status is recent and relevant
|
debug "Peer's syncing status",
|
||||||
logScope:
|
peer = peer,
|
||||||
peer = peer
|
peer_score = peer.getScore(),
|
||||||
direction = man.direction
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
debug "Peer's syncing status", wall_clock_slot = wallSlot,
|
let
|
||||||
remote_head_slot = peerSlot, local_head_slot = headSlot
|
peerStatusAge = Moment.now() - peer.getStatusLastTime()
|
||||||
|
needsUpdate =
|
||||||
|
# Latest status we got is old
|
||||||
|
peerStatusAge >= StatusExpirationTime or
|
||||||
|
# The point we need to sync is close to where the peer is
|
||||||
|
man.getFirstSlot() >= peerSlot
|
||||||
|
|
||||||
let
|
if not(needsUpdate):
|
||||||
peerStatusAge = Moment.now() - peer.getStatusLastTime()
|
return ok(peerSlot)
|
||||||
needsUpdate =
|
|
||||||
# Latest status we got is old
|
|
||||||
peerStatusAge >= StatusExpirationTime or
|
|
||||||
# The point we need to sync is close to where the peer is
|
|
||||||
man.getFirstSlot() >= peerSlot
|
|
||||||
|
|
||||||
if needsUpdate:
|
man.workers[index].status = SyncWorkerStatus.UpdatingStatus
|
||||||
man.workers[index].status = SyncWorkerStatus.UpdatingStatus
|
|
||||||
|
|
||||||
# Avoid a stampede of requests, but make them more frequent in case the
|
# Avoid a stampede of requests, but make them more frequent in case the
|
||||||
# peer is "close" to the slot range of interest
|
# peer is "close" to the slot range of interest
|
||||||
if peerStatusAge < StatusExpirationTime div 2:
|
if peerStatusAge < (StatusExpirationTime div 2):
|
||||||
await sleepAsync(StatusExpirationTime div 2 - peerStatusAge)
|
await sleepAsync((StatusExpirationTime div 2) - peerStatusAge)
|
||||||
|
|
||||||
trace "Updating peer's status information", wall_clock_slot = wallSlot,
|
trace "Updating peer's status information",
|
||||||
remote_head_slot = peerSlot, local_head_slot = headSlot
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
if not(await peer.updateStatus()):
|
if not(await peer.updateStatus()):
|
||||||
peer.updateScore(PeerScoreNoStatus)
|
peer.updateScore(PeerScoreNoStatus)
|
||||||
debug "Failed to get remote peer's status, exiting",
|
return err("Failed to get remote peer status")
|
||||||
peer_head_slot = peerSlot
|
|
||||||
|
|
||||||
return
|
let newPeerSlot = peer.getHeadSlot()
|
||||||
|
if peerSlot >= newPeerSlot:
|
||||||
|
peer.updateScore(PeerScoreStaleStatus)
|
||||||
|
debug "Peer's status information is stale",
|
||||||
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_old_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
remote_new_head_slot = newPeerSlot,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
else:
|
||||||
|
debug "Peer's status information updated",
|
||||||
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_old_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
remote_new_head_slot = newPeerSlot,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
peer.updateScore(PeerScoreGoodStatus)
|
||||||
|
ok(newPeerSlot)
|
||||||
|
|
||||||
let newPeerSlot = peer.getHeadSlot()
|
proc syncStep[A, B](
|
||||||
if peerSlot >= newPeerSlot:
|
man: SyncManager[A, B], index: int, peer: A
|
||||||
peer.updateScore(PeerScoreStaleStatus)
|
) {.async: (raises: [CancelledError]).} =
|
||||||
debug "Peer's status information is stale",
|
|
||||||
wall_clock_slot = wallSlot, remote_old_head_slot = peerSlot,
|
|
||||||
local_head_slot = headSlot, remote_new_head_slot = newPeerSlot
|
|
||||||
else:
|
|
||||||
debug "Peer's status information updated", wall_clock_slot = wallSlot,
|
|
||||||
remote_old_head_slot = peerSlot, local_head_slot = headSlot,
|
|
||||||
remote_new_head_slot = newPeerSlot
|
|
||||||
peer.updateScore(PeerScoreGoodStatus)
|
|
||||||
peerSlot = newPeerSlot
|
|
||||||
|
|
||||||
# Time passed - enough to move slots, if sleep happened
|
let
|
||||||
|
peerSlot = (await man.getOrUpdatePeerStatus(index, peer)).valueOr:
|
||||||
|
return
|
||||||
headSlot = man.getLocalHeadSlot()
|
headSlot = man.getLocalHeadSlot()
|
||||||
wallSlot = man.getLocalWallSlot()
|
wallSlot = man.getLocalWallSlot()
|
||||||
|
|
||||||
if man.remainingSlots() <= man.maxHeadAge:
|
if man.remainingSlots() <= man.maxHeadAge:
|
||||||
logScope:
|
|
||||||
peer = peer
|
|
||||||
direction = man.direction
|
|
||||||
|
|
||||||
case man.direction
|
case man.direction
|
||||||
of SyncQueueKind.Forward:
|
of SyncQueueKind.Forward:
|
||||||
info "We are in sync with network", wall_clock_slot = wallSlot,
|
info "We are in sync with network",
|
||||||
remote_head_slot = peerSlot, local_head_slot = headSlot
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
direction = man.direction,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
of SyncQueueKind.Backward:
|
of SyncQueueKind.Backward:
|
||||||
info "Backfill complete", wall_clock_slot = wallSlot,
|
info "Backfill complete",
|
||||||
remote_head_slot = peerSlot, local_head_slot = headSlot
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
direction = man.direction,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
# We clear SyncManager's `notInSyncEvent` so all the workers will become
|
# We clear SyncManager's `notInSyncEvent` so all the workers will become
|
||||||
# sleeping soon.
|
# sleeping soon.
|
||||||
@ -462,161 +605,103 @@ proc syncStep[A, B](
|
|||||||
# Right now we decreasing peer's score a bit, so it will not be
|
# Right now we decreasing peer's score a bit, so it will not be
|
||||||
# disconnected due to low peer's score, but new fresh peers could replace
|
# disconnected due to low peer's score, but new fresh peers could replace
|
||||||
# peers with low latest head.
|
# peers with low latest head.
|
||||||
debug "Peer's head slot is lower then local head slot", peer = peer,
|
debug "Peer's head slot is lower then local head slot",
|
||||||
wall_clock_slot = wallSlot, remote_head_slot = peerSlot,
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
wall_clock_slot = wallSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
local_last_slot = man.getLastSlot(),
|
local_last_slot = man.getLastSlot(),
|
||||||
local_first_slot = man.getFirstSlot(),
|
local_first_slot = man.getFirstSlot(),
|
||||||
direction = man.direction
|
direction = man.direction,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
peer.updateScore(PeerScoreUseless)
|
peer.updateScore(PeerScoreUseless)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Wall clock keeps ticking, so we need to update the queue
|
# Wall clock keeps ticking, so we need to update the queue
|
||||||
man.queue.updateLastSlot(man.getLastSlot())
|
man.queue.updateLastSlot(man.getLastSlot())
|
||||||
|
|
||||||
man.workers[index].status = SyncWorkerStatus.Requesting
|
proc processCallback() =
|
||||||
let req = man.queue.pop(peerSlot, peer)
|
man.workers[index].status = SyncWorkerStatus.Processing
|
||||||
if req.isEmpty():
|
|
||||||
# SyncQueue could return empty request in 2 cases:
|
|
||||||
# 1. There no more slots in SyncQueue to download (we are synced, but
|
|
||||||
# our ``notInSyncEvent`` is not yet cleared).
|
|
||||||
# 2. Current peer's known head slot is too low to satisfy request.
|
|
||||||
#
|
|
||||||
# To avoid endless loop we going to wait for RESP_TIMEOUT time here.
|
|
||||||
# This time is enough for all pending requests to finish and it is also
|
|
||||||
# enough for main sync loop to clear ``notInSyncEvent``.
|
|
||||||
debug "Empty request received from queue, exiting", peer = peer,
|
|
||||||
local_head_slot = headSlot, remote_head_slot = peerSlot,
|
|
||||||
queue_input_slot = man.queue.inpSlot,
|
|
||||||
queue_output_slot = man.queue.outSlot,
|
|
||||||
queue_last_slot = man.queue.finalSlot, direction = man.direction
|
|
||||||
await sleepAsync(RESP_TIMEOUT_DUR)
|
|
||||||
return
|
|
||||||
|
|
||||||
debug "Creating new request for peer", wall_clock_slot = wallSlot,
|
var jobs: seq[Future[void].Raising([CancelledError])]
|
||||||
remote_head_slot = peerSlot, local_head_slot = headSlot,
|
|
||||||
request = req
|
|
||||||
|
|
||||||
man.workers[index].status = SyncWorkerStatus.Downloading
|
try:
|
||||||
|
for rindex in 0 ..< man.concurrentRequestsCount:
|
||||||
|
man.workers[index].status = SyncWorkerStatus.Requesting
|
||||||
|
let request = man.queue.pop(peerSlot, peer)
|
||||||
|
if request.isEmpty():
|
||||||
|
# SyncQueue could return empty request in 2 cases:
|
||||||
|
# 1. There no more slots in SyncQueue to download (we are synced, but
|
||||||
|
# our ``notInSyncEvent`` is not yet cleared).
|
||||||
|
# 2. Current peer's known head slot is too low to satisfy request.
|
||||||
|
#
|
||||||
|
# To avoid endless loop we going to wait for RESP_TIMEOUT time here.
|
||||||
|
# This time is enough for all pending requests to finish and it is also
|
||||||
|
# enough for main sync loop to clear ``notInSyncEvent``.
|
||||||
|
debug "Empty request received from queue",
|
||||||
|
peer = peer,
|
||||||
|
peer_score = peer.getScore(),
|
||||||
|
peer_speed = peer.netKbps(),
|
||||||
|
index = index,
|
||||||
|
local_head_slot = headSlot,
|
||||||
|
remote_head_slot = peerSlot,
|
||||||
|
queue_input_slot = man.queue.inpSlot,
|
||||||
|
queue_output_slot = man.queue.outSlot,
|
||||||
|
queue_last_slot = man.queue.finalSlot,
|
||||||
|
direction = man.direction,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
|
await sleepAsync(RESP_TIMEOUT_DUR)
|
||||||
|
break
|
||||||
|
|
||||||
let blocks = await man.getBlocks(peer, req)
|
man.workers[index].status = SyncWorkerStatus.Downloading
|
||||||
if blocks.isErr():
|
let data = (await man.getSyncBlockData(index, request)).valueOr:
|
||||||
peer.updateScore(PeerScoreNoValues)
|
debug "Failed to get block data",
|
||||||
man.queue.push(req)
|
peer = peer,
|
||||||
debug "Failed to receive blocks on request",
|
peer_score = peer.getScore(),
|
||||||
request = req, err = blocks.error
|
peer_speed = peer.netKbps(),
|
||||||
return
|
index = index,
|
||||||
let blockData = blocks.get().asSeq()
|
reason = error,
|
||||||
debug "Received blocks on request", blocks_count = len(blockData),
|
direction = man.direction,
|
||||||
blocks_map = getShortMap(req, blockData), request = req
|
sync_ident = man.ident,
|
||||||
|
topics = "syncman"
|
||||||
|
man.queue.push(request)
|
||||||
|
break
|
||||||
|
|
||||||
let slots = mapIt(blockData, it[].slot)
|
# Scoring will happen in `syncUpdate`.
|
||||||
checkResponse(req, slots).isOkOr:
|
man.workers[index].status = SyncWorkerStatus.Queueing
|
||||||
peer.updateScore(PeerScoreBadResponse)
|
let
|
||||||
man.queue.push(req)
|
peerFinalized = peer.getFinalizedEpoch().start_slot()
|
||||||
warn "Incorrect blocks sequence received",
|
lastSlot = request.data.slot + request.data.count - 1
|
||||||
blocks_count = len(blockData),
|
# The peer claims the block is finalized - our own block processing will
|
||||||
blocks_map = getShortMap(req, blockData),
|
# verify this point down the line
|
||||||
request = req,
|
# TODO descore peers that lie
|
||||||
reason = error
|
maybeFinalized = lastSlot < peerFinalized
|
||||||
return
|
|
||||||
|
|
||||||
let shouldGetBlobs =
|
jobs.add(man.queue.push(request, data.blocks, data.blobs, maybeFinalized,
|
||||||
if not man.shouldGetBlobs(req):
|
processCallback))
|
||||||
false
|
|
||||||
else:
|
|
||||||
var hasBlobs = false
|
|
||||||
for blck in blockData:
|
|
||||||
withBlck(blck[]):
|
|
||||||
when consensusFork >= ConsensusFork.Deneb:
|
|
||||||
if forkyBlck.message.body.blob_kzg_commitments.len > 0:
|
|
||||||
hasBlobs = true
|
|
||||||
break
|
|
||||||
hasBlobs
|
|
||||||
|
|
||||||
let blobData =
|
if len(jobs) > 0:
|
||||||
if shouldGetBlobs:
|
await allFutures(jobs)
|
||||||
let blobs = await man.getBlobSidecars(peer, req)
|
|
||||||
if blobs.isErr():
|
|
||||||
peer.updateScore(PeerScoreNoValues)
|
|
||||||
man.queue.push(req)
|
|
||||||
debug "Failed to receive blobs on request",
|
|
||||||
request = req, err = blobs.error
|
|
||||||
return
|
|
||||||
let blobData = blobs.get().asSeq()
|
|
||||||
debug "Received blobs on request",
|
|
||||||
blobs_count = len(blobData),
|
|
||||||
blobs_map = getShortMap(req, blobData), request = req
|
|
||||||
|
|
||||||
if len(blobData) > 0:
|
except CancelledError as exc:
|
||||||
let slots = mapIt(blobData, it[].signed_block_header.message.slot)
|
let pending = jobs.filterIt(not(it.finished)).mapIt(cancelAndWait(it))
|
||||||
checkBlobsResponse(req, slots).isOkOr:
|
await noCancel allFutures(pending)
|
||||||
peer.updateScore(PeerScoreBadResponse)
|
raise exc
|
||||||
man.queue.push(req)
|
|
||||||
warn "Incorrect blobs sequence received",
|
|
||||||
blobs_count = len(blobData),
|
|
||||||
blobs_map = getShortMap(req, blobData),
|
|
||||||
request = req,
|
|
||||||
reason = error
|
|
||||||
return
|
|
||||||
let groupedBlobs = groupBlobs(blockData, blobData).valueOr:
|
|
||||||
peer.updateScore(PeerScoreNoValues)
|
|
||||||
man.queue.push(req)
|
|
||||||
info "Received blobs sequence is inconsistent",
|
|
||||||
blobs_map = getShortMap(req, blobData),
|
|
||||||
request = req, msg = error
|
|
||||||
return
|
|
||||||
groupedBlobs.checkBlobs().isOkOr:
|
|
||||||
peer.updateScore(PeerScoreBadResponse)
|
|
||||||
man.queue.push(req)
|
|
||||||
warn "Received blobs verification failed",
|
|
||||||
blobs_count = len(blobData),
|
|
||||||
blobs_map = getShortMap(req, blobData),
|
|
||||||
request = req,
|
|
||||||
reason = error
|
|
||||||
return
|
|
||||||
Opt.some(groupedBlobs)
|
|
||||||
else:
|
|
||||||
Opt.none(seq[BlobSidecars])
|
|
||||||
|
|
||||||
if len(blockData) == 0 and man.direction == SyncQueueKind.Backward and
|
|
||||||
req.contains(man.getSafeSlot()):
|
|
||||||
# The sync protocol does not distinguish between:
|
|
||||||
# - All requested slots are empty
|
|
||||||
# - Peer does not have data available about requested range
|
|
||||||
#
|
|
||||||
# However, we include the `backfill` slot in backward sync requests.
|
|
||||||
# If we receive an empty response to a request covering that slot,
|
|
||||||
# we know that the response is incomplete and can descore.
|
|
||||||
peer.updateScore(PeerScoreNoValues)
|
|
||||||
man.queue.push(req)
|
|
||||||
debug "Response does not include known-to-exist block", request = req
|
|
||||||
return
|
|
||||||
|
|
||||||
# Scoring will happen in `syncUpdate`.
|
|
||||||
man.workers[index].status = SyncWorkerStatus.Queueing
|
|
||||||
let
|
|
||||||
peerFinalized = peer.getFinalizedEpoch().start_slot()
|
|
||||||
lastSlot = req.slot + req.count
|
|
||||||
# The peer claims the block is finalized - our own block processing will
|
|
||||||
# verify this point down the line
|
|
||||||
# TODO descore peers that lie
|
|
||||||
maybeFinalized = lastSlot < peerFinalized
|
|
||||||
|
|
||||||
await man.queue.push(req, blockData, blobData, maybeFinalized, proc() =
|
|
||||||
man.workers[index].status = SyncWorkerStatus.Processing)
|
|
||||||
|
|
||||||
proc syncWorker[A, B](
|
proc syncWorker[A, B](
|
||||||
man: SyncManager[A, B], index: int
|
man: SyncManager[A, B], index: int
|
||||||
) {.async: (raises: [CancelledError]).} =
|
) {.async: (raises: [CancelledError]).} =
|
||||||
mixin getKey, getScore, getHeadSlot
|
mixin getKey, getScore, getHeadSlot
|
||||||
|
|
||||||
logScope:
|
debug "Starting syncing worker",
|
||||||
index = index
|
index = index,
|
||||||
sync_ident = man.ident
|
sync_ident = man.ident,
|
||||||
direction = man.direction
|
direction = man.direction,
|
||||||
topics = "syncman"
|
topics = "syncman"
|
||||||
|
|
||||||
debug "Starting syncing worker"
|
|
||||||
|
|
||||||
var peer: A = nil
|
var peer: A = nil
|
||||||
|
|
||||||
@ -634,7 +719,11 @@ proc syncWorker[A, B](
|
|||||||
if not(isNil(peer)):
|
if not(isNil(peer)):
|
||||||
man.pool.release(peer)
|
man.pool.release(peer)
|
||||||
|
|
||||||
debug "Sync worker stopped"
|
debug "Sync worker stopped",
|
||||||
|
index = index,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
proc getWorkersStats[A, B](man: SyncManager[A, B]): tuple[map: string,
|
proc getWorkersStats[A, B](man: SyncManager[A, B]): tuple[map: string,
|
||||||
sleeping: int,
|
sleeping: int,
|
||||||
@ -719,18 +808,20 @@ proc syncClose[A, B](
|
|||||||
proc syncLoop[A, B](
|
proc syncLoop[A, B](
|
||||||
man: SyncManager[A, B]
|
man: SyncManager[A, B]
|
||||||
) {.async: (raises: [CancelledError]).} =
|
) {.async: (raises: [CancelledError]).} =
|
||||||
|
|
||||||
logScope:
|
|
||||||
sync_ident = man.ident
|
|
||||||
direction = man.direction
|
|
||||||
topics = "syncman"
|
|
||||||
|
|
||||||
mixin getKey, getScore
|
mixin getKey, getScore
|
||||||
var pauseTime = 0
|
|
||||||
|
# Update SyncQueue parameters, because callbacks used to calculate parameters
|
||||||
|
# could provide different values at moment when syncLoop() started.
|
||||||
|
man.initQueue()
|
||||||
|
|
||||||
man.startWorkers()
|
man.startWorkers()
|
||||||
|
|
||||||
debug "Synchronization loop started"
|
debug "Synchronization loop started",
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
start_slot = man.queue.startSlot,
|
||||||
|
finish_slot = man.queue.finalSlot,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
proc averageSpeedTask() {.async: (raises: [CancelledError]).} =
|
proc averageSpeedTask() {.async: (raises: [CancelledError]).} =
|
||||||
while true:
|
while true:
|
||||||
@ -778,9 +869,11 @@ proc syncLoop[A, B](
|
|||||||
pending_workers_count = pending,
|
pending_workers_count = pending,
|
||||||
wall_head_slot = wallSlot,
|
wall_head_slot = wallSlot,
|
||||||
local_head_slot = headSlot,
|
local_head_slot = headSlot,
|
||||||
pause_time = $chronos.seconds(pauseTime),
|
|
||||||
avg_sync_speed = man.avgSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
avg_sync_speed = man.avgSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
||||||
ins_sync_speed = man.insSyncSpeed.formatBiggestFloat(ffDecimal, 4)
|
ins_sync_speed = man.insSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
of SyncQueueKind.Backward:
|
of SyncQueueKind.Backward:
|
||||||
debug "Current syncing state", workers_map = map,
|
debug "Current syncing state", workers_map = map,
|
||||||
sleeping_workers_count = sleeping,
|
sleeping_workers_count = sleeping,
|
||||||
@ -788,9 +881,11 @@ proc syncLoop[A, B](
|
|||||||
pending_workers_count = pending,
|
pending_workers_count = pending,
|
||||||
wall_head_slot = wallSlot,
|
wall_head_slot = wallSlot,
|
||||||
backfill_slot = man.getSafeSlot(),
|
backfill_slot = man.getSafeSlot(),
|
||||||
pause_time = $chronos.seconds(pauseTime),
|
|
||||||
avg_sync_speed = man.avgSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
avg_sync_speed = man.avgSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
||||||
ins_sync_speed = man.insSyncSpeed.formatBiggestFloat(ffDecimal, 4)
|
ins_sync_speed = man.insSyncSpeed.formatBiggestFloat(ffDecimal, 4),
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
let
|
let
|
||||||
pivot = man.progressPivot
|
pivot = man.progressPivot
|
||||||
progress =
|
progress =
|
||||||
@ -855,10 +950,17 @@ proc syncLoop[A, B](
|
|||||||
# all sync workers are in `Sleeping` state.
|
# all sync workers are in `Sleeping` state.
|
||||||
if pending > 0:
|
if pending > 0:
|
||||||
debug "Synchronization loop waits for workers completion",
|
debug "Synchronization loop waits for workers completion",
|
||||||
wall_head_slot = wallSlot, local_head_slot = headSlot,
|
wall_head_slot = wallSlot,
|
||||||
difference = (wallSlot - headSlot), max_head_age = man.maxHeadAge,
|
local_head_slot = headSlot,
|
||||||
|
difference = (wallSlot - headSlot),
|
||||||
|
max_head_age = man.maxHeadAge,
|
||||||
sleeping_workers_count = sleeping,
|
sleeping_workers_count = sleeping,
|
||||||
waiting_workers_count = waiting, pending_workers_count = pending
|
waiting_workers_count = waiting,
|
||||||
|
pending_workers_count = pending,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
|
|
||||||
# We already synced, so we should reset all the pending workers from
|
# We already synced, so we should reset all the pending workers from
|
||||||
# any state they have.
|
# any state they have.
|
||||||
man.queue.clearAndWakeup()
|
man.queue.clearAndWakeup()
|
||||||
@ -871,21 +973,33 @@ proc syncLoop[A, B](
|
|||||||
await man.syncClose(averageSpeedTaskFut)
|
await man.syncClose(averageSpeedTaskFut)
|
||||||
man.inProgress = false
|
man.inProgress = false
|
||||||
debug "Forward synchronization process finished, exiting",
|
debug "Forward synchronization process finished, exiting",
|
||||||
wall_head_slot = wallSlot, local_head_slot = headSlot,
|
wall_head_slot = wallSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
difference = (wallSlot - headSlot),
|
difference = (wallSlot - headSlot),
|
||||||
max_head_age = man.maxHeadAge
|
max_head_age = man.maxHeadAge,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
man.inProgress = false
|
man.inProgress = false
|
||||||
debug "Forward synchronization process finished, sleeping",
|
debug "Forward synchronization process finished, sleeping",
|
||||||
wall_head_slot = wallSlot, local_head_slot = headSlot,
|
wall_head_slot = wallSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
difference = (wallSlot - headSlot),
|
difference = (wallSlot - headSlot),
|
||||||
max_head_age = man.maxHeadAge
|
max_head_age = man.maxHeadAge,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
else:
|
else:
|
||||||
debug "Synchronization loop sleeping", wall_head_slot = wallSlot,
|
debug "Synchronization loop sleeping",
|
||||||
|
wall_head_slot = wallSlot,
|
||||||
local_head_slot = headSlot,
|
local_head_slot = headSlot,
|
||||||
difference = (wallSlot - headSlot),
|
difference = (wallSlot - headSlot),
|
||||||
max_head_age = man.maxHeadAge
|
max_head_age = man.maxHeadAge,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
of SyncQueueKind.Backward:
|
of SyncQueueKind.Backward:
|
||||||
# Backward syncing is going to be executed only once, so we exit loop
|
# Backward syncing is going to be executed only once, so we exit loop
|
||||||
# and stop all pending tasks which belongs to this instance (sync
|
# and stop all pending tasks which belongs to this instance (sync
|
||||||
@ -893,9 +1007,13 @@ proc syncLoop[A, B](
|
|||||||
await man.syncClose(averageSpeedTaskFut)
|
await man.syncClose(averageSpeedTaskFut)
|
||||||
man.inProgress = false
|
man.inProgress = false
|
||||||
debug "Backward synchronization process finished, exiting",
|
debug "Backward synchronization process finished, exiting",
|
||||||
wall_head_slot = wallSlot, local_head_slot = headSlot,
|
wall_head_slot = wallSlot,
|
||||||
|
local_head_slot = headSlot,
|
||||||
backfill_slot = man.getLastSlot(),
|
backfill_slot = man.getLastSlot(),
|
||||||
max_head_age = man.maxHeadAge
|
max_head_age = man.maxHeadAge,
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
if not(man.notInSyncEvent.isSet()):
|
if not(man.notInSyncEvent.isSet()):
|
||||||
@ -905,10 +1023,14 @@ proc syncLoop[A, B](
|
|||||||
man.notInSyncEvent.fire()
|
man.notInSyncEvent.fire()
|
||||||
man.inProgress = true
|
man.inProgress = true
|
||||||
debug "Node lost sync for more then preset period",
|
debug "Node lost sync for more then preset period",
|
||||||
period = man.maxHeadAge, wall_head_slot = wallSlot,
|
period = man.maxHeadAge,
|
||||||
|
wall_head_slot = wallSlot,
|
||||||
local_head_slot = headSlot,
|
local_head_slot = headSlot,
|
||||||
missing_slots = man.remainingSlots(),
|
missing_slots = man.remainingSlots(),
|
||||||
progress = float(man.queue.progress())
|
progress = float(man.queue.progress()),
|
||||||
|
sync_ident = man.ident,
|
||||||
|
direction = man.direction,
|
||||||
|
topics = "syncman"
|
||||||
else:
|
else:
|
||||||
man.notInSyncEvent.fire()
|
man.notInSyncEvent.fire()
|
||||||
man.inProgress = true
|
man.inProgress = true
|
||||||
|
@@ -200,7 +200,7 @@ proc updatePerformance(overseer: SyncOverseerRef, startTick: Moment,
 
 # Update status string
 overseer.statusMsg = Opt.some(
-"fill: " & timeleft.toTimeLeftString() & " (" &
+timeleft.toTimeLeftString() & " (" &
 (done * 100).formatBiggestFloat(ffDecimal, 2) & "%) " &
 overseer.avgSpeed.formatBiggestFloat(ffDecimal, 4) &
 "slots/s (" & $dag.head.slot & ")")
@@ -521,8 +521,6 @@ proc mainLoop*(
 quit 1
 
 overseer.untrustedInProgress = false
-# Reset status bar
-overseer.statusMsg = Opt.none(string)
 
 # When we finished state rebuilding process - we could start forward
 # SyncManager which could perform finish sync.
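The overseer's status string drops the "fill: " prefix but keeps the same composition. A stand-alone sketch of that string assembly with made-up numbers; `toTimeLeftString` is a nimbus-eth2 helper, so a plain duration string stands in for it here:

```nim
import std/strutils

proc statusMsg(timeLeft: string, done, avgSpeed: float, headSlot: uint64): string =
  # Mirrors the concatenation in the hunk above: progress percentage,
  # average speed and the current head slot.
  timeLeft & " (" &
    (done * 100).formatBiggestFloat(ffDecimal, 2) & "%) " &
    avgSpeed.formatBiggestFloat(ffDecimal, 4) &
    "slots/s (" & $headSlot & ")"

when isMainModule:
  echo statusMsg("12m31s", 0.4321, 153.2, 1_234_567)
  # -> 12m31s (43.21%) 153.2000slots/s (1234567)
```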
(Two more file diffs suppressed because they are too large.)