fix: resolve stuck peer refresh state preventing block discovery

This prevents peers from becoming permanently invisible to block discovery when
they fail to respond to WantHave requests.

Part of https://github.com/codex-storage/nim-codex/issues/974
This commit is contained in:
Chrysostomos Nanakos 2025-09-26 20:16:30 +03:00
parent 4abe8c4d97
commit dddf7424b4
No known key found for this signature in database
2 changed files with 17 additions and 11 deletions

View File

@ -206,17 +206,15 @@ proc refreshBlockKnowledge(self: BlockExcEngine) {.async: (raises: [CancelledErr
# want list in the coarsest way possible instead of over many
# small updates.
#
if peer.refreshInProgress:
trace "Peer refresh in progress", peer = peer.id
continue
# In dynamic swarms, staleness will dominate latency.
if peer.lastRefresh < self.pendingBlocks.lastInclusion or peer.isKnowledgeStale:
peer.refreshRequested()
# TODO: optimize this by keeping track of what was sent and sending deltas.
# This should allow us to run much more frequent refreshes, and be way more
# efficient about it.
await self.refreshBlockKnowledge(peer)
if peer.isKnowledgeStale or peer.lastRefresh < self.pendingBlocks.lastInclusion:
if not peer.refreshInProgress:
peer.refreshRequested()
# TODO: optimize this by keeping track of what was sent and sending deltas.
# This should allow us to run much more frequent refreshes, and be way more
# efficient about it.
await self.refreshBlockKnowledge(peer)
else:
trace "Not refreshing: peer is up to date", peer = peer.id

View File

@ -27,7 +27,7 @@ export payments, nitro
const
# Constants used for pacing peer knowledge refreshes. `MinRefreshInterval` is
# the unit a peer's `refreshBackoff` is multiplied by in the staleness check.
# NOTE(review): `MaxRefreshBackoff` is listed twice because this is a rendered
# diff without +/- markers; the two lines differ only in the trailing comment.
# Presumably the value is a multiplier of `MinRefreshInterval` (36 x 1 second),
# which would make the second comment the correct one - confirm at the use site.
MinRefreshInterval = 1.seconds
MaxRefreshBackoff = 36 # 3 minutes
MaxRefreshBackoff = 36 # 36 seconds
type BlockExcPeerCtx* = ref object of RootObj
id*: PeerId
@ -45,7 +45,15 @@ type BlockExcPeerCtx* = ref object of RootObj
activityTimeout*: Duration
proc isKnowledgeStale*(self: BlockExcPeerCtx): bool =
## Reports whether this peer's block knowledge is stale, i.e. whether
## `lastRefresh + refreshBackoff * MinRefreshInterval` lies before
## `Moment.now()`.
##
## Not a pure predicate: when the knowledge is stale while `refreshInProgress`
## is still set, the flag is cleared and `refreshBackoff` is reset to 1 before
## the result is returned.
# NOTE(review): this listing is a rendered diff without +/- markers. The next
# line appears to be the pre-change one-line body, superseded by the `let`
# block that follows - confirm against the repository.
self.lastRefresh + self.refreshBackoff * MinRefreshInterval < Moment.now()
let
staleness = self.lastRefresh + self.refreshBackoff * MinRefreshInterval < Moment.now()
# A refresh still flagged as in progress once the staleness deadline has passed
# is reset here, so the peer can be picked up for a new refresh (per the commit
# message, this covers peers that never answered the request).
if staleness and self.refreshInProgress:
trace "Cleaning up refresh state", peer = self.id
self.refreshInProgress = false
self.refreshBackoff = 1
# The result reflects the condition as computed before the reset above.
staleness
proc isBlockSent*(self: BlockExcPeerCtx, address: BlockAddress): bool =
## Returns true when `address` is contained in this peer's `blocksSent`.
address in self.blocksSent