From dddf7424b4f079277df4ba06c7e520678df4b06d Mon Sep 17 00:00:00 2001 From: Chrysostomos Nanakos Date: Fri, 26 Sep 2025 20:16:30 +0300 Subject: [PATCH] fix: resolve stuck peer refresh state preventing block discovery This prevents peers from becoming permanently invisible to block discovery when they fail to respond to WantHave requests. Part of https://github.com/codex-storage/nim-codex/issues/974 --- codex/blockexchange/engine/engine.nim | 16 +++++++--------- codex/blockexchange/peers/peercontext.nim | 12 ++++++++++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/codex/blockexchange/engine/engine.nim b/codex/blockexchange/engine/engine.nim index 3166b808..20effd59 100644 --- a/codex/blockexchange/engine/engine.nim +++ b/codex/blockexchange/engine/engine.nim @@ -206,17 +206,15 @@ proc refreshBlockKnowledge(self: BlockExcEngine) {.async: (raises: [CancelledErr # want list in the coarsest way possible instead of over many # small updates. # - if peer.refreshInProgress: - trace "Peer refresh in progress", peer = peer.id - continue # In dynamic swarms, staleness will dominate latency. - if peer.lastRefresh < self.pendingBlocks.lastInclusion or peer.isKnowledgeStale: - peer.refreshRequested() - # TODO: optimize this by keeping track of what was sent and sending deltas. - # This should allow us to run much more frequent refreshes, and be way more - # efficient about it. - await self.refreshBlockKnowledge(peer) + if peer.isKnowledgeStale or peer.lastRefresh < self.pendingBlocks.lastInclusion: + if not peer.refreshInProgress: + peer.refreshRequested() + # TODO: optimize this by keeping track of what was sent and sending deltas. + # This should allow us to run much more frequent refreshes, and be way more + # efficient about it. + await self.refreshBlockKnowledge(peer) else: trace "Not refreshing: peer is up to date", peer = peer.id diff --git a/codex/blockexchange/peers/peercontext.nim b/codex/blockexchange/peers/peercontext.nim index f6eeea46..326468f1 100644 --- a/codex/blockexchange/peers/peercontext.nim +++ b/codex/blockexchange/peers/peercontext.nim @@ -27,7 +27,7 @@ export payments, nitro const MinRefreshInterval = 1.seconds - MaxRefreshBackoff = 36 # 3 minutes + MaxRefreshBackoff = 36 # 36 seconds type BlockExcPeerCtx* = ref object of RootObj id*: PeerId @@ -45,7 +45,15 @@ type BlockExcPeerCtx* = ref object of RootObj activityTimeout*: Duration proc isKnowledgeStale*(self: BlockExcPeerCtx): bool = - self.lastRefresh + self.refreshBackoff * MinRefreshInterval < Moment.now() + let + staleness = self.lastRefresh + self.refreshBackoff * MinRefreshInterval < Moment.now() + + if staleness and self.refreshInProgress: + trace "Cleaning up refresh state", peer = self.id + self.refreshInProgress = false + self.refreshBackoff = 1 + + staleness proc isBlockSent*(self: BlockExcPeerCtx, address: BlockAddress): bool = address in self.blocksSent