mirror of
https://github.com/logos-storage/logos-storage-nim.git
synced 2026-01-05 23:13:09 +00:00
feat: add strategic runtime metrics for block exchange monitoring
- Add codex_block_exchange_discovery_requests_total counter to track peer discovery frequency
- Add codex_block_exchange_peer_timeouts_total counter to monitor peer reliability issues
- Add codex_block_exchange_requests_failed_total counter to track request failure rates
This commit is contained in:
parent
7f3004b5c0
commit
25a2b3e9ed
@ -69,6 +69,18 @@ declareCounter(
|
|||||||
codex_block_exchange_spurious_blocks_received,
|
codex_block_exchange_spurious_blocks_received,
|
||||||
"codex blockexchange unrequested/duplicate blocks received",
|
"codex blockexchange unrequested/duplicate blocks received",
|
||||||
)
|
)
|
||||||
|
declareCounter(
|
||||||
|
codex_block_exchange_discovery_requests_total,
|
||||||
|
"Total number of peer discovery requests sent",
|
||||||
|
)
|
||||||
|
declareCounter(
|
||||||
|
codex_block_exchange_peer_timeouts_total, "Total number of peer activity timeouts"
|
||||||
|
)
|
||||||
|
declareCounter(
|
||||||
|
codex_block_exchange_requests_failed_total,
|
||||||
|
"Total number of block requests that failed after exhausting retries"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
const
|
const
|
||||||
DefaultMaxPeersPerRequest* = 10
|
DefaultMaxPeersPerRequest* = 10
|
||||||
@ -211,6 +223,7 @@ proc refreshBlockKnowledge(self: BlockExcEngine) {.async: (raises: [CancelledErr
|
|||||||
proc searchForNewPeers(self: BlockExcEngine, cid: Cid) =
|
proc searchForNewPeers(self: BlockExcEngine, cid: Cid) =
|
||||||
if self.lastDiscRequest + DiscoveryRateLimit < Moment.now():
|
if self.lastDiscRequest + DiscoveryRateLimit < Moment.now():
|
||||||
trace "Searching for new peers for", cid = cid
|
trace "Searching for new peers for", cid = cid
|
||||||
|
codex_block_exchange_discovery_requests_total.inc()
|
||||||
self.lastDiscRequest = Moment.now() # always refresh before calling await!
|
self.lastDiscRequest = Moment.now() # always refresh before calling await!
|
||||||
self.discovery.queueFindBlocksReq(@[cid])
|
self.discovery.queueFindBlocksReq(@[cid])
|
||||||
else:
|
else:
|
||||||
@ -246,6 +259,7 @@ proc downloadInternal(
|
|||||||
|
|
||||||
if self.pendingBlocks.retriesExhausted(address):
|
if self.pendingBlocks.retriesExhausted(address):
|
||||||
trace "Error retries exhausted"
|
trace "Error retries exhausted"
|
||||||
|
codex_block_exchange_requests_failed_total.inc()
|
||||||
handle.fail(newException(RetriesExhaustedError, "Error retries exhausted"))
|
handle.fail(newException(RetriesExhaustedError, "Error retries exhausted"))
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -310,6 +324,7 @@ proc downloadInternal(
|
|||||||
else:
|
else:
|
||||||
# If the peer timed out, retries immediately.
|
# If the peer timed out, retries immediately.
|
||||||
trace "Peer timed out during block request", peer = scheduledPeer.id
|
trace "Peer timed out during block request", peer = scheduledPeer.id
|
||||||
|
codex_block_exchange_peer_timeouts_total.inc()
|
||||||
await self.network.dropPeer(scheduledPeer.id)
|
await self.network.dropPeer(scheduledPeer.id)
|
||||||
# Evicts peer immediately or we may end up picking it again in the
|
# Evicts peer immediately or we may end up picking it again in the
|
||||||
# next retry.
|
# next retry.
|
||||||
@ -320,6 +335,7 @@ proc downloadInternal(
|
|||||||
await handle.cancelAndWait()
|
await handle.cancelAndWait()
|
||||||
except RetriesExhaustedError as exc:
|
except RetriesExhaustedError as exc:
|
||||||
warn "Retries exhausted for block", address, exc = exc.msg
|
warn "Retries exhausted for block", address, exc = exc.msg
|
||||||
|
codex_block_exchange_requests_failed_total.inc()
|
||||||
if not handle.finished:
|
if not handle.finished:
|
||||||
handle.fail(exc)
|
handle.fail(exc)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user