Fix bug that would cause no nodes to be selected for revalidation (#2656)

This bug had the effect that our radius cache would not get
filled by any outgoing pings, causing:
- Node lookups to always occur on neighborhood (NH) gossip
- POKEs to occur much more rarely

Also add metrics for the number of offers done via the POKE mechanism.
This commit is contained in:
Kim De Mey 2024-09-25 11:30:42 +02:00 committed by GitHub
parent cb69723ff3
commit 3820b15f28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 7 additions and 2 deletions

View File

@ -75,6 +75,9 @@ declareCounter portal_gossip_without_lookup,
"Portal wire protocol neighborhood gossip that did not require a node lookup", "Portal wire protocol neighborhood gossip that did not require a node lookup",
labels = ["protocol_id"] labels = ["protocol_id"]
declareCounter portal_poke_offers,
"Portal wire protocol offers through poke mechanism", labels = ["protocol_id"]
# Note: These metrics are to get some idea on how many enrs are sent on average. # Note: These metrics are to get some idea on how many enrs are sent on average.
# Relevant issue: https://github.com/ethereum/portal-network-specs/issues/136 # Relevant issue: https://github.com/ethereum/portal-network-specs/issues/136
const enrsBuckets = [0.0, 1.0, 3.0, 5.0, 8.0, 9.0, Inf] const enrsBuckets = [0.0, 1.0, 3.0, 5.0, 8.0, 9.0, Inf]
@ -1103,6 +1106,7 @@ proc triggerPoke*(
list = List[ContentKV, contentKeysLimit].init(@[contentKV]) list = List[ContentKV, contentKeysLimit].init(@[contentKV])
req = OfferRequest(dst: node, kind: Direct, contentList: list) req = OfferRequest(dst: node, kind: Direct, contentList: list)
p.offerQueue.putNoWait(req) p.offerQueue.putNoWait(req)
portal_poke_offers.inc(labelValues = [$p.protocolId])
except AsyncQueueFullError as e: except AsyncQueueFullError as e:
# Should not occur as full() check is done. # Should not occur as full() check is done.
raiseAssert(e.msg) raiseAssert(e.msg)
@ -1641,12 +1645,13 @@ proc revalidateNode*(p: PortalProtocol, n: Node) {.async: (raises: [CancelledErr
proc getNodeForRevalidation(p: PortalProtocol): Opt[Node] = proc getNodeForRevalidation(p: PortalProtocol): Opt[Node] =
let node = p.routingTable.nodeToRevalidate() let node = p.routingTable.nodeToRevalidate()
if node.isNil: if node.isNil:
# This should not occur except for when the RT is empty
return Opt.none(Node) return Opt.none(Node)
let now = now(chronos.Moment) let now = now(chronos.Moment)
let timestamp = p.pingTimings.getOrDefault(node.id, now) let timestamp = p.pingTimings.getOrDefault(node.id, Moment.init(0'i64, Second))
if (timestamp + revalidationTimeout) <= now: if (timestamp + revalidationTimeout) < now:
Opt.some(node) Opt.some(node)
else: else:
Opt.none(Node) Opt.none(Node)