Improve selecting of nodes in neighborhood gossip (#1072)
Also allow concurrent neighborhood gossip jobs when seeding data into the network. Update the Grafana dashboard with two additional metrics regarding lookups in neighborhood gossip.
This commit is contained in:
parent
5467abed8f
commit
69366e1880
|
@ -66,7 +66,7 @@
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -128,7 +128,7 @@
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -188,7 +188,7 @@
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -247,7 +247,7 @@
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -306,7 +306,7 @@
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -355,7 +355,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -459,7 +459,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -603,7 +603,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -707,7 +707,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -811,7 +811,7 @@
|
|||
"showUnfilled": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -871,7 +871,7 @@
|
|||
"showUnfilled": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -923,7 +923,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1036,7 +1036,7 @@
|
|||
"showUnfilled": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -1094,7 +1094,7 @@
|
|||
"showUnfilled": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -1146,7 +1146,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1266,7 +1266,7 @@
|
|||
"showUnfilled": true,
|
||||
"text": {}
|
||||
},
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
@ -1318,7 +1318,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1414,7 +1414,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1518,7 +1518,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1605,7 +1605,7 @@
|
|||
"y": 48
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"id": 46,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
|
@ -1622,7 +1622,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1633,17 +1633,25 @@
|
|||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "portal_message_decoding_failures_total",
|
||||
"expr": "rate(portal_gossip_with_lookup_total[$__rate_interval])",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"legendFormat": "portal_gossip_with_lookup[{{protocol_id}}]",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "rate(portal_gossip_without_lookup_total[$__rate_interval])",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "portal_gossip_without_lookup[{{protocol_id}}]",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Portal message decoding failures",
|
||||
"title": "Neighborhood gossip node lookups",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
|
@ -1659,7 +1667,7 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:595",
|
||||
"$$hashKey": "object:97",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -1668,7 +1676,7 @@
|
|||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:596",
|
||||
"$$hashKey": "object:98",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -1718,7 +1726,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1786,6 +1794,102 @@
|
|||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 56
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "portal_message_decoding_failures_total",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Portal message decoding failures",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:595",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:596",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
|
@ -1822,7 +1926,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1926,7 +2030,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pluginVersion": "7.5.11",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1987,7 +2091,7 @@
|
|||
}
|
||||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
|
@ -2002,5 +2106,5 @@
|
|||
"timezone": "",
|
||||
"title": "Fluffy Dashboard",
|
||||
"uid": "iWQQPuPnkadsf",
|
||||
"version": 4
|
||||
"version": 7
|
||||
}
|
||||
|
|
|
@ -59,6 +59,13 @@ declareCounter portal_gossip_offers_successful,
|
|||
declareCounter portal_gossip_offers_failed,
|
||||
"Portal wire protocol failed content offers from neighborhood gossip",
|
||||
labels = ["protocol_id"]
|
||||
declareCounter portal_gossip_with_lookup,
|
||||
"Portal wire protocol neighborhood gossip that required a node lookup",
|
||||
labels = ["protocol_id"]
|
||||
declareCounter portal_gossip_without_lookup,
|
||||
"Portal wire protocol neighborhood gossip that did not require a node lookup",
|
||||
labels = ["protocol_id"]
|
||||
|
||||
|
||||
# Note: These metrics are to get some idea of how many ENRs are sent on average.
|
||||
# Relevant issue: https://github.com/ethereum/portal-network-specs/issues/136
|
||||
|
@ -1014,19 +1021,49 @@ proc neighborhoodGossip*(
|
|||
|
||||
let contentId = contentIdOpt.get()
|
||||
|
||||
# Doing a lookup over the network to get the very closest nodes to the
|
||||
# content, instead of looking only at our own routing table. This should give
|
||||
# a bigger rate of success in case the content is not known yet and avoid
|
||||
# data being stopped in its propagation. However, perhaps this causes issues
|
||||
# in data getting propagated in a wider id range.
|
||||
let closestNodes = await p.lookup(NodeId(contentId))
|
||||
# For selecting the closest nodes to whom to gossip the content a mixed
|
||||
# approach is taken:
|
||||
# 1. Select the closest neighbours in the routing table
|
||||
# 2. Check if the radius is known for these nodes and whether they are
|
||||
# in range of the content to be offered.
|
||||
# 3. If at least n (= 8) nodes are in range, offer these nodes the content
|
||||
# (max nodes set at 8).
|
||||
# 4. If less than n nodes are in range, do a node lookup, and offer the nodes
|
||||
# returned from the lookup the content (max nodes set at 8)
|
||||
#
|
||||
# This should give a bigger rate of success and avoid the data being stopped
|
||||
# in its propagation than when looking only for nodes in the own routing
|
||||
# table, but at the same time avoid unnecessary node lookups.
|
||||
# It might still cause issues in data getting propagated in a wider id range.
|
||||
|
||||
# Selecting closest 8 nodes to offer data
|
||||
for node in closestNodes[0..<min(closestNodes.len, 8)]:
|
||||
# Note: opportunistically not checking if the radius of the node is known
|
||||
# and thus if the node is in radius with the content.
|
||||
let req = OfferRequest(dst: node, kind: Direct, contentList: contentList)
|
||||
await p.offerQueue.addLast(req)
|
||||
const maxGossipNodes = 8
|
||||
|
||||
let closestLocalNodes = p.routingTable.neighbours(
|
||||
NodeId(contentId), k = 16, seenOnly = true)
|
||||
|
||||
var gossipNodes: seq[Node]
|
||||
for node in closestLocalNodes:
|
||||
let radius = p.radiusCache.get(node.id)
|
||||
if radius.isSome():
|
||||
if p.inRange(node.id, radius.unsafeGet(), contentId):
|
||||
gossipNodes.add(node)
|
||||
|
||||
if gossipNodes.len >= 8: # use local nodes for gossip
|
||||
portal_gossip_without_lookup.inc(labelValues = [$p.protocolId])
|
||||
for node in gossipNodes[0..<min(gossipNodes.len, maxGossipNodes)]:
|
||||
let req = OfferRequest(dst: node, kind: Direct, contentList: contentList)
|
||||
await p.offerQueue.addLast(req)
|
||||
else: # use looked up nodes for gossip
|
||||
portal_gossip_with_lookup.inc(labelValues = [$p.protocolId])
|
||||
let closestNodes = await p.lookup(NodeId(contentId))
|
||||
|
||||
for node in closestNodes[0..<min(closestNodes.len, maxGossipNodes)]:
|
||||
# Note: opportunistically not checking if the radius of the node is known
|
||||
# and thus if the node is in radius with the content. Reason is, these
|
||||
# should really be the closest nodes in the DHT, and thus are most likely
|
||||
# going to be in range of the requested content.
|
||||
let req = OfferRequest(dst: node, kind: Direct, contentList: contentList)
|
||||
await p.offerQueue.addLast(req)
|
||||
|
||||
proc processContent(
|
||||
stream: PortalStream, contentKeys: ContentKeysList, content: seq[byte])
|
||||
|
|
|
@ -151,6 +151,22 @@ proc populateHistoryDb*(
|
|||
proc propagateHistoryDb*(
|
||||
p: PortalProtocol, dataFile: string, verify = false):
|
||||
Future[Result[void, string]] {.async.} =
|
||||
|
||||
const concurrentGossips = 20
|
||||
|
||||
var gossipQueue =
|
||||
newAsyncQueue[(ContentKeysList, seq[byte])](concurrentGossips)
|
||||
var gossipWorkers: seq[Future[void]]
|
||||
|
||||
# Worker loop used to run several neighborhood gossip jobs concurrently.
# It loops with no exit condition: each iteration waits for the next
# (content keys, content) item on gossipQueue and awaits its neighborhood
# gossip before taking another item from the queue.
proc gossipWorker(p: PortalProtocol) {.async.} =
|
||||
while true:
|
||||
let (keys, content) = await gossipQueue.popFirst()
|
||||
|
||||
await p.neighborhoodGossip(keys, content)
|
||||
|
||||
for i in 0 ..< concurrentGossips:
|
||||
gossipWorkers.add(gossipWorker(p))
|
||||
|
||||
let blockData = readBlockDataTable(dataFile)
|
||||
|
||||
if blockData.isOk():
|
||||
|
@ -162,12 +178,9 @@ proc propagateHistoryDb*(
|
|||
if p.inRange(contentId):
|
||||
p.contentDB.put(contentId, value[1])
|
||||
|
||||
await p.neighborhoodGossip(
|
||||
ContentKeysList(@[encode(value[0])]), value[1])
|
||||
await gossipQueue.addLast(
|
||||
(ContentKeysList(@[encode(value[0])]), value[1]))
|
||||
|
||||
# Need to be sure that all offers were started. TODO: this is not great.
|
||||
while not p.offerQueueEmpty():
|
||||
await sleepAsync(500.milliseconds)
|
||||
return ok()
|
||||
else:
|
||||
return err(blockData.error)
|
||||
|
|
|
@ -188,6 +188,12 @@ procSuite "Portal testnet tests":
|
|||
check (await clients[0].portal_history_propagate(dataFile))
|
||||
await clients[0].close()
|
||||
|
||||
# Note: Sleeping to make a test work is never great. Here it is needed
|
||||
# because the data needs to propagate over the nodes. What one could do is
|
||||
# add a json-rpc debug proc that returns whether the offer queue is empty or
|
||||
# not. And then poll every node until all nodes have an empty queue.
|
||||
await sleepAsync(10.seconds)
|
||||
|
||||
let blockData = readBlockDataTable(dataFile)
|
||||
check blockData.isOk()
|
||||
|
||||
|
|
Loading…
Reference in New Issue