From 69366e18809fb2e75ed36620ec6a0f4cd539c328 Mon Sep 17 00:00:00 2001 From: Kim De Mey Date: Sat, 7 May 2022 13:50:16 +0200 Subject: [PATCH] Improve selecting of nodes in neighborhood gossip (#1072) Allow also concurrent neighborhood gossip jobs when seeding data into the network. Update Grafana dashboard for two additional metrics regarding lookups in neighborhood gossip. --- fluffy/grafana/fluffy_grafana_dashboard.json | 166 +++++++++++++++---- fluffy/network/wire/portal_protocol.nim | 61 +++++-- fluffy/populate_db.nim | 23 ++- fluffy/scripts/test_portal_testnet.nim | 6 + 4 files changed, 208 insertions(+), 48 deletions(-) diff --git a/fluffy/grafana/fluffy_grafana_dashboard.json b/fluffy/grafana/fluffy_grafana_dashboard.json index bdb3fa6c9..7317cb6c7 100644 --- a/fluffy/grafana/fluffy_grafana_dashboard.json +++ b/fluffy/grafana/fluffy_grafana_dashboard.json @@ -66,7 +66,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -128,7 +128,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -188,7 +188,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -247,7 +247,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -306,7 +306,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -355,7 +355,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -459,7 +459,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -603,7 +603,7 @@ "alertThreshold": true }, 
"percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -707,7 +707,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -811,7 +811,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -871,7 +871,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -923,7 +923,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1036,7 +1036,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -1094,7 +1094,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -1146,7 +1146,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1266,7 +1266,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "targets": [ { "exemplar": true, @@ -1318,7 +1318,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1414,7 +1414,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1518,7 +1518,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1605,7 +1605,7 @@ "y": 48 }, "hiddenSeries": 
false, - "id": 8, + "id": 46, "legend": { "avg": false, "current": false, @@ -1622,7 +1622,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1633,17 +1633,25 @@ "targets": [ { "exemplar": true, - "expr": "portal_message_decoding_failures_total", + "expr": "rate(portal_gossip_with_lookup_total[$__rate_interval])", "interval": "", - "legendFormat": "", + "legendFormat": "portal_gossip_with_lookup[{{protocol_id}}]", "refId": "A" + }, + { + "exemplar": true, + "expr": "rate(portal_gossip_without_lookup_total[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "portal_gossip_without_lookup[{{protocol_id}}]", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Portal message decoding failures", + "title": "Neighborhood gossip node lookups", "tooltip": { "shared": true, "sort": 0, @@ -1659,7 +1667,7 @@ }, "yaxes": [ { - "$$hashKey": "object:595", + "$$hashKey": "object:97", "format": "short", "label": null, "logBase": 1, @@ -1668,7 +1676,7 @@ "show": true }, { - "$$hashKey": "object:596", + "$$hashKey": "object:98", "format": "short", "label": null, "logBase": 1, @@ -1718,7 +1726,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1786,6 +1794,102 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + 
"options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "portal_message_decoding_failures_total", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Portal message decoding failures", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:595", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:596", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -1822,7 +1926,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1926,7 +2030,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.9", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -1987,7 +2091,7 @@ } } ], - "refresh": false, + "refresh": "5s", "schemaVersion": 27, "style": "dark", "tags": [], @@ -2002,5 +2106,5 @@ "timezone": "", "title": "Fluffy Dashboard", "uid": "iWQQPuPnkadsf", - "version": 4 + "version": 7 } diff --git a/fluffy/network/wire/portal_protocol.nim b/fluffy/network/wire/portal_protocol.nim index ba1e750ed..c45917d5f 100644 --- a/fluffy/network/wire/portal_protocol.nim +++ b/fluffy/network/wire/portal_protocol.nim @@ -59,6 +59,13 @@ declareCounter 
portal_gossip_offers_successful, declareCounter portal_gossip_offers_failed, "Portal wire protocol failed content offers from neighborhood gossip", labels = ["protocol_id"] +declareCounter portal_gossip_with_lookup, + "Portal wire protocol neighborhood gossip that required a node lookup", + labels = ["protocol_id"] +declareCounter portal_gossip_without_lookup, + "Portal wire protocol neighborhood gossip that did not require a node lookup", + labels = ["protocol_id"] + # Note: These metrics are to get some idea on how many enrs are send on average. # Relevant issue: https://github.com/ethereum/portal-network-specs/issues/136 @@ -1014,19 +1021,49 @@ proc neighborhoodGossip*( let contentId = contentIdOpt.get() - # Doing an lookup over the network to get the very closest nodes to the - # content, instead of looking only at our own routing table. This should give - # a bigger rate of success in case the content is not known yet and avoid - # data being stopped in its propagation. However, perhaps this causes issues - # in data getting propagated in a wider id range. - let closestNodes = await p.lookup(NodeId(contentId)) + # For selecting the closest nodes to whom to gossip the content a mixed + # approach is taken: + # 1. Select the closest neighbours in the routing table + # 2. Check if the radius is known for these nodes and whether they are + # in range of the content to be offered. + # 3. If more than n (= 4) nodes are in range, offer these nodes the content + # (max nodes set at 8). + # 4. If less than n nodes are in range, do a node lookup, and offer the nodes + # returned from the lookup the content (max nodes set at 8) + # + # This should give a bigger rate of success and avoid the data being stopped + # in its propagation than when looking only for nodes in the own routing + # table, but at the same time avoid unnecessary node lookups. + # It might still cause issues in data getting propagated in a wider id range. 
- # Selecting closest 8 nodes to offer data - for node in closestNodes[0..= 8: # use local nodes for gossip + portal_gossip_without_lookup.inc(labelValues = [$p.protocolId]) + for node in gossipNodes[0..