Add concurrency to the content offers of neighborhoodGossip proc (#1027)
* Add concurrency to the content offers of neighborhoodGossip proc And remove some whitespace * Remove more whitespace and adjust for 80 char line limit * Update fluffy grafana dashboard to include gossip offer results
This commit is contained in:
parent
84ff179cd9
commit
9d656e99c4
|
@ -16,7 +16,7 @@
|
|||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 12,
|
||||
"id": 13,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
|
@ -1397,7 +1397,7 @@
|
|||
"y": 40
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"id": 44,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
|
@ -1425,17 +1425,25 @@
|
|||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "portal_message_decoding_failures_total",
|
||||
"expr": "rate(portal_gossip_offers_successful_total[$__rate_interval])",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"legendFormat": "portal_gossip_offers_successful[{{protocol_id}}]",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "rate(portal_gossip_offers_failed_total[$__rate_interval])",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "portal_gossip_offers_failed[{{protocol_id}}]",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Portal message decoding failures",
|
||||
"title": "Neighborhood gossip content offers",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
|
@ -1451,7 +1459,7 @@
|
|||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:595",
|
||||
"$$hashKey": "object:4139",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -1460,7 +1468,7 @@
|
|||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:596",
|
||||
"$$hashKey": "object:4140",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
|
@ -1578,6 +1586,102 @@
|
|||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 48
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "portal_message_decoding_failures_total",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Portal message decoding failures",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:595",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:596",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
|
@ -1640,7 +1744,9 @@
|
|||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "uTP outgoing connections",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
|
@ -1742,7 +1848,9 @@
|
|||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "uTP Packets",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
|
@ -1781,9 +1889,105 @@
|
|||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 69
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 42,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.9",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "utp_established_connections",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "uTP established connections",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:3811",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:3812",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
|
@ -1797,6 +2001,6 @@
|
|||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Fluffy Dashboard",
|
||||
"uid": "iWQQPuPnk",
|
||||
"version": 9
|
||||
"uid": "iWQQPuPnkadsf",
|
||||
"version": 4
|
||||
}
|
||||
|
|
|
@ -38,7 +38,9 @@ func encodeKey(k: ContentKey): (ByteList, ContentId) =
|
|||
let keyEncoded = encode(k)
|
||||
return (keyEncoded, toContentId(keyEncoded))
|
||||
|
||||
func getEncodedKeyForContent(cType: ContentType, chainId: uint16, hash: BlockHash): (ByteList, ContentId) =
|
||||
func getEncodedKeyForContent(
|
||||
cType: ContentType, chainId: uint16, hash: BlockHash):
|
||||
(ByteList, ContentId) =
|
||||
let contentKeyType = ContentKeyType(chainId: chainId, blockHash: hash)
|
||||
|
||||
let contentKey =
|
||||
|
@ -52,15 +54,16 @@ func getEncodedKeyForContent(cType: ContentType, chainId: uint16, hash: BlockHas
|
|||
|
||||
return encodeKey(contentKey)
|
||||
|
||||
proc validateHeaderBytes*(bytes: seq[byte], hash: BlockHash): Option[BlockHeader] =
|
||||
proc validateHeaderBytes*(
|
||||
bytes: seq[byte], hash: BlockHash): Option[BlockHeader] =
|
||||
try:
|
||||
var rlp = rlpFromBytes(bytes)
|
||||
|
||||
let blockHeader = rlp.read(BlockHeader)
|
||||
|
||||
if not (blockHeader.blockHash() == hash):
|
||||
# TODO: Header with different hash than expected maybe we should punish peer which sent
|
||||
# us this ?
|
||||
# TODO: Header with different hash than expected, maybe we should punish
|
||||
# peer which sent us this ?
|
||||
return none(BlockHeader)
|
||||
|
||||
return some(blockHeader)
|
||||
|
@ -69,7 +72,9 @@ proc validateHeaderBytes*(bytes: seq[byte], hash: BlockHash): Option[BlockHeader
|
|||
# TODO add some logging about failed decoding
|
||||
return none(BlockHeader)
|
||||
|
||||
proc validateBodyBytes*(bytes: seq[byte], txRoot: KeccakHash, ommersHash: KeccakHash): Option[BlockBody] =
|
||||
proc validateBodyBytes*(
|
||||
bytes: seq[byte], txRoot: KeccakHash, ommersHash: KeccakHash):
|
||||
Option[BlockBody] =
|
||||
try:
|
||||
var rlp = rlpFromBytes(bytes)
|
||||
|
||||
|
@ -80,7 +85,8 @@ proc validateBodyBytes*(bytes: seq[byte], txRoot: KeccakHash, ommersHash: Keccak
|
|||
|
||||
if txRoot != calculatedTxRoot or ommersHash != calculatedOmmersHash:
|
||||
# we got block body (bundle of transactions and uncles) which do not match
|
||||
# header. For now just ignore it, but maybe we should penalize peer sending us such data?
|
||||
# header. For now just ignore it, but maybe we should penalize peer
|
||||
# sending us such data?
|
||||
return none(BlockBody)
|
||||
|
||||
return some(blockBody)
|
||||
|
@ -89,7 +95,8 @@ proc validateBodyBytes*(bytes: seq[byte], txRoot: KeccakHash, ommersHash: Keccak
|
|||
# TODO add some logging about failed decoding
|
||||
return none(BlockBody)
|
||||
|
||||
proc getContentFromDb(h: HistoryNetwork, T: type, contentId: ContentId): Option[T] =
|
||||
proc getContentFromDb(
|
||||
h: HistoryNetwork, T: type, contentId: ContentId): Option[T] =
|
||||
if h.portalProtocol.inRange(contentId):
|
||||
let contentFromDB = h.contentDB.get(contentId)
|
||||
if contentFromDB.isSome():
|
||||
|
@ -98,14 +105,17 @@ proc getContentFromDb(h: HistoryNetwork, T: type, contentId: ContentId): Option[
|
|||
let content = rlp.read(T)
|
||||
return some(content)
|
||||
except CatchableError as e:
|
||||
# Content in db should always have valid formatting, so this should not happen
|
||||
# Content in db should always have valid formatting, so this should not
|
||||
# happen
|
||||
raiseAssert(e.msg)
|
||||
else:
|
||||
return none(T)
|
||||
else:
|
||||
return none(T)
|
||||
|
||||
proc getBlockHeader*(h: HistoryNetwork, chainId: uint16, hash: BlockHash): Future[Option[BlockHeader]] {.async.} =
|
||||
proc getBlockHeader*(
|
||||
h: HistoryNetwork, chainId: uint16, hash: BlockHash):
|
||||
Future[Option[BlockHeader]] {.async.} =
|
||||
let (keyEncoded, contentId) = getEncodedKeyForContent(blockHeader, chainId, hash)
|
||||
|
||||
let maybeHeaderFromDb = h.getContentFromDb(BlockHeader, contentId)
|
||||
|
@ -128,7 +138,9 @@ proc getBlockHeader*(h: HistoryNetwork, chainId: uint16, hash: BlockHash): Futur
|
|||
|
||||
return maybeHeader
|
||||
|
||||
proc getBlock*(h: HistoryNetwork, chainId: uint16, hash: BlockHash): Future[Option[Block]] {.async.} =
|
||||
proc getBlock*(
|
||||
h: HistoryNetwork, chainId: uint16, hash: BlockHash):
|
||||
Future[Option[Block]] {.async.} =
|
||||
let maybeHeader = await h.getBlockHeader(chainId, hash)
|
||||
|
||||
if maybeHeader.isNone():
|
||||
|
|
|
@ -53,6 +53,12 @@ declareHistogram portal_content_keys_offered,
|
|||
declareHistogram portal_content_keys_accepted,
|
||||
"Portal wire protocol amount of content keys per accept message received",
|
||||
labels = ["protocol_id"], buckets = contentKeysBuckets
|
||||
declareCounter portal_gossip_offers_successful,
|
||||
"Portal wire protocol successful content offers from neighborhood gossip",
|
||||
labels = ["protocol_id"]
|
||||
declareCounter portal_gossip_offers_failed,
|
||||
"Portal wire protocol failed content offers from neighborhood gossip",
|
||||
labels = ["protocol_id"]
|
||||
|
||||
# Note: These metrics are to get some idea on how many ENRs are sent on average.
|
||||
# Relevant issue: https://github.com/ethereum/portal-network-specs/issues/136
|
||||
|
@ -93,6 +99,20 @@ const
|
|||
16 # HMAC
|
||||
discv5MaxSize = 1280
|
||||
|
||||
# These are the concurrent offers per Portal wire protocol that is running.
|
||||
# Using the `offerQueue` allows for limiting the number of offers sent and
|
||||
# thus how many streams can be started.
|
||||
# TODO:
|
||||
# More thought needs to go into this as it is currently on a per network
|
||||
# basis. Keep it simple like that? Or limit it better at the stream transport
|
||||
# level? In the latter case, this might still need to be checked/blocked at
|
||||
# the very start of sending the offer, because blocking/waiting too long
|
||||
# between the received accept message and actually starting the stream and
|
||||
# sending data could give issues due to timeouts on the other side.
|
||||
# And then there are still limits to be applied also for FindContent and the
|
||||
# incoming directions.
|
||||
concurrentOffers = 50
|
||||
|
||||
type
|
||||
ToContentIdHandler* =
|
||||
proc(contentKey: ByteList): Option[ContentId] {.raises: [Defect], gcsafe.}
|
||||
|
@ -114,6 +134,8 @@ type
|
|||
revalidateLoop: Future[void]
|
||||
stream*: PortalStream
|
||||
radiusCache: RadiusCache
|
||||
offerQueue: AsyncQueue[(Node, ContentKeysList)]
|
||||
offerWorkers: seq[Future[void]]
|
||||
|
||||
PortalResult*[T] = Result[T, cstring]
|
||||
|
||||
|
@ -362,7 +384,8 @@ proc new*(T: type PortalProtocol,
|
|||
toContentId: toContentId,
|
||||
dataRadius: dataRadius,
|
||||
bootstrapRecords: @bootstrapRecords,
|
||||
radiusCache: RadiusCache.init(256))
|
||||
radiusCache: RadiusCache.init(256),
|
||||
offerQueue: newAsyncQueue[(Node, ContentKeysList)](concurrentOffers))
|
||||
|
||||
proto.baseProtocol.registerTalkProtocol(@(proto.protocolId), proto).expect(
|
||||
"Only one protocol should have this id")
|
||||
|
@ -606,6 +629,19 @@ proc offer*(p: PortalProtocol, dst: Node, contentKeys: ContentKeysList):
|
|||
else:
|
||||
return err("No accept response")
|
||||
|
||||
proc offerWorker(p: PortalProtocol) {.async.} =
  ## Endless worker loop: takes the next `(node, contentKeys)` item from
  ## `p.offerQueue`, offers those content keys to that node and counts the
  ## outcome in the gossip offer metrics, labelled with the protocol id.
  while true:
    let (dst, keys) = await p.offerQueue.popFirst()
    let offerResult = await p.offer(dst, keys)

    if offerResult.isErr():
      portal_gossip_offers_failed.inc(labelValues = [$p.protocolId])
    else:
      portal_gossip_offers_successful.inc(labelValues = [$p.protocolId])
|
||||
|
||||
proc offerQueueEmpty*(p: PortalProtocol): bool =
  ## Returns whether the offer queue of `p` currently reports itself as empty.
  return p.offerQueue.empty()
|
||||
|
||||
proc neighborhoodGossip*(p: PortalProtocol, contentKeys: ContentKeysList) {.async.} =
|
||||
let contentKey = contentKeys[0] # for now only 1 item is considered
|
||||
let contentIdOpt = p.toContentId(contentKey)
|
||||
|
@ -622,8 +658,7 @@ proc neighborhoodGossip*(p: PortalProtocol, contentKeys: ContentKeysList) {.asyn
|
|||
NodeId(contentId), k = 6, seenOnly = false)
|
||||
|
||||
for node in closestNodes:
|
||||
# Not doing anything if this fails
|
||||
discard await p.offer(node, contentKeys)
|
||||
await p.offerQueue.addLast((node, contentKeys))
|
||||
|
||||
proc processContent(
|
||||
stream: PortalStream, contentKeys: ContentKeysList, content: seq[byte])
|
||||
|
@ -947,12 +982,19 @@ proc start*(p: PortalProtocol) =
|
|||
p.refreshLoop = refreshLoop(p)
|
||||
p.revalidateLoop = revalidateLoop(p)
|
||||
|
||||
for i in 0 ..< concurrentOffers:
|
||||
p.offerWorkers.add(offerWorker(p))
|
||||
|
||||
proc stop*(p: PortalProtocol) =
  ## Cancels the revalidate and refresh loops when they are set, then cancels
  ## every offer worker and clears the list of workers.
  if p.revalidateLoop != nil:
    p.revalidateLoop.cancel()
  if p.refreshLoop != nil:
    p.refreshLoop.cancel()

  for idx in 0 ..< p.offerWorkers.len:
    p.offerWorkers[idx].cancel()
  p.offerWorkers = @[]
|
||||
|
||||
proc resolve*(p: PortalProtocol, id: NodeId): Future[Option[Node]] {.async.} =
|
||||
## Resolve a `Node` based on provided `NodeId`.
|
||||
##
|
||||
|
|
|
@ -154,6 +154,11 @@ proc propagateHistoryDb*(
|
|||
# TODO: This call will get the content we just stored in the db, so it
|
||||
# might be an improvement to directly pass it.
|
||||
await p.neighborhoodGossip(ContentKeysList(@[encode(value[0])]))
|
||||
|
||||
# Need to be sure that all offers were started. TODO: this is not great.
|
||||
while not p.offerQueueEmpty():
|
||||
error "WAITING FOR OFFER QUEUE EMPTY"
|
||||
await sleepAsync(500.milliseconds)
|
||||
return ok()
|
||||
else:
|
||||
return err(blockData.error)
|
||||
|
|
Loading…
Reference in New Issue