Avoid sending more requests to a node that timed out + request metrics

This commit is contained in:
kdeme 2020-12-16 00:07:49 +01:00
parent 68c9b7b3ad
commit e1acc1ae2d
No known key found for this signature in database
GPG Key ID: 4E8DD21420AF43F5
1 changed file with 13 additions and 4 deletions

View File

@ -75,7 +75,7 @@
import import
std/[tables, sets, options, math, sequtils, algorithm], std/[tables, sets, options, math, sequtils, algorithm],
stew/shims/net as stewNet, json_serialization/std/net, stew/shims/net as stewNet, json_serialization/std/net,
stew/endians2, chronicles, chronos, stint, bearssl, stew/endians2, chronicles, chronos, stint, bearssl, metrics,
eth/[rlp, keys, async_utils], eth/[rlp, keys, async_utils],
types, encoding, node, routing_table, enr, random2, sessions types, encoding, node, routing_table, enr, random2, sessions
@ -85,13 +85,16 @@ export options
{.push raises: [Defect].} {.push raises: [Defect].}
declarePublicGauge discovery_message_requests,
"Discovery protocol message requests", labels = ["response"]
logScope: logScope:
topics = "discv5" topics = "discv5"
const const
alpha = 3 ## Kademlia concurrency factor alpha = 3 ## Kademlia concurrency factor
lookupRequestLimit = 3 lookupRequestLimit = 3
findNodeResultLimit = 15 # applies in FINDNODE handler findNodeResultLimit = 16 # applies in FINDNODE handler
maxNodesPerMessage = 3 maxNodesPerMessage = 3
lookupInterval = 60.seconds ## Interval of launching a random lookup to lookupInterval = 60.seconds ## Interval of launching a random lookup to
## populate the routing table. go-ethereum seems to do 3 runs every 30 ## populate the routing table. go-ethereum seems to do 3 runs every 30
@ -566,6 +569,7 @@ proc sendMessage*[T: SomeMessage](d: Protocol, toNode: Node, m: T):
d.registerRequest(toNode, message, nonce) d.registerRequest(toNode, message, nonce)
trace "Send message packet", dstId = toNode.id, address, kind = messageKind(T) trace "Send message packet", dstId = toNode.id, address, kind = messageKind(T)
d.send(toNode, data) d.send(toNode, data)
discovery_message_requests.inc()
return reqId return reqId
proc ping*(d: Protocol, toNode: Node): proc ping*(d: Protocol, toNode: Node):
@ -582,6 +586,7 @@ proc ping*(d: Protocol, toNode: Node):
return ok(resp.get().pong) return ok(resp.get().pong)
else: else:
d.replaceNode(toNode) d.replaceNode(toNode)
discovery_message_requests.inc(labelValues = ["timed_out"])
return err("Pong message not received in time") return err("Pong message not received in time")
proc findNode*(d: Protocol, toNode: Node, distances: seq[uint32]): proc findNode*(d: Protocol, toNode: Node, distances: seq[uint32]):
@ -599,6 +604,7 @@ proc findNode*(d: Protocol, toNode: Node, distances: seq[uint32]):
return ok(res) return ok(res)
else: else:
d.replaceNode(toNode) d.replaceNode(toNode)
discovery_message_requests.inc(labelValues = ["timed_out"])
return err(nodes.error) return err(nodes.error)
proc talkreq*(d: Protocol, toNode: Node, protocol, request: seq[byte]): proc talkreq*(d: Protocol, toNode: Node, protocol, request: seq[byte]):
@ -615,6 +621,7 @@ proc talkreq*(d: Protocol, toNode: Node, protocol, request: seq[byte]):
return ok(resp.get().talkresp) return ok(resp.get().talkresp)
else: else:
d.replaceNode(toNode) d.replaceNode(toNode)
discovery_message_requests.inc(labelValues = ["timed_out"])
return err("Talk response message not received in time") return err("Talk response message not received in time")
proc lookupDistances(target, dest: NodeId): seq[uint32] {.raises: [Defect].} = proc lookupDistances(target, dest: NodeId): seq[uint32] {.raises: [Defect].} =
@ -641,6 +648,8 @@ proc lookupWorker(d: Protocol, destNode: Node, target: NodeId):
# TODO: I guess it makes sense to limit here also to `findNodeResultLimit`? # TODO: I guess it makes sense to limit here also to `findNodeResultLimit`?
result.add(r[]) result.add(r[])
inc i inc i
else:
break
for n in result: for n in result:
discard d.addNode(n) discard d.addNode(n)
@ -684,7 +693,7 @@ proc lookup*(d: Protocol, target: NodeId): Future[seq[Node]]
if index != -1: if index != -1:
pendingQueries.del(index) pendingQueries.del(index)
else: else:
error "Resulting query should have beeen in the pending queries" error "Resulting query should have been in the pending queries"
let nodes = query.read let nodes = query.read
# TODO: Remove node on timed-out query? # TODO: Remove node on timed-out query?