Add log message on content query failure in lookup + refactor (#3079)

* Add log message on content query failure in lookup

* Refactor response handling Portal wire
This commit is contained in:
kdeme 2025-02-16 20:08:28 +01:00 committed by GitHub
parent c300b41c07
commit c0e329d768
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -758,14 +758,11 @@ proc ping*(
): Future[PortalResult[(uint64, CapabilitiesPayload)]] {. ): Future[PortalResult[(uint64, CapabilitiesPayload)]] {.
async: (raises: [CancelledError]) async: (raises: [CancelledError])
.} = .} =
let pongResponse = await p.pingImpl(dst) let pong = ?(await p.pingImpl(dst))
if pongResponse.isOk():
# Update last time we pinged this node # Update last time we pinged this node
p.pingTimings[dst.id] = now(chronos.Moment) p.pingTimings[dst.id] = now(chronos.Moment)
let pong = pongResponse.get()
# Note: currently only decoding as capabilities payload as this is the only # Note: currently only decoding as capabilities payload as this is the only
# one that we support sending. # one that we support sending.
if pong.payload_type != CapabilitiesType: if pong.payload_type != CapabilitiesType:
@ -777,22 +774,15 @@ proc ping*(
p.radiusCache.put(dst.id, payload.data_radius) p.radiusCache.put(dst.id, payload.data_radius)
ok((pong.enrSeq, payload)) ok((pong.enrSeq, payload))
else:
err(pongResponse.error)
proc findNodes*( proc findNodes*(
p: PortalProtocol, dst: Node, distances: seq[uint16] p: PortalProtocol, dst: Node, distances: seq[uint16]
): Future[PortalResult[seq[Node]]] {.async: (raises: [CancelledError]).} = ): Future[PortalResult[seq[Node]]] {.async: (raises: [CancelledError]).} =
let nodesMessage = await p.findNodesImpl(dst, List[uint16, 256](distances)) let response = ?(await p.findNodesImpl(dst, List[uint16, 256](distances)))
if nodesMessage.isOk():
let records = recordsFromBytes(nodesMessage.get().enrs) let records = ?recordsFromBytes(response.enrs)
if records.isOk():
# TODO: distance function is wrong here for state, fix + tests # TODO: distance function is wrong here for state, fix + tests
return ok(verifyNodesRecords(records.get(), dst, enrsResultLimit, distances)) ok(verifyNodesRecords(records, dst, enrsResultLimit, distances))
else:
return err(records.error)
else:
return err(nodesMessage.error)
proc findContent*( proc findContent*(
p: PortalProtocol, dst: Node, contentKey: ContentKeyByteList p: PortalProtocol, dst: Node, contentKey: ContentKeyByteList
@ -801,11 +791,9 @@ proc findContent*(
node = dst node = dst
contentKey contentKey
let contentMessageResponse = await p.findContentImpl(dst, contentKey) let response = ?(await p.findContentImpl(dst, contentKey))
if contentMessageResponse.isOk(): case response.contentMessageType
let m = contentMessageResponse.get()
case m.contentMessageType
of connectionIdType: of connectionIdType:
let nodeAddress = NodeAddress.init(dst).valueOr: let nodeAddress = NodeAddress.init(dst).valueOr:
# This should not happen as it comes after a successful talkreq/talkresp # This should not happen as it comes after a successful talkreq/talkresp
@ -816,7 +804,7 @@ proc findContent*(
await p.stream.connectTo( await p.stream.connectTo(
# uTP protocol uses BE for all values in the header, incl. connection id # uTP protocol uses BE for all values in the header, incl. connection id
nodeAddress, nodeAddress,
uint16.fromBytesBE(m.connectionId), uint16.fromBytesBE(response.connectionId),
) )
) )
@ -838,11 +826,10 @@ proc findContent*(
let content = await readFut let content = await readFut
# socket received remote FIN and drained whole buffer, it can be # socket received remote FIN and drained whole buffer, it can be
# safely destroyed without notifying remote # safely destroyed without notifying remote
debug "Socket read fully", socketKey = socket.socketKey trace "Socket read fully", socketKey = socket.socketKey
socket.destroy() socket.destroy()
return ok( return
FoundContent(src: dst, kind: Content, content: content, utpTransfer: true) ok(FoundContent(src: dst, kind: Content, content: content, utpTransfer: true))
)
else: else:
debug "Socket read time-out", socketKey = socket.socketKey debug "Socket read time-out", socketKey = socket.socketKey
# Note: This might look a bit strange, but not doing a socket.close() # Note: This might look a bit strange, but not doing a socket.close()
@ -859,24 +846,16 @@ proc findContent*(
socket.close() socket.close()
raise exc raise exc
of contentType: of contentType:
return ok( ok(
FoundContent( FoundContent(
src: dst, kind: Content, content: m.content.asSeq(), utpTransfer: false src: dst, kind: Content, content: response.content.asSeq(), utpTransfer: false
) )
) )
of enrsType: of enrsType:
let records = recordsFromBytes(m.enrs) let records = ?recordsFromBytes(response.enrs)
if records.isOk(): let verifiedNodes = verifyNodesRecords(records, dst, enrsResultLimit)
let verifiedNodes = verifyNodesRecords(records.get(), dst, enrsResultLimit)
return ok(FoundContent(src: dst, kind: Nodes, nodes: verifiedNodes)) ok(FoundContent(src: dst, kind: Nodes, nodes: verifiedNodes))
else:
return err("Content message returned invalid ENRs")
else:
debug "FindContent failed due to find content request failure ",
error = contentMessageResponse.error
return err("No content response")
proc getContentKeys(o: OfferRequest): ContentKeysList = proc getContentKeys(o: OfferRequest): ContentKeysList =
case o.kind case o.kind
@ -884,9 +863,10 @@ proc getContentKeys(o: OfferRequest): ContentKeysList =
var contentKeys: ContentKeysList var contentKeys: ContentKeysList
for info in o.contentList: for info in o.contentList:
discard contentKeys.add(info.contentKey) discard contentKeys.add(info.contentKey)
return contentKeys
contentKeys
of Database: of Database:
return o.contentKeys o.contentKeys
func getMaxOfferedContentKeys*(protocolIdLen: uint32, maxKeySize: uint32): int = func getMaxOfferedContentKeys*(protocolIdLen: uint32, maxKeySize: uint32): int =
## Calculates how many ContentKeys will fit in one offer message which ## Calculates how many ContentKeys will fit in one offer message which
@ -929,16 +909,13 @@ proc offer(
node = o.dst node = o.dst
contentKeys contentKeys
debug "Offering content" trace "Offering content"
portal_content_keys_offered.observe( portal_content_keys_offered.observe(
contentKeys.len().int64, labelValues = [$p.protocolId] contentKeys.len().int64, labelValues = [$p.protocolId]
) )
let acceptMessageResponse = await p.offerImpl(o.dst, contentKeys) let response = ?(await p.offerImpl(o.dst, contentKeys))
if acceptMessageResponse.isOk():
let m = acceptMessageResponse.get()
let contentKeysLen = let contentKeysLen =
case o.kind case o.kind
@ -947,35 +924,35 @@ proc offer(
of Database: of Database:
o.contentKeys.len() o.contentKeys.len()
if m.contentKeys.len() != contentKeysLen: if response.contentKeys.len() != contentKeysLen:
# TODO: # TODO:
# When there is such a system, the peer should get scored negatively here. # When there is such a system, the peer should get scored negatively here.
error "Accepted content key bitlist has invalid size", error "Accepted content key bitlist has invalid size",
bitListLen = m.contentKeys.len(), contentKeysLen bitListLen = response.contentKeys.len(), contentKeysLen
return err("Accepted content key bitlist has invalid size") return err("Accepted content key bitlist has invalid size")
let acceptedKeysAmount = m.contentKeys.countOnes() let acceptedKeysAmount = response.contentKeys.countOnes()
portal_content_keys_accepted.observe( portal_content_keys_accepted.observe(
acceptedKeysAmount.int64, labelValues = [$p.protocolId] acceptedKeysAmount.int64, labelValues = [$p.protocolId]
) )
if acceptedKeysAmount == 0: if acceptedKeysAmount == 0:
debug "No content accepted" debug "No content accepted"
# Don't open an uTP stream if no content was requested # Don't open an uTP stream if no content was requested
return ok(m.contentKeys) return ok(response.contentKeys)
let nodeAddress = NodeAddress.init(o.dst).valueOr: let nodeAddress = NodeAddress.init(o.dst).valueOr:
# This should not happen as it comes after a successful talkreq/talkresp # This should not happen as it comes after a successful talkreq/talkresp
return err("Trying to connect to node with unknown address: " & $o.dst.id) return err("Trying to connect to node with unknown address: " & $o.dst.id)
let socket = let socket =
?(await p.stream.connectTo(nodeAddress, uint16.fromBytesBE(m.connectionId))) ?(await p.stream.connectTo(nodeAddress, uint16.fromBytesBE(response.connectionId)))
template lenu32(x: untyped): untyped = template lenu32(x: untyped): untyped =
uint32(len(x)) uint32(len(x))
case o.kind case o.kind
of Direct: of Direct:
for i, b in m.contentKeys: for i, b in response.contentKeys:
if b: if b:
let content = o.contentList[i].content let content = o.contentList[i].content
var output = memoryOutput() var output = memoryOutput()
@ -994,7 +971,7 @@ proc offer(
trace "Offered content item send", dataWritten = dataWritten trace "Offered content item send", dataWritten = dataWritten
of Database: of Database:
for i, b in m.contentKeys: for i, b in response.contentKeys:
if b: if b:
let let
contentKey = o.contentKeys[i] contentKey = o.contentKeys[i]
@ -1028,19 +1005,15 @@ proc offer(
trace "Offered content item send", dataWritten = dataWritten trace "Offered content item send", dataWritten = dataWritten
await socket.closeWait() await socket.closeWait()
debug "Content successfully offered" trace "Content successfully offered"
return ok(m.contentKeys) return ok(response.contentKeys)
else:
debug "Offer failed due to accept request failure ",
error = acceptMessageResponse.error
return err("No or invalid accept response: " & acceptMessageResponse.error)
proc offer*( proc offer*(
p: PortalProtocol, dst: Node, contentKeys: ContentKeysList p: PortalProtocol, dst: Node, contentKeys: ContentKeysList
): Future[PortalResult[ContentKeysBitList]] {.async: (raises: [CancelledError]).} = ): Future[PortalResult[ContentKeysBitList]] {.async: (raises: [CancelledError]).} =
let req = OfferRequest(dst: dst, kind: Database, contentKeys: contentKeys) let req = OfferRequest(dst: dst, kind: Database, contentKeys: contentKeys)
return await p.offer(req) await p.offer(req)
proc offer*( proc offer*(
p: PortalProtocol, dst: Node, content: seq[ContentKV] p: PortalProtocol, dst: Node, content: seq[ContentKV]
@ -1052,7 +1025,7 @@ proc offer*(
let contentList = List[ContentKV, contentKeysLimit].init(content) let contentList = List[ContentKV, contentKeysLimit].init(content)
let req = OfferRequest(dst: dst, kind: Direct, contentList: contentList) let req = OfferRequest(dst: dst, kind: Direct, contentList: contentList)
return await p.offer(req) await p.offer(req)
proc offerWorker(p: PortalProtocol) {.async: (raises: [CancelledError]).} = proc offerWorker(p: PortalProtocol) {.async: (raises: [CancelledError]).} =
while true: while true:
@ -1297,6 +1270,7 @@ proc contentLookup*(
) )
) )
else: else:
debug "Content query failed", error = contentResult.error
# Note: Not doing any retries here as retries can/should be done on a # Note: Not doing any retries here as retries can/should be done on a
# higher layer. However, depending on the failure we could attempt a retry, # higher layer. However, depending on the failure we could attempt a retry,
# e.g. on uTP specific errors. # e.g. on uTP specific errors.