Mirror of https://github.com/status-im/nimbus-eth1.git, synced 2025-01-26 20:19:31 +00:00
Fix flaky portal tests - part II (#2829)
* Remove two simple sleeps from history network test

  Replace the simple sleeps in test_history_network with retries + sleeps.
  The current simple sleep settings would occasionally still fail on CI.

* Increase retries on test_portal_testnet from 2 to 3.

* Add 1 second sleep after headers with proof get gossiped.
This commit is contained in:
parent 6374bfb39c
commit cff7091826
test_portal_testnet.nim

@@ -14,7 +14,7 @@ import
   chronos,
   stew/byteutils,
   eth/p2p/discoveryv5/random2,
-  eth/keys,
+  eth/common/keys,
   ../common/common_types,
   ../rpc/portal_rpc_client,
   ../rpc/eth_rpc_client,
@@ -79,7 +79,8 @@ proc withRetries[A](
       if tries > numRetries:
         # if we reached max number of retries fail
         let msg =
-          "Call failed with msg: " & exc.msg & ", for node with idx: " & $nodeIdx
+          "Call failed with msg: " & exc.msg & ", for node with idx: " & $nodeIdx &
+          ", after " & $tries & " tries."
         raise newException(ValueError, msg)

       inc tries
@@ -94,7 +95,7 @@ proc retryUntil[A](
     f: FutureCallback[A], c: CheckCallback[A], checkFailMessage: string, nodeIdx: int
 ): Future[A] =
   # some reasonable limits, which will cause waits as: 1, 2, 4, 8, 16, 32 seconds
-  return withRetries(f, c, 2, seconds(1), checkFailMessage, nodeIdx)
+  return withRetries(f, c, 3, seconds(1), checkFailMessage, nodeIdx)

 # Note:
 # When doing json-rpc requests following `RpcPostError` can occur:
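For context, these hunks show only the failure path and the call site of withRetries. Below is a minimal sketch of what the full helper plausibly looks like, reconstructed from the fragments visible in this diff; the type aliases, the loop structure, and the doubling of the wait are assumptions, not the repository's exact code.

import chronos

type
  FutureCallback[A] = proc(): Future[A] {.gcsafe.}
  CheckCallback[A] = proc(a: A): bool {.gcsafe.}

proc withRetries[A](
    f: FutureCallback[A],
    c: CheckCallback[A],
    numRetries: int,
    initialWait: Duration,
    checkFailMessage: string,
    nodeIdx: int,
): Future[A] {.async.} =
  # Call `f` until `c` accepts its result; a failed check or a failed call
  # triggers a retry. With numRetries = 3 and initialWait = 1 s the waits
  # between attempts are 1, 2, 4 and 8 seconds before giving up.
  var tries = 0
  var currentWait = initialWait
  while true:
    try:
      let res = await f()
      if c(res):
        return res
      # A failing check is routed through the same retry path as an error.
      raise newException(ValueError, checkFailMessage)
    except CatchableError as exc:
      if tries > numRetries:
        # if we reached max number of retries fail (matches the hunk above)
        let msg =
          "Call failed with msg: " & exc.msg & ", for node with idx: " & $nodeIdx &
          ", after " & $tries & " tries."
        raise newException(ValueError, msg)

      inc tries
      await sleepAsync(currentWait)
      currentWait = currentWait * 2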
@@ -261,9 +262,20 @@ procSuite "Portal testnet tests":

     # Gossiping all block headers with proof first, as bodies and receipts
     # require them for validation.
-    for (content, contentKey) in blockHeadersWithProof:
-      discard
-        (await clients[0].portal_historyGossip(content.toHex(), contentKey.toHex()))
+    for (contentKey, contentValue) in blockHeadersWithProof:
+      discard (
+        await clients[0].portal_historyGossip(contentKey.toHex(), contentValue.toHex())
+      )
+
+    # TODO: Fix iteration order: because the blockData gets parsed into a
+    # BlockDataTable, iterating over it results in gossiping the block bodies
+    # and receipts in a different order than the headers.
+    # Because of this, the block bodies and receipts for block
+    # 0x6251d65b8a8668efabe2f89c96a5b6332d83b3bbe585089ea6b2ab9b6754f5e9
+    # come right after the headers with proof. This is likely to cause validation
+    # failures on the nodes, as the block bodies and receipts require the header
+    # to get validated.
+    await sleepAsync(seconds(1))

     # Gossiping all block bodies and receipts.
     for b in blocks(blockData, false):
test_history_network.nim

@@ -54,7 +54,19 @@ proc stop(hn: HistoryNode) {.async.} =
   await hn.discoveryProtocol.closeWait()

 proc containsId(hn: HistoryNode, contentId: ContentId): bool =
-  return hn.historyNetwork.contentDB.contains(contentId)
+  hn.historyNetwork.contentDB.contains(contentId)
+
+proc checkContainsIdWithRetry(
+    historyNode: HistoryNode, id: ContentId
+) {.async: (raises: [CancelledError]).} =
+  var res = false
+  for i in 0 .. 50:
+    res = historyNode.containsId(id)
+    if res:
+      break
+    await sleepAsync(10.milliseconds)
+
+  check res

 proc createEmptyHeaders(fromNum: int, toNum: int): seq[Header] =
   var headers: seq[Header]
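This new helper is what replaces the fixed 100 ms sleeps removed in the hunks below: instead of waiting a fixed amount and asserting once, it polls up to 51 times at 10 ms intervals, so the check passes as soon as the content is stored and only fails after roughly 51 x 10 ms = 510 ms. A hedged before/after sketch of the call sites, assuming the surrounding test state (historyNode2, contentKV) shown in the hunks below:

# Before: fixed sleep, then a single assertion; flaky when validation
# and storage take longer than 100 ms on a loaded CI machine.
await sleepAsync(100.milliseconds)
check historyNode2.containsId(toContentId(contentKV.contentKey)) == true

# After: bounded polling; waits only as long as actually needed.
await historyNode2.checkContainsIdWithRetry(toContentId(contentKV.contentKey))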
@@ -216,17 +228,10 @@ procSuite "History Content Network":
     while not historyNode2.historyNetwork.contentQueue.empty():
       await sleepAsync(1.milliseconds)

-    # Note: It seems something changed in chronos, causing different behavior.
-    # Seems that validateContent called through processContentLoop used to
-    # run immediately in case of a "non async shortpath". This is no longer the
-    # case and causes the content not yet to be validated and thus stored at
-    # this step. Add an await here so that the store can happen.
-    await sleepAsync(100.milliseconds)
-
     for i, contentKV in contentKVs:
       let id = toContentId(contentKV.contentKey)
       if i < len(contentKVs) - 1:
-        check historyNode2.containsId(id) == true
+        await historyNode2.checkContainsIdWithRetry(id)
       else:
         check historyNode2.containsId(id) == false
@@ -283,11 +288,9 @@ procSuite "History Content Network":
     while not historyNode2.historyNetwork.contentQueue.empty():
       await sleepAsync(1.milliseconds)

-    await sleepAsync(100.milliseconds)
-
     for contentKV in contentKVs:
       let id = toContentId(contentKV.contentKey)
-      check historyNode2.containsId(id) == true
+      await historyNode2.checkContainsIdWithRetry(id)

     await historyNode1.stop()
     await historyNode2.stop()