# Patch summary (leading text before the first `diff --git` header).
#
# logos_delivery/waku/node/waku_node.nim
#   * Adds an exported `relayReconnectFut: Future[void]` field next to
#     `wakuMix` / `wakuKademlia` / `ports`; it is accessed as
#     `node.relayReconnectFut` where `node: WakuNode`.
#   * `start`: the result of `node.reconnectRelayPeers()` is no longer awaited;
#     the returned future is stored in `node.relayReconnectFut` so that startup
#     continues immediately (per the in-code comment, that call waits a prune
#     backoff).
#   * `stop`: if `node.relayReconnectFut` is not nil it is cancelled with
#     `cancelAndWait()` before `node.subscriptionManager.stop()` runs.
#   * NOTE(review): before this patch an exception raised by
#     `reconnectRelayPeers()` propagated out of `start`; now nothing visible in
#     this patch inspects the stored future's outcome -- confirm failures are
#     handled/logged inside `reconnectRelayPeers` itself.
#
# logos_delivery/waku/waku.nim
#   * `stop`: in addition to `RequestConnectionStatus`, also calls
#     `clearProvider(waku.brokerCtx)` on `RequestProtocolHealth` and
#     `RequestHealthReport`.
#
# tests/node/test_wakunode_restart.nim (new file)
#   * Starts two relay nodes, connects node1 to node2, stops node1, then
#     restarts it under a 20 second timeout and checks `restarted`,
#     `node1.started` and a non-empty `node1.switch.peerInfo.listenAddrs`.
#   * NOTE(review): presumably `listenAddrs` is repopulated by `start`; if it is
#     retained from construction the last check would not prove the listener
#     was re-opened -- verify.

diff --git a/logos_delivery/waku/node/waku_node.nim b/logos_delivery/waku/node/waku_node.nim
index 665f0ba98..febc66731 100644
--- a/logos_delivery/waku/node/waku_node.nim
+++ b/logos_delivery/waku/node/waku_node.nim
@@ -136,6 +136,7 @@ type
     wakuMix*: WakuMix
     wakuKademlia*: WakuKademlia
     ports*: BoundPorts
+    relayReconnectFut*: Future[void]
 
   SubscriptionManager* = ref object of RootObj
     node*: WakuNode
@@ -620,8 +621,9 @@ proc start*(node: WakuNode) {.async.} =
   ## NOTE: This will dispatch gossipsub start to the WakuRelay.start method override
   await node.switch.start()
 
-  # After switch.start, run custom Logos Delivery relay start logic
-  await node.reconnectRelayPeers()
+  # Reconnect to known relay peers in the background; it waits a prune backoff
+  # and must not block startup.
+  node.relayReconnectFut = node.reconnectRelayPeers()
 
   node.started = true
 
@@ -650,6 +652,10 @@ proc start*(node: WakuNode) {.async.} =
 proc stop*(node: WakuNode) {.async.} =
   ## By stopping the switch we are stopping all the underlying mounted protocols
 
+  # Cancel the background relay reconnection (may still be in its backoff wait).
+  if not node.relayReconnectFut.isNil():
+    await node.relayReconnectFut.cancelAndWait()
+
   await node.subscriptionManager.stop()
 
   node.stopProvidersAndListeners()
diff --git a/logos_delivery/waku/waku.nim b/logos_delivery/waku/waku.nim
index 067b5b6ec..fa0562148 100644
--- a/logos_delivery/waku/waku.nim
+++ b/logos_delivery/waku/waku.nim
@@ -563,8 +563,10 @@ proc stop*(waku: Waku): Future[Result[void, string]] {.async: (raises: []).} =
   if not waku.healthMonitor.isNil():
     await waku.healthMonitor.stopHealthMonitor()
 
-  ## Clear RequestConnectionStatus provider
+  ## Clear all providers registered in start() so a later start() can re-set them.
   RequestConnectionStatus.clearProvider(waku.brokerCtx)
+  RequestProtocolHealth.clearProvider(waku.brokerCtx)
+  RequestHealthReport.clearProvider(waku.brokerCtx)
 
   if not waku.restServer.isNil():
     await waku.restServer.stop()
diff --git a/tests/node/test_wakunode_restart.nim b/tests/node/test_wakunode_restart.nim
new file mode 100644
index 000000000..8d58f5b9d
--- /dev/null
+++ b/tests/node/test_wakunode_restart.nim
@@ -0,0 +1,42 @@
+{.used.}
+
+import std/options
+import testutils/unittests, chronos, chronicles
+import libp2p/switch
+
+import logos_delivery/waku/[waku_node, waku_core, node/peer_manager]
+import ../testlib/[wakucore, wakunode, testasync]
+
+suite "WakuNode - restart (#3979)":
+  asyncTest "start -> stop -> start re-opens the listener promptly":
+    ## A restart must not block on the relay-reconnect backoff.
+    let
+      node1 =
+        newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
+      node2 =
+        newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
+
+    (await node1.mountRelay()).isOkOr:
+      raiseAssert "mountRelay node1: " & error
+    (await node2.mountRelay()).isOkOr:
+      raiseAssert "mountRelay node2: " & error
+
+    await allFutures(node1.start(), node2.start())
+
+    # node1 learns node2 as a relay peer, so a restart triggers reconnectRelayPeers.
+    await node1.connectToNodes(@[node2.peerInfo.toRemotePeerInfo()])
+
+    await node1.stop()
+
+    # The restart must complete promptly and yield a usable, listening node.
+    let startFut = node1.start()
+    let restarted = await startFut.withTimeout(20.seconds)
+    if not restarted:
+      await startFut.cancelAndWait()
+
+    check:
+      restarted
+      node1.started
+      node1.switch.peerInfo.listenAddrs.len > 0
+
+    await allFutures(node1.stop(), node2.stop())