mirror of
https://github.com/logos-messaging/logos-delivery.git
synced 2026-05-16 19:59:30 +00:00
feat: active filter subscription management for edge nodes (#3773)
feat: active filter subscription management for edge nodes ## Subscription Manager * edgeFilterSubLoop reconciles desired vs actual filter subscriptions * edgeFilterHealthLoop pings filter peers, evicts stale ones * EdgeFilterSubState per-shard tracking of confirmed peers and health * best-effort unsubscribe on peer removal * RequestEdgeShardHealth and RequestEdgeFilterPeerCount broker providers ## WakuNode * Remove old edge health loop (loopEdgeHealth, edgeHealthEvent, calculateEdgeTopicHealth) * Register MessageSeenEvent push handler on filter client during start * startDeliveryService now returns `Result[void, string]` and propagates errors ## Health Monitor * getFilterClientHealth queries RequestEdgeFilterPeerCount via broker * Shard/content health providers fall back to RequestEdgeShardHealth when relay inactive * Listen to EventShardTopicHealthChange for health recalculation * Add missing return p.notReady() on failed edge filter peer count request * HealthyThreshold constant moved to `connection_status.nim` ## Broker types * RequestEdgeShardHealth, RequestEdgeFilterPeerCount request types * EventShardTopicHealthChange event type ## Filter Client * Add timeout parameter to ping proc ## Tests * Health monitor event tests with per-node lockNewGlobalBrokerContext * Edge (light client) health update test * Edge health driven by confirmed filter subscriptions test * API subscription tests: sub/receive, failover, peer replacement Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> Co-authored by Zoltan Nagy
This commit is contained in:
parent
0623c10635
commit
dc026bbff1
@ -1,6 +1,6 @@
|
|||||||
{.used.}
|
{.used.}
|
||||||
|
|
||||||
import std/[strutils, net, options, sets]
|
import std/[strutils, sequtils, net, options, sets, tables]
|
||||||
import chronos, testutils/unittests, stew/byteutils
|
import chronos, testutils/unittests, stew/byteutils
|
||||||
import libp2p/[peerid, peerinfo, multiaddress, crypto/crypto]
|
import libp2p/[peerid, peerinfo, multiaddress, crypto/crypto]
|
||||||
import ../testlib/[common, wakucore, wakunode, testasync]
|
import ../testlib/[common, wakucore, wakunode, testasync]
|
||||||
@ -13,12 +13,12 @@ import
|
|||||||
common/broker/broker_context,
|
common/broker/broker_context,
|
||||||
events/message_events,
|
events/message_events,
|
||||||
waku_relay/protocol,
|
waku_relay/protocol,
|
||||||
|
node/kernel_api/filter,
|
||||||
|
node/delivery_service/subscription_manager,
|
||||||
]
|
]
|
||||||
import waku/factory/waku_conf
|
import waku/factory/waku_conf
|
||||||
import tools/confutils/cli_args
|
import tools/confutils/cli_args
|
||||||
|
|
||||||
# TODO: Edge testing (after MAPI edge support is completed)
|
|
||||||
|
|
||||||
const TestTimeout = chronos.seconds(10)
|
const TestTimeout = chronos.seconds(10)
|
||||||
const NegativeTestTimeout = chronos.seconds(2)
|
const NegativeTestTimeout = chronos.seconds(2)
|
||||||
|
|
||||||
@ -60,8 +60,10 @@ proc waitForEvents(
|
|||||||
return await manager.receivedEvent.wait().withTimeout(timeout)
|
return await manager.receivedEvent.wait().withTimeout(timeout)
|
||||||
|
|
||||||
type TestNetwork = ref object
|
type TestNetwork = ref object
|
||||||
publisher: WakuNode
|
publisher: WakuNode # Relay node that publishes messages in tests.
|
||||||
|
meshBuddy: WakuNode # Extra relay peer for publisher's mesh (Edge tests only).
|
||||||
subscriber: Waku
|
subscriber: Waku
|
||||||
|
# The receiver node in tests. Edge node in edge tests, Core node in relay tests.
|
||||||
publisherPeerInfo: RemotePeerInfo
|
publisherPeerInfo: RemotePeerInfo
|
||||||
|
|
||||||
proc createApiNodeConf(
|
proc createApiNodeConf(
|
||||||
@ -94,8 +96,12 @@ proc setupNetwork(
|
|||||||
lockNewGlobalBrokerContext:
|
lockNewGlobalBrokerContext:
|
||||||
net.publisher =
|
net.publisher =
|
||||||
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
net.publisher.mountMetadata(3, @[0'u16]).expect("Failed to mount metadata")
|
net.publisher.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata"
|
||||||
|
)
|
||||||
(await net.publisher.mountRelay()).expect("Failed to mount relay")
|
(await net.publisher.mountRelay()).expect("Failed to mount relay")
|
||||||
|
if mode == cli_args.WakuMode.Edge:
|
||||||
|
await net.publisher.mountFilter()
|
||||||
await net.publisher.mountLibp2pPing()
|
await net.publisher.mountLibp2pPing()
|
||||||
await net.publisher.start()
|
await net.publisher.start()
|
||||||
|
|
||||||
@ -104,16 +110,32 @@ proc setupNetwork(
|
|||||||
proc dummyHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} =
|
proc dummyHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} =
|
||||||
discard
|
discard
|
||||||
|
|
||||||
# Subscribe the publisher to all shards to guarantee a GossipSub mesh with the subscriber.
|
var shards: seq[PubsubTopic]
|
||||||
# Currently, Core/Relay nodes auto-subscribe to all network shards on boot, but if
|
|
||||||
# that changes, this will be needed to cause the publisher to have shard interest
|
|
||||||
# for any shards the subscriber may want to use, which is required for waitForMesh to work.
|
|
||||||
for i in 0 ..< numShards.int:
|
for i in 0 ..< numShards.int:
|
||||||
let shard = PubsubTopic("/waku/2/rs/3/" & $i)
|
shards.add(PubsubTopic("/waku/2/rs/3/" & $i))
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
net.publisher.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
net.publisher.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
"Failed to sub publisher"
|
"Failed to sub publisher"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if mode == cli_args.WakuMode.Edge:
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
net.meshBuddy =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
net.meshBuddy.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on meshBuddy"
|
||||||
|
)
|
||||||
|
(await net.meshBuddy.mountRelay()).expect("Failed to mount relay on meshBuddy")
|
||||||
|
await net.meshBuddy.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
net.meshBuddy.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub meshBuddy"
|
||||||
|
)
|
||||||
|
|
||||||
|
await net.meshBuddy.connectToNodes(@[net.publisherPeerInfo])
|
||||||
|
|
||||||
net.subscriber = await setupSubscriberNode(createApiNodeConf(mode, numShards))
|
net.subscriber = await setupSubscriberNode(createApiNodeConf(mode, numShards))
|
||||||
|
|
||||||
await net.subscriber.node.connectToNodes(@[net.publisherPeerInfo])
|
await net.subscriber.node.connectToNodes(@[net.publisherPeerInfo])
|
||||||
@ -125,6 +147,10 @@ proc teardown(net: TestNetwork) {.async.} =
|
|||||||
(await net.subscriber.stop()).expect("Failed to stop subscriber node")
|
(await net.subscriber.stop()).expect("Failed to stop subscriber node")
|
||||||
net.subscriber = nil
|
net.subscriber = nil
|
||||||
|
|
||||||
|
if not isNil(net.meshBuddy):
|
||||||
|
await net.meshBuddy.stop()
|
||||||
|
net.meshBuddy = nil
|
||||||
|
|
||||||
if not isNil(net.publisher):
|
if not isNil(net.publisher):
|
||||||
await net.publisher.stop()
|
await net.publisher.stop()
|
||||||
net.publisher = nil
|
net.publisher = nil
|
||||||
@ -141,18 +167,34 @@ proc waitForMesh(node: WakuNode, shard: PubsubTopic) {.async.} =
|
|||||||
await sleepAsync(100.milliseconds)
|
await sleepAsync(100.milliseconds)
|
||||||
raise newException(ValueError, "GossipSub Mesh failed to stabilize on " & shard)
|
raise newException(ValueError, "GossipSub Mesh failed to stabilize on " & shard)
|
||||||
|
|
||||||
|
proc waitForEdgeSubs(w: Waku, shard: PubsubTopic) {.async.} =
|
||||||
|
let sm = w.deliveryService.subscriptionManager
|
||||||
|
for _ in 0 ..< 50:
|
||||||
|
if sm.edgeFilterPeerCount(shard) > 0:
|
||||||
|
return
|
||||||
|
await sleepAsync(100.milliseconds)
|
||||||
|
raise newException(ValueError, "Edge filter subscription failed on " & shard)
|
||||||
|
|
||||||
proc publishToMesh(
|
proc publishToMesh(
|
||||||
net: TestNetwork, contentTopic: ContentTopic, payload: seq[byte]
|
net: TestNetwork, contentTopic: ContentTopic, payload: seq[byte]
|
||||||
): Future[Result[int, string]] {.async.} =
|
): Future[Result[int, string]] {.async.} =
|
||||||
|
# Publishes a message from "publisher" via relay into the gossipsub mesh.
|
||||||
let shard = net.subscriber.node.getRelayShard(contentTopic)
|
let shard = net.subscriber.node.getRelayShard(contentTopic)
|
||||||
|
|
||||||
await waitForMesh(net.publisher, shard)
|
await waitForMesh(net.publisher, shard)
|
||||||
|
|
||||||
let msg = WakuMessage(
|
let msg = WakuMessage(
|
||||||
payload: payload, contentTopic: contentTopic, version: 0, timestamp: now()
|
payload: payload, contentTopic: contentTopic, version: 0, timestamp: now()
|
||||||
)
|
)
|
||||||
return await net.publisher.publish(some(shard), msg)
|
return await net.publisher.publish(some(shard), msg)
|
||||||
|
|
||||||
|
proc publishToMeshAfterEdgeReady(
|
||||||
|
net: TestNetwork, contentTopic: ContentTopic, payload: seq[byte]
|
||||||
|
): Future[Result[int, string]] {.async.} =
|
||||||
|
# First, ensure "subscriber" node (an edge node) is subscribed and ready to receive.
|
||||||
|
# Afterwards, "publisher" (relay node) sends the message in the gossipsub network.
|
||||||
|
let shard = net.subscriber.node.getRelayShard(contentTopic)
|
||||||
|
await waitForEdgeSubs(net.subscriber, shard)
|
||||||
|
return await net.publishToMesh(contentTopic, payload)
|
||||||
|
|
||||||
suite "Messaging API, SubscriptionManager":
|
suite "Messaging API, SubscriptionManager":
|
||||||
asyncTest "Subscription API, relay node auto subscribe and receive message":
|
asyncTest "Subscription API, relay node auto subscribe and receive message":
|
||||||
let net = await setupNetwork(1)
|
let net = await setupNetwork(1)
|
||||||
@ -398,3 +440,370 @@ suite "Messaging API, SubscriptionManager":
|
|||||||
activeSubs.add(t)
|
activeSubs.add(t)
|
||||||
|
|
||||||
await verifyNetworkState(activeSubs)
|
await verifyNetworkState(activeSubs)
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node subscribe and receive message":
|
||||||
|
let net = await setupNetwork(1, cli_args.WakuMode.Edge)
|
||||||
|
defer:
|
||||||
|
await net.teardown()
|
||||||
|
|
||||||
|
let testTopic = ContentTopic("/waku/2/test-content/proto")
|
||||||
|
(await net.subscriber.subscribe(testTopic)).expect("failed to subscribe")
|
||||||
|
|
||||||
|
let eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
defer:
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
discard (await net.publishToMeshAfterEdgeReady(testTopic, "Hello, edge!".toBytes())).expect(
|
||||||
|
"Publish failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
require eventManager.receivedMessages.len == 1
|
||||||
|
check eventManager.receivedMessages[0].contentTopic == testTopic
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node ignores unsubscribed content topics":
|
||||||
|
let net = await setupNetwork(1, cli_args.WakuMode.Edge)
|
||||||
|
defer:
|
||||||
|
await net.teardown()
|
||||||
|
|
||||||
|
let subbedTopic = ContentTopic("/waku/2/subbed-topic/proto")
|
||||||
|
let ignoredTopic = ContentTopic("/waku/2/ignored-topic/proto")
|
||||||
|
(await net.subscriber.subscribe(subbedTopic)).expect("failed to subscribe")
|
||||||
|
|
||||||
|
let eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
defer:
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
discard (await net.publishToMesh(ignoredTopic, "Ghost Msg".toBytes())).expect(
|
||||||
|
"Publish failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
check not await eventManager.waitForEvents(NegativeTestTimeout)
|
||||||
|
check eventManager.receivedMessages.len == 0
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node unsubscribe stops message receipt":
|
||||||
|
let net = await setupNetwork(1, cli_args.WakuMode.Edge)
|
||||||
|
defer:
|
||||||
|
await net.teardown()
|
||||||
|
|
||||||
|
let testTopic = ContentTopic("/waku/2/unsub-test/proto")
|
||||||
|
|
||||||
|
(await net.subscriber.subscribe(testTopic)).expect("failed to subscribe")
|
||||||
|
net.subscriber.unsubscribe(testTopic).expect("failed to unsubscribe")
|
||||||
|
|
||||||
|
let eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
defer:
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
discard (await net.publishToMesh(testTopic, "Should be dropped".toBytes())).expect(
|
||||||
|
"Publish failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
check not await eventManager.waitForEvents(NegativeTestTimeout)
|
||||||
|
check eventManager.receivedMessages.len == 0
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node overlapping topics isolation":
|
||||||
|
let net = await setupNetwork(1, cli_args.WakuMode.Edge)
|
||||||
|
defer:
|
||||||
|
await net.teardown()
|
||||||
|
|
||||||
|
let topicA = ContentTopic("/waku/2/topic-a/proto")
|
||||||
|
let topicB = ContentTopic("/waku/2/topic-b/proto")
|
||||||
|
(await net.subscriber.subscribe(topicA)).expect("failed to sub A")
|
||||||
|
(await net.subscriber.subscribe(topicB)).expect("failed to sub B")
|
||||||
|
|
||||||
|
let shard = net.subscriber.node.getRelayShard(topicA)
|
||||||
|
await waitForEdgeSubs(net.subscriber, shard)
|
||||||
|
|
||||||
|
let eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
defer:
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
net.subscriber.unsubscribe(topicA).expect("failed to unsub A")
|
||||||
|
|
||||||
|
discard (await net.publishToMesh(topicA, "Dropped Message".toBytes())).expect(
|
||||||
|
"Publish A failed"
|
||||||
|
)
|
||||||
|
discard
|
||||||
|
(await net.publishToMesh(topicB, "Kept Msg".toBytes())).expect("Publish B failed")
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
require eventManager.receivedMessages.len == 1
|
||||||
|
check eventManager.receivedMessages[0].contentTopic == topicB
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node resubscribe after unsubscribe":
|
||||||
|
let net = await setupNetwork(1, cli_args.WakuMode.Edge)
|
||||||
|
defer:
|
||||||
|
await net.teardown()
|
||||||
|
|
||||||
|
let testTopic = ContentTopic("/waku/2/resub-test/proto")
|
||||||
|
|
||||||
|
(await net.subscriber.subscribe(testTopic)).expect("Initial sub failed")
|
||||||
|
|
||||||
|
var eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
discard (await net.publishToMeshAfterEdgeReady(testTopic, "Msg 1".toBytes())).expect(
|
||||||
|
"Pub 1 failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
net.subscriber.unsubscribe(testTopic).expect("Unsub failed")
|
||||||
|
eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
|
||||||
|
discard
|
||||||
|
(await net.publishToMesh(testTopic, "Ghost".toBytes())).expect("Ghost pub failed")
|
||||||
|
|
||||||
|
check not await eventManager.waitForEvents(NegativeTestTimeout)
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
(await net.subscriber.subscribe(testTopic)).expect("Resub failed")
|
||||||
|
eventManager = newReceiveEventListenerManager(net.subscriber.brokerCtx, 1)
|
||||||
|
|
||||||
|
discard (await net.publishToMeshAfterEdgeReady(testTopic, "Msg 2".toBytes())).expect(
|
||||||
|
"Pub 2 failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
check eventManager.receivedMessages[0].payload == "Msg 2".toBytes()
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node failover after service peer dies":
|
||||||
|
# NOTE: This test is a bit more verbose because it defines a custom topology.
|
||||||
|
# It doesn't use the shared TestNetwork helper.
|
||||||
|
# This mounts two service peers for the edge node then fails one.
|
||||||
|
let numShards: uint16 = 1
|
||||||
|
let shards = @[PubsubTopic("/waku/2/rs/3/0")]
|
||||||
|
|
||||||
|
proc dummyHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} =
|
||||||
|
discard
|
||||||
|
|
||||||
|
var publisher: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
publisher =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
publisher.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on publisher"
|
||||||
|
)
|
||||||
|
(await publisher.mountRelay()).expect("Failed to mount relay on publisher")
|
||||||
|
await publisher.mountFilter()
|
||||||
|
await publisher.mountLibp2pPing()
|
||||||
|
await publisher.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
publisher.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub publisher"
|
||||||
|
)
|
||||||
|
|
||||||
|
let publisherPeerInfo = publisher.peerInfo.toRemotePeerInfo()
|
||||||
|
|
||||||
|
var meshBuddy: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
meshBuddy =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
meshBuddy.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on meshBuddy"
|
||||||
|
)
|
||||||
|
(await meshBuddy.mountRelay()).expect("Failed to mount relay on meshBuddy")
|
||||||
|
await meshBuddy.mountFilter()
|
||||||
|
await meshBuddy.mountLibp2pPing()
|
||||||
|
await meshBuddy.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
meshBuddy.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub meshBuddy"
|
||||||
|
)
|
||||||
|
|
||||||
|
let meshBuddyPeerInfo = meshBuddy.peerInfo.toRemotePeerInfo()
|
||||||
|
|
||||||
|
await meshBuddy.connectToNodes(@[publisherPeerInfo])
|
||||||
|
|
||||||
|
let conf = createApiNodeConf(cli_args.WakuMode.Edge, numShards)
|
||||||
|
var subscriber: Waku
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
subscriber = (await createNode(conf)).expect("Failed to create edge subscriber")
|
||||||
|
(await startWaku(addr subscriber)).expect("Failed to start edge subscriber")
|
||||||
|
|
||||||
|
# Connect edge subscriber to both filter servers so selectPeers finds both
|
||||||
|
await subscriber.node.connectToNodes(@[publisherPeerInfo, meshBuddyPeerInfo])
|
||||||
|
|
||||||
|
let testTopic = ContentTopic("/waku/2/failover-test/proto")
|
||||||
|
let shard = subscriber.node.getRelayShard(testTopic)
|
||||||
|
|
||||||
|
(await subscriber.subscribe(testTopic)).expect("Failed to subscribe")
|
||||||
|
|
||||||
|
# Wait for dialing both filter servers (HealthyThreshold = 2)
|
||||||
|
for _ in 0 ..< 100:
|
||||||
|
if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2:
|
||||||
|
break
|
||||||
|
await sleepAsync(100.milliseconds)
|
||||||
|
|
||||||
|
check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2
|
||||||
|
|
||||||
|
# Verify message delivery with both servers alive
|
||||||
|
await waitForMesh(publisher, shard)
|
||||||
|
|
||||||
|
var eventManager = newReceiveEventListenerManager(subscriber.brokerCtx, 1)
|
||||||
|
let msg1 = WakuMessage(
|
||||||
|
payload: "Before failover".toBytes(),
|
||||||
|
contentTopic: testTopic,
|
||||||
|
version: 0,
|
||||||
|
timestamp: now(),
|
||||||
|
)
|
||||||
|
discard (await publisher.publish(some(shard), msg1)).expect("Publish 1 failed")
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
check eventManager.receivedMessages[0].payload == "Before failover".toBytes()
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
# Disconnect meshBuddy from edge (keeps relay mesh alive for publishing)
|
||||||
|
await subscriber.node.disconnectNode(meshBuddyPeerInfo)
|
||||||
|
|
||||||
|
# Wait for the dead peer to be pruned
|
||||||
|
for _ in 0 ..< 50:
|
||||||
|
if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) < 2:
|
||||||
|
break
|
||||||
|
await sleepAsync(100.milliseconds)
|
||||||
|
|
||||||
|
check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 1
|
||||||
|
|
||||||
|
# Verify messages still arrive through the surviving filter server (publisher)
|
||||||
|
eventManager = newReceiveEventListenerManager(subscriber.brokerCtx, 1)
|
||||||
|
let msg2 = WakuMessage(
|
||||||
|
payload: "After failover".toBytes(),
|
||||||
|
contentTopic: testTopic,
|
||||||
|
version: 0,
|
||||||
|
timestamp: now(),
|
||||||
|
)
|
||||||
|
discard (await publisher.publish(some(shard), msg2)).expect("Publish 2 failed")
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
check eventManager.receivedMessages[0].payload == "After failover".toBytes()
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
(await subscriber.stop()).expect("Failed to stop subscriber")
|
||||||
|
await meshBuddy.stop()
|
||||||
|
await publisher.stop()
|
||||||
|
|
||||||
|
asyncTest "Subscription API, edge node dials replacement after peer eviction":
|
||||||
|
# 3 service peers: publisher, meshBuddy, sparePeer. Edge subscribes and
|
||||||
|
# confirms 2 (HealthyThreshold). After one is disconnected, the sub loop
|
||||||
|
# should detect the loss and dial the spare to recover back to threshold.
|
||||||
|
let numShards: uint16 = 1
|
||||||
|
let shards = @[PubsubTopic("/waku/2/rs/3/0")]
|
||||||
|
|
||||||
|
proc dummyHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} =
|
||||||
|
discard
|
||||||
|
|
||||||
|
var publisher: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
publisher =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
publisher.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on publisher"
|
||||||
|
)
|
||||||
|
(await publisher.mountRelay()).expect("Failed to mount relay on publisher")
|
||||||
|
await publisher.mountFilter()
|
||||||
|
await publisher.mountLibp2pPing()
|
||||||
|
await publisher.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
publisher.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub publisher"
|
||||||
|
)
|
||||||
|
|
||||||
|
let publisherPeerInfo = publisher.peerInfo.toRemotePeerInfo()
|
||||||
|
|
||||||
|
var meshBuddy: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
meshBuddy =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
meshBuddy.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on meshBuddy"
|
||||||
|
)
|
||||||
|
(await meshBuddy.mountRelay()).expect("Failed to mount relay on meshBuddy")
|
||||||
|
await meshBuddy.mountFilter()
|
||||||
|
await meshBuddy.mountLibp2pPing()
|
||||||
|
await meshBuddy.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
meshBuddy.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub meshBuddy"
|
||||||
|
)
|
||||||
|
|
||||||
|
let meshBuddyPeerInfo = meshBuddy.peerInfo.toRemotePeerInfo()
|
||||||
|
|
||||||
|
var sparePeer: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
sparePeer =
|
||||||
|
newTestWakuNode(generateSecp256k1Key(), parseIpAddress("0.0.0.0"), Port(0))
|
||||||
|
sparePeer.mountMetadata(3, toSeq(0'u16 ..< numShards)).expect(
|
||||||
|
"Failed to mount metadata on sparePeer"
|
||||||
|
)
|
||||||
|
(await sparePeer.mountRelay()).expect("Failed to mount relay on sparePeer")
|
||||||
|
await sparePeer.mountFilter()
|
||||||
|
await sparePeer.mountLibp2pPing()
|
||||||
|
await sparePeer.start()
|
||||||
|
|
||||||
|
for shard in shards:
|
||||||
|
sparePeer.subscribe((kind: PubsubSub, topic: shard), dummyHandler).expect(
|
||||||
|
"Failed to sub sparePeer"
|
||||||
|
)
|
||||||
|
|
||||||
|
let sparePeerInfo = sparePeer.peerInfo.toRemotePeerInfo()
|
||||||
|
|
||||||
|
await meshBuddy.connectToNodes(@[publisherPeerInfo])
|
||||||
|
await sparePeer.connectToNodes(@[publisherPeerInfo])
|
||||||
|
|
||||||
|
let conf = createApiNodeConf(cli_args.WakuMode.Edge, numShards)
|
||||||
|
var subscriber: Waku
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
subscriber = (await createNode(conf)).expect("Failed to create edge subscriber")
|
||||||
|
(await startWaku(addr subscriber)).expect("Failed to start edge subscriber")
|
||||||
|
|
||||||
|
await subscriber.node.connectToNodes(
|
||||||
|
@[publisherPeerInfo, meshBuddyPeerInfo, sparePeerInfo]
|
||||||
|
)
|
||||||
|
|
||||||
|
let testTopic = ContentTopic("/waku/2/replacement-test/proto")
|
||||||
|
let shard = subscriber.node.getRelayShard(testTopic)
|
||||||
|
|
||||||
|
(await subscriber.subscribe(testTopic)).expect("Failed to subscribe")
|
||||||
|
|
||||||
|
# Wait for 2 confirmed peers (HealthyThreshold). The 3rd is available but not dialed.
|
||||||
|
for _ in 0 ..< 100:
|
||||||
|
if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2:
|
||||||
|
break
|
||||||
|
await sleepAsync(100.milliseconds)
|
||||||
|
|
||||||
|
require subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) ==
|
||||||
|
2
|
||||||
|
|
||||||
|
await subscriber.node.disconnectNode(meshBuddyPeerInfo)
|
||||||
|
|
||||||
|
# Wait for the sub loop to detect the loss and dial a replacement
|
||||||
|
for _ in 0 ..< 100:
|
||||||
|
if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2:
|
||||||
|
break
|
||||||
|
await sleepAsync(100.milliseconds)
|
||||||
|
|
||||||
|
check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2
|
||||||
|
|
||||||
|
await waitForMesh(publisher, shard)
|
||||||
|
|
||||||
|
var eventManager = newReceiveEventListenerManager(subscriber.brokerCtx, 1)
|
||||||
|
let msg = WakuMessage(
|
||||||
|
payload: "After replacement".toBytes(),
|
||||||
|
contentTopic: testTopic,
|
||||||
|
version: 0,
|
||||||
|
timestamp: now(),
|
||||||
|
)
|
||||||
|
discard (await publisher.publish(some(shard), msg)).expect("Publish failed")
|
||||||
|
|
||||||
|
require await eventManager.waitForEvents(TestTimeout)
|
||||||
|
check eventManager.receivedMessages[0].payload == "After replacement".toBytes()
|
||||||
|
eventManager.teardown()
|
||||||
|
|
||||||
|
(await subscriber.stop()).expect("Failed to stop subscriber")
|
||||||
|
await sparePeer.stop()
|
||||||
|
await meshBuddy.stop()
|
||||||
|
await publisher.stop()
|
||||||
|
|||||||
@ -12,12 +12,18 @@ import
|
|||||||
node/health_monitor/health_status,
|
node/health_monitor/health_status,
|
||||||
node/health_monitor/connection_status,
|
node/health_monitor/connection_status,
|
||||||
node/health_monitor/protocol_health,
|
node/health_monitor/protocol_health,
|
||||||
|
node/health_monitor/topic_health,
|
||||||
node/health_monitor/node_health_monitor,
|
node/health_monitor/node_health_monitor,
|
||||||
|
node/delivery_service/delivery_service,
|
||||||
|
node/delivery_service/subscription_manager,
|
||||||
node/kernel_api/relay,
|
node/kernel_api/relay,
|
||||||
node/kernel_api/store,
|
node/kernel_api/store,
|
||||||
node/kernel_api/lightpush,
|
node/kernel_api/lightpush,
|
||||||
node/kernel_api/filter,
|
node/kernel_api/filter,
|
||||||
|
events/health_events,
|
||||||
|
events/peer_events,
|
||||||
waku_archive,
|
waku_archive,
|
||||||
|
common/broker/broker_context,
|
||||||
]
|
]
|
||||||
|
|
||||||
import ../testlib/[wakunode, wakucore], ../waku_archive/archive_utils
|
import ../testlib/[wakunode, wakucore], ../waku_archive/archive_utils
|
||||||
@ -129,13 +135,12 @@ suite "Health Monitor - health state calculation":
|
|||||||
|
|
||||||
suite "Health Monitor - events":
|
suite "Health Monitor - events":
|
||||||
asyncTest "Core (relay) health update":
|
asyncTest "Core (relay) health update":
|
||||||
let
|
var nodeA: WakuNode
|
||||||
nodeAKey = generateSecp256k1Key()
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeAKey = generateSecp256k1Key()
|
||||||
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))
|
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
(await nodeA.mountRelay()).expect("Node A failed to mount Relay")
|
||||||
(await nodeA.mountRelay()).expect("Node A failed to mount Relay")
|
await nodeA.start()
|
||||||
|
|
||||||
await nodeA.start()
|
|
||||||
|
|
||||||
let monitorA = NodeHealthMonitor.new(nodeA)
|
let monitorA = NodeHealthMonitor.new(nodeA)
|
||||||
|
|
||||||
@ -151,17 +156,15 @@ suite "Health Monitor - events":
|
|||||||
|
|
||||||
monitorA.startHealthMonitor().expect("Health monitor failed to start")
|
monitorA.startHealthMonitor().expect("Health monitor failed to start")
|
||||||
|
|
||||||
let
|
var nodeB: WakuNode
|
||||||
nodeBKey = generateSecp256k1Key()
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeBKey = generateSecp256k1Key()
|
||||||
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))
|
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
let driver = newSqliteArchiveDriver()
|
||||||
let driver = newSqliteArchiveDriver()
|
nodeB.mountArchive(driver).expect("Node B failed to mount archive")
|
||||||
nodeB.mountArchive(driver).expect("Node B failed to mount archive")
|
(await nodeB.mountRelay()).expect("Node B failed to mount relay")
|
||||||
|
await nodeB.mountStore()
|
||||||
(await nodeB.mountRelay()).expect("Node B failed to mount relay")
|
await nodeB.start()
|
||||||
await nodeB.mountStore()
|
|
||||||
|
|
||||||
await nodeB.start()
|
|
||||||
|
|
||||||
await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])
|
await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])
|
||||||
|
|
||||||
@ -214,15 +217,20 @@ suite "Health Monitor - events":
|
|||||||
await nodeA.stop()
|
await nodeA.stop()
|
||||||
|
|
||||||
asyncTest "Edge (light client) health update":
|
asyncTest "Edge (light client) health update":
|
||||||
let
|
var nodeA: WakuNode
|
||||||
nodeAKey = generateSecp256k1Key()
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeAKey = generateSecp256k1Key()
|
||||||
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))
|
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
nodeA.mountLightpushClient()
|
||||||
|
await nodeA.mountFilterClient()
|
||||||
|
nodeA.mountStoreClient()
|
||||||
|
require nodeA.mountAutoSharding(1, 8).isOk
|
||||||
|
nodeA.mountMetadata(1, @[0'u16]).expect("Node A failed to mount metadata")
|
||||||
|
await nodeA.start()
|
||||||
|
|
||||||
nodeA.mountLightpushClient()
|
let ds =
|
||||||
await nodeA.mountFilterClient()
|
DeliveryService.new(false, nodeA).expect("Failed to create DeliveryService")
|
||||||
nodeA.mountStoreClient()
|
ds.startDeliveryService().expect("Failed to start DeliveryService")
|
||||||
|
|
||||||
await nodeA.start()
|
|
||||||
|
|
||||||
let monitorA = NodeHealthMonitor.new(nodeA)
|
let monitorA = NodeHealthMonitor.new(nodeA)
|
||||||
|
|
||||||
@ -238,23 +246,40 @@ suite "Health Monitor - events":
|
|||||||
|
|
||||||
monitorA.startHealthMonitor().expect("Health monitor failed to start")
|
monitorA.startHealthMonitor().expect("Health monitor failed to start")
|
||||||
|
|
||||||
let
|
var nodeB: WakuNode
|
||||||
nodeBKey = generateSecp256k1Key()
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeBKey = generateSecp256k1Key()
|
||||||
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))
|
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
let driver = newSqliteArchiveDriver()
|
||||||
|
nodeB.mountArchive(driver).expect("Node B failed to mount archive")
|
||||||
|
(await nodeB.mountRelay()).expect("Node B failed to mount relay")
|
||||||
|
(await nodeB.mountLightpush()).expect("Node B failed to mount lightpush")
|
||||||
|
await nodeB.mountFilter()
|
||||||
|
await nodeB.mountStore()
|
||||||
|
require nodeB.mountAutoSharding(1, 8).isOk
|
||||||
|
nodeB.mountMetadata(1, toSeq(0'u16 ..< 8'u16)).expect(
|
||||||
|
"Node B failed to mount metadata"
|
||||||
|
)
|
||||||
|
await nodeB.start()
|
||||||
|
|
||||||
let driver = newSqliteArchiveDriver()
|
var metadataFut = newFuture[void]("waitForMetadata")
|
||||||
nodeB.mountArchive(driver).expect("Node B failed to mount archive")
|
let metadataLis = WakuPeerEvent
|
||||||
|
.listen(
|
||||||
(await nodeB.mountRelay()).expect("Node B failed to mount relay")
|
nodeA.brokerCtx,
|
||||||
|
proc(evt: WakuPeerEvent): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
(await nodeB.mountLightpush()).expect("Node B failed to mount lightpush")
|
if not metadataFut.finished and
|
||||||
await nodeB.mountFilter()
|
evt.kind == WakuPeerEventKind.EventMetadataUpdated:
|
||||||
await nodeB.mountStore()
|
metadataFut.complete()
|
||||||
|
,
|
||||||
await nodeB.start()
|
)
|
||||||
|
.expect("Failed to listen for metadata")
|
||||||
|
|
||||||
await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])
|
await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])
|
||||||
|
|
||||||
|
let metadataOk = await metadataFut.withTimeout(TestConnectivityTimeLimit)
|
||||||
|
WakuPeerEvent.dropListener(nodeA.brokerCtx, metadataLis)
|
||||||
|
require metadataOk
|
||||||
|
|
||||||
let connectTimeLimit = Moment.now() + TestConnectivityTimeLimit
|
let connectTimeLimit = Moment.now() + TestConnectivityTimeLimit
|
||||||
var gotConnected = false
|
var gotConnected = false
|
||||||
|
|
||||||
@ -292,4 +317,118 @@ suite "Health Monitor - events":
|
|||||||
lastStatus == ConnectionStatus.Disconnected
|
lastStatus == ConnectionStatus.Disconnected
|
||||||
|
|
||||||
await monitorA.stopHealthMonitor()
|
await monitorA.stopHealthMonitor()
|
||||||
|
await ds.stopDeliveryService()
|
||||||
|
await nodeA.stop()
|
||||||
|
|
||||||
|
asyncTest "Edge health driven by confirmed filter subscriptions":
|
||||||
|
var nodeA: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeAKey = generateSecp256k1Key()
|
||||||
|
nodeA = newTestWakuNode(nodeAKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
await nodeA.mountFilterClient()
|
||||||
|
nodeA.mountLightpushClient()
|
||||||
|
nodeA.mountStoreClient()
|
||||||
|
require nodeA.mountAutoSharding(1, 8).isOk
|
||||||
|
nodeA.mountMetadata(1, @[0'u16]).expect("Node A failed to mount metadata")
|
||||||
|
await nodeA.start()
|
||||||
|
|
||||||
|
let ds =
|
||||||
|
DeliveryService.new(false, nodeA).expect("Failed to create DeliveryService")
|
||||||
|
ds.startDeliveryService().expect("Failed to start DeliveryService")
|
||||||
|
let subMgr = ds.subscriptionManager
|
||||||
|
|
||||||
|
var nodeB: WakuNode
|
||||||
|
lockNewGlobalBrokerContext:
|
||||||
|
let nodeBKey = generateSecp256k1Key()
|
||||||
|
nodeB = newTestWakuNode(nodeBKey, parseIpAddress("127.0.0.1"), Port(0))
|
||||||
|
let driver = newSqliteArchiveDriver()
|
||||||
|
nodeB.mountArchive(driver).expect("Node B failed to mount archive")
|
||||||
|
(await nodeB.mountRelay()).expect("Node B failed to mount relay")
|
||||||
|
(await nodeB.mountLightpush()).expect("Node B failed to mount lightpush")
|
||||||
|
await nodeB.mountFilter()
|
||||||
|
await nodeB.mountStore()
|
||||||
|
require nodeB.mountAutoSharding(1, 8).isOk
|
||||||
|
nodeB.mountMetadata(1, toSeq(0'u16 ..< 8'u16)).expect(
|
||||||
|
"Node B failed to mount metadata"
|
||||||
|
)
|
||||||
|
await nodeB.start()
|
||||||
|
|
||||||
|
let monitorA = NodeHealthMonitor.new(nodeA)
|
||||||
|
|
||||||
|
var
|
||||||
|
lastStatus = ConnectionStatus.Disconnected
|
||||||
|
healthSignal = newAsyncEvent()
|
||||||
|
|
||||||
|
monitorA.onConnectionStatusChange = proc(status: ConnectionStatus) {.async.} =
|
||||||
|
lastStatus = status
|
||||||
|
healthSignal.fire()
|
||||||
|
|
||||||
|
monitorA.startHealthMonitor().expect("Health monitor failed to start")
|
||||||
|
|
||||||
|
var metadataFut = newFuture[void]("waitForMetadata")
|
||||||
|
let metadataLis = WakuPeerEvent
|
||||||
|
.listen(
|
||||||
|
nodeA.brokerCtx,
|
||||||
|
proc(evt: WakuPeerEvent): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
|
if not metadataFut.finished and
|
||||||
|
evt.kind == WakuPeerEventKind.EventMetadataUpdated:
|
||||||
|
metadataFut.complete()
|
||||||
|
,
|
||||||
|
)
|
||||||
|
.expect("Failed to listen for metadata")
|
||||||
|
|
||||||
|
await nodeA.connectToNodes(@[nodeB.switch.peerInfo.toRemotePeerInfo()])
|
||||||
|
|
||||||
|
let metadataOk = await metadataFut.withTimeout(TestConnectivityTimeLimit)
|
||||||
|
WakuPeerEvent.dropListener(nodeA.brokerCtx, metadataLis)
|
||||||
|
require metadataOk
|
||||||
|
|
||||||
|
var deadline = Moment.now() + TestConnectivityTimeLimit
|
||||||
|
while Moment.now() < deadline:
|
||||||
|
if lastStatus == ConnectionStatus.PartiallyConnected:
|
||||||
|
break
|
||||||
|
if await healthSignal.wait().withTimeout(deadline - Moment.now()):
|
||||||
|
healthSignal.clear()
|
||||||
|
|
||||||
|
check lastStatus == ConnectionStatus.PartiallyConnected
|
||||||
|
|
||||||
|
var shardHealthFut = newFuture[EventShardTopicHealthChange]("waitForShardHealth")
|
||||||
|
|
||||||
|
let shardHealthLis = EventShardTopicHealthChange
|
||||||
|
.listen(
|
||||||
|
nodeA.brokerCtx,
|
||||||
|
proc(
|
||||||
|
evt: EventShardTopicHealthChange
|
||||||
|
): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
|
if not shardHealthFut.finished and (
|
||||||
|
evt.health == TopicHealth.MINIMALLY_HEALTHY or
|
||||||
|
evt.health == TopicHealth.SUFFICIENTLY_HEALTHY
|
||||||
|
):
|
||||||
|
shardHealthFut.complete(evt)
|
||||||
|
,
|
||||||
|
)
|
||||||
|
.expect("Failed to listen for shard health")
|
||||||
|
|
||||||
|
let contentTopic = ContentTopic("/waku/2/default-content/proto")
|
||||||
|
subMgr.subscribe(contentTopic).expect("Failed to subscribe")
|
||||||
|
|
||||||
|
let shardHealthOk = await shardHealthFut.withTimeout(TestConnectivityTimeLimit)
|
||||||
|
EventShardTopicHealthChange.dropListener(nodeA.brokerCtx, shardHealthLis)
|
||||||
|
|
||||||
|
check shardHealthOk == true
|
||||||
|
check subMgr.edgeFilterSubStates.len > 0
|
||||||
|
|
||||||
|
healthSignal.clear()
|
||||||
|
deadline = Moment.now() + TestConnectivityTimeLimit
|
||||||
|
while Moment.now() < deadline:
|
||||||
|
if lastStatus == ConnectionStatus.PartiallyConnected:
|
||||||
|
break
|
||||||
|
if await healthSignal.wait().withTimeout(deadline - Moment.now()):
|
||||||
|
healthSignal.clear()
|
||||||
|
|
||||||
|
check lastStatus == ConnectionStatus.PartiallyConnected
|
||||||
|
|
||||||
|
await ds.stopDeliveryService()
|
||||||
|
await monitorA.stopHealthMonitor()
|
||||||
|
await nodeB.stop()
|
||||||
await nodeA.stop()
|
await nodeA.stop()
|
||||||
|
|||||||
@ -8,6 +8,6 @@ type WakuPeerEventKind* {.pure.} = enum
|
|||||||
EventMetadataUpdated
|
EventMetadataUpdated
|
||||||
|
|
||||||
EventBroker:
|
EventBroker:
|
||||||
type EventWakuPeer* = object
|
type WakuPeerEvent* = object
|
||||||
peerId*: PeerId
|
peerId*: PeerId
|
||||||
kind*: WakuPeerEventKind
|
kind*: WakuPeerEventKind
|
||||||
|
|||||||
@ -416,7 +416,8 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises:
|
|||||||
|
|
||||||
## Reliability
|
## Reliability
|
||||||
if not waku[].deliveryService.isNil():
|
if not waku[].deliveryService.isNil():
|
||||||
waku[].deliveryService.startDeliveryService()
|
waku[].deliveryService.startDeliveryService().isOkOr:
|
||||||
|
return err("failed to start delivery service: " & $error)
|
||||||
|
|
||||||
## Health Monitor
|
## Health Monitor
|
||||||
waku[].healthMonitor.startHealthMonitor().isOkOr:
|
waku[].healthMonitor.startHealthMonitor().isOkOr:
|
||||||
|
|||||||
@ -1,18 +1,13 @@
|
|||||||
## This module helps to ensure the correct transmission and reception of messages
|
## This module helps to ensure the correct transmission and reception of messages
|
||||||
|
|
||||||
import results
|
import results
|
||||||
import chronos
|
import chronos, chronicles
|
||||||
import
|
import
|
||||||
./recv_service,
|
./recv_service,
|
||||||
./send_service,
|
./send_service,
|
||||||
./subscription_manager,
|
./subscription_manager,
|
||||||
waku/[
|
waku/[
|
||||||
waku_core,
|
waku_core, waku_node, waku_store/client, waku_relay/protocol, waku_lightpush/client
|
||||||
waku_node,
|
|
||||||
waku_store/client,
|
|
||||||
waku_relay/protocol,
|
|
||||||
waku_lightpush/client,
|
|
||||||
waku_filter_v2/client,
|
|
||||||
]
|
]
|
||||||
|
|
||||||
type DeliveryService* = ref object
|
type DeliveryService* = ref object
|
||||||
@ -37,10 +32,11 @@ proc new*(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
proc startDeliveryService*(self: DeliveryService) =
|
proc startDeliveryService*(self: DeliveryService): Result[void, string] =
|
||||||
self.subscriptionManager.startSubscriptionManager()
|
?self.subscriptionManager.startSubscriptionManager()
|
||||||
self.recvService.startRecvService()
|
self.recvService.startRecvService()
|
||||||
self.sendService.startSendService()
|
self.sendService.startSendService()
|
||||||
|
return ok()
|
||||||
|
|
||||||
proc stopDeliveryService*(self: DeliveryService) {.async.} =
|
proc stopDeliveryService*(self: DeliveryService) {.async.} =
|
||||||
await self.sendService.stopSendService()
|
await self.sendService.stopSendService()
|
||||||
|
|||||||
@ -91,20 +91,20 @@ proc msgChecker(self: RecvService) {.async.} =
|
|||||||
self.endTimeToCheck = getNowInNanosecondTime()
|
self.endTimeToCheck = getNowInNanosecondTime()
|
||||||
|
|
||||||
var msgHashesInStore = newSeq[WakuMessageHash](0)
|
var msgHashesInStore = newSeq[WakuMessageHash](0)
|
||||||
for sub in self.subscriptionManager.getActiveSubscriptions():
|
for pubsubTopic, contentTopics in self.subscriptionManager.subscribedTopics:
|
||||||
let storeResp: StoreQueryResponse = (
|
let storeResp: StoreQueryResponse = (
|
||||||
await self.node.wakuStoreClient.queryToAny(
|
await self.node.wakuStoreClient.queryToAny(
|
||||||
StoreQueryRequest(
|
StoreQueryRequest(
|
||||||
includeData: false,
|
includeData: false,
|
||||||
pubsubTopic: some(PubsubTopic(sub.pubsubTopic)),
|
pubsubTopic: some(pubsubTopic),
|
||||||
contentTopics: sub.contentTopics,
|
contentTopics: toSeq(contentTopics),
|
||||||
startTime: some(self.startTimeToCheck - DelayExtra.nanos),
|
startTime: some(self.startTimeToCheck - DelayExtra.nanos),
|
||||||
endTime: some(self.endTimeToCheck + DelayExtra.nanos),
|
endTime: some(self.endTimeToCheck + DelayExtra.nanos),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
).valueOr:
|
).valueOr:
|
||||||
error "msgChecker failed to get remote msgHashes",
|
error "msgChecker failed to get remote msgHashes",
|
||||||
pubsubTopic = sub.pubsubTopic, cTopics = sub.contentTopics, error = $error
|
pubsubTopic = pubsubTopic, cTopics = toSeq(contentTopics), error = $error
|
||||||
continue
|
continue
|
||||||
|
|
||||||
msgHashesInStore.add(storeResp.messages.mapIt(it.messageHash))
|
msgHashesInStore.add(storeResp.messages.mapIt(it.messageHash))
|
||||||
@ -154,10 +154,6 @@ proc new*(T: typedesc[RecvService], node: WakuNode, s: SubscriptionManager): T =
|
|||||||
recentReceivedMsgs: @[],
|
recentReceivedMsgs: @[],
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: For MAPI Edge support, either call node.wakuFilterClient.registerPushHandler
|
|
||||||
# so that the RecvService listens to incoming filter messages,
|
|
||||||
# or have the filter client emit MessageSeenEvent.
|
|
||||||
|
|
||||||
return recvService
|
return recvService
|
||||||
|
|
||||||
proc loopPruneOldMessages(self: RecvService) {.async.} =
|
proc loopPruneOldMessages(self: RecvService) {.async.} =
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import std/[sets, tables, options, strutils], chronos, chronicles, results
|
import std/[sequtils, sets, tables, options, strutils], chronos, chronicles, results
|
||||||
|
import libp2p/[peerid, peerinfo]
|
||||||
import
|
import
|
||||||
waku/[
|
waku/[
|
||||||
waku_core,
|
waku_core,
|
||||||
@ -6,16 +7,67 @@ import
|
|||||||
waku_core/topics/sharding,
|
waku_core/topics/sharding,
|
||||||
waku_node,
|
waku_node,
|
||||||
waku_relay,
|
waku_relay,
|
||||||
|
waku_filter_v2/common as filter_common,
|
||||||
|
waku_filter_v2/client as filter_client,
|
||||||
|
waku_filter_v2/protocol as filter_protocol,
|
||||||
common/broker/broker_context,
|
common/broker/broker_context,
|
||||||
events/delivery_events,
|
events/health_events,
|
||||||
|
events/peer_events,
|
||||||
|
requests/health_requests,
|
||||||
|
node/peer_manager,
|
||||||
|
node/health_monitor/topic_health,
|
||||||
|
node/health_monitor/connection_status,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logos Messaging API SubscriptionManager
|
||||||
|
#
|
||||||
|
# Maps all topic subscription intent and centralizes all consistency
|
||||||
|
# maintenance of the pubsub and content topic subscription model across
|
||||||
|
# the various network drivers that handle topics (Edge/Filter and Core/Relay).
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
type EdgeFilterSubState* = object
|
||||||
|
peers: seq[RemotePeerInfo]
|
||||||
|
## Filter service peers with confirmed subscriptions on this shard.
|
||||||
|
pending: seq[Future[void]] ## In-flight dial futures for peers not yet confirmed.
|
||||||
|
pendingPeers: HashSet[PeerId] ## PeerIds of peers currently being dialed.
|
||||||
|
currentHealth: TopicHealth
|
||||||
|
## Cached health derived from peers.len; updated on every peer set change.
|
||||||
|
|
||||||
|
func toTopicHealth*(peersCount: int): TopicHealth =
|
||||||
|
if peersCount >= HealthyThreshold:
|
||||||
|
TopicHealth.SUFFICIENTLY_HEALTHY
|
||||||
|
elif peersCount > 0:
|
||||||
|
TopicHealth.MINIMALLY_HEALTHY
|
||||||
|
else:
|
||||||
|
TopicHealth.UNHEALTHY
|
||||||
|
|
||||||
type SubscriptionManager* = ref object of RootObj
|
type SubscriptionManager* = ref object of RootObj
|
||||||
node: WakuNode
|
node: WakuNode
|
||||||
contentTopicSubs: Table[PubsubTopic, HashSet[ContentTopic]]
|
contentTopicSubs: Table[PubsubTopic, HashSet[ContentTopic]]
|
||||||
## Map of Shard to ContentTopic needed because e.g. WakuRelay is PubsubTopic only.
|
## Map of Shard to ContentTopic needed because e.g. WakuRelay is PubsubTopic only.
|
||||||
## A present key with an empty HashSet value means pubsubtopic already subscribed
|
## A present key with an empty HashSet value means pubsubtopic already subscribed
|
||||||
## (via subscribePubsubTopics()) but there's no specific content topic interest yet.
|
## (via subscribePubsubTopics()) but there's no specific content topic interest yet.
|
||||||
|
edgeFilterSubStates*: Table[PubsubTopic, EdgeFilterSubState]
|
||||||
|
## Per-shard filter subscription state for edge mode.
|
||||||
|
edgeFilterWakeup: AsyncEvent
|
||||||
|
## Signalled when the edge filter sub loop should re-reconcile.
|
||||||
|
edgeFilterSubLoopFut: Future[void]
|
||||||
|
edgeFilterHealthLoopFut: Future[void]
|
||||||
|
peerEventListener: WakuPeerEventListener
|
||||||
|
## Listener for peer connect/disconnect events (edge filter wakeup).
|
||||||
|
|
||||||
|
iterator subscribedTopics*(
|
||||||
|
self: SubscriptionManager
|
||||||
|
): (PubsubTopic, HashSet[ContentTopic]) =
|
||||||
|
for pubsub, topics in self.contentTopicSubs.pairs:
|
||||||
|
yield (pubsub, topics)
|
||||||
|
|
||||||
|
proc edgeFilterPeerCount*(sm: SubscriptionManager, shard: PubsubTopic): int =
|
||||||
|
sm.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
return state.peers.len
|
||||||
|
return 0
|
||||||
|
|
||||||
proc new*(T: typedesc[SubscriptionManager], node: WakuNode): T =
|
proc new*(T: typedesc[SubscriptionManager], node: WakuNode): T =
|
||||||
SubscriptionManager(
|
SubscriptionManager(
|
||||||
@ -25,30 +77,35 @@ proc new*(T: typedesc[SubscriptionManager], node: WakuNode): T =
|
|||||||
proc addContentTopicInterest(
|
proc addContentTopicInterest(
|
||||||
self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic
|
self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic
|
||||||
): Result[void, string] =
|
): Result[void, string] =
|
||||||
|
var changed = false
|
||||||
if not self.contentTopicSubs.hasKey(shard):
|
if not self.contentTopicSubs.hasKey(shard):
|
||||||
self.contentTopicSubs[shard] = initHashSet[ContentTopic]()
|
self.contentTopicSubs[shard] = initHashSet[ContentTopic]()
|
||||||
|
changed = true
|
||||||
|
|
||||||
self.contentTopicSubs.withValue(shard, cTopics):
|
self.contentTopicSubs.withValue(shard, cTopics):
|
||||||
if not cTopics[].contains(topic):
|
if not cTopics[].contains(topic):
|
||||||
cTopics[].incl(topic)
|
cTopics[].incl(topic)
|
||||||
|
changed = true
|
||||||
|
|
||||||
# TODO: Call a "subscribe(shard, topic)" on filter client here,
|
if changed and not isNil(self.edgeFilterWakeup):
|
||||||
# so the filter client can know that subscriptions changed.
|
self.edgeFilterWakeup.fire()
|
||||||
|
|
||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
proc removeContentTopicInterest(
|
proc removeContentTopicInterest(
|
||||||
self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic
|
self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic
|
||||||
): Result[void, string] =
|
): Result[void, string] =
|
||||||
|
var changed = false
|
||||||
self.contentTopicSubs.withValue(shard, cTopics):
|
self.contentTopicSubs.withValue(shard, cTopics):
|
||||||
if cTopics[].contains(topic):
|
if cTopics[].contains(topic):
|
||||||
cTopics[].excl(topic)
|
cTopics[].excl(topic)
|
||||||
|
changed = true
|
||||||
|
|
||||||
if cTopics[].len == 0 and isNil(self.node.wakuRelay):
|
if cTopics[].len == 0 and isNil(self.node.wakuRelay):
|
||||||
self.contentTopicSubs.del(shard) # We're done with cTopics here
|
self.contentTopicSubs.del(shard) # We're done with cTopics here
|
||||||
|
|
||||||
# TODO: Call a "unsubscribe(shard, topic)" on filter client here,
|
if changed and not isNil(self.edgeFilterWakeup):
|
||||||
# so the filter client can know that subscriptions changed.
|
self.edgeFilterWakeup.fire()
|
||||||
|
|
||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
@ -73,46 +130,6 @@ proc subscribePubsubTopics(
|
|||||||
|
|
||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
proc startSubscriptionManager*(self: SubscriptionManager) =
|
|
||||||
if isNil(self.node.wakuRelay):
|
|
||||||
return
|
|
||||||
|
|
||||||
if self.node.wakuAutoSharding.isSome():
|
|
||||||
# Subscribe relay to all shards in autosharding.
|
|
||||||
let autoSharding = self.node.wakuAutoSharding.get()
|
|
||||||
let clusterId = autoSharding.clusterId
|
|
||||||
let numShards = autoSharding.shardCountGenZero
|
|
||||||
|
|
||||||
if numShards > 0:
|
|
||||||
var clusterPubsubTopics = newSeqOfCap[PubsubTopic](numShards)
|
|
||||||
|
|
||||||
for i in 0 ..< numShards:
|
|
||||||
let shardObj = RelayShard(clusterId: clusterId, shardId: uint16(i))
|
|
||||||
clusterPubsubTopics.add(PubsubTopic($shardObj))
|
|
||||||
|
|
||||||
self.subscribePubsubTopics(clusterPubsubTopics).isOkOr:
|
|
||||||
error "Failed to auto-subscribe Relay to cluster shards: ", error = error
|
|
||||||
else:
|
|
||||||
info "SubscriptionManager has no AutoSharding configured; skipping auto-subscribe."
|
|
||||||
|
|
||||||
proc stopSubscriptionManager*(self: SubscriptionManager) {.async.} =
|
|
||||||
discard
|
|
||||||
|
|
||||||
proc getActiveSubscriptions*(
|
|
||||||
self: SubscriptionManager
|
|
||||||
): seq[tuple[pubsubTopic: string, contentTopics: seq[ContentTopic]]] =
|
|
||||||
var activeSubs: seq[tuple[pubsubTopic: string, contentTopics: seq[ContentTopic]]] =
|
|
||||||
@[]
|
|
||||||
|
|
||||||
for pubsub, cTopicSet in self.contentTopicSubs.pairs:
|
|
||||||
if cTopicSet.len > 0:
|
|
||||||
var cTopicSeq = newSeqOfCap[ContentTopic](cTopicSet.len)
|
|
||||||
for t in cTopicSet:
|
|
||||||
cTopicSeq.add(t)
|
|
||||||
activeSubs.add((pubsub, cTopicSeq))
|
|
||||||
|
|
||||||
return activeSubs
|
|
||||||
|
|
||||||
proc getShardForContentTopic(
|
proc getShardForContentTopic(
|
||||||
self: SubscriptionManager, topic: ContentTopic
|
self: SubscriptionManager, topic: ContentTopic
|
||||||
): Result[PubsubTopic, string] =
|
): Result[PubsubTopic, string] =
|
||||||
@ -162,3 +179,358 @@ proc unsubscribe*(
|
|||||||
?self.removeContentTopicInterest(shard, topic)
|
?self.removeContentTopicInterest(shard, topic)
|
||||||
|
|
||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Edge Filter driver for the Logos Messaging API
|
||||||
|
#
|
||||||
|
# The SubscriptionManager absorbs natively the responsibility of using the
|
||||||
|
# Edge Filter protocol to effect subscriptions and message receipt for edge.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
const EdgeFilterSubscribeTimeout = chronos.seconds(15)
|
||||||
|
## Timeout for a single filter subscribe/unsubscribe RPC to a service peer.
|
||||||
|
const EdgeFilterPingTimeout = chronos.seconds(5)
|
||||||
|
## Timeout for a filter ping health check.
|
||||||
|
const EdgeFilterLoopInterval = chronos.seconds(30)
|
||||||
|
## Interval for the edge filter health ping loop.
|
||||||
|
const EdgeFilterSubLoopDebounce = chronos.seconds(1)
|
||||||
|
## Debounce delay to coalesce rapid-fire wakeups into a single reconciliation pass.
|
||||||
|
|
||||||
|
proc updateShardHealth(
|
||||||
|
self: SubscriptionManager, shard: PubsubTopic, state: var EdgeFilterSubState
|
||||||
|
) =
|
||||||
|
## Recompute and emit health for a shard after its peer set changed.
|
||||||
|
let newHealth = toTopicHealth(state.peers.len)
|
||||||
|
if newHealth != state.currentHealth:
|
||||||
|
state.currentHealth = newHealth
|
||||||
|
EventShardTopicHealthChange.emit(self.node.brokerCtx, shard, newHealth)
|
||||||
|
|
||||||
|
proc removePeer(self: SubscriptionManager, shard: PubsubTopic, peerId: PeerId) =
|
||||||
|
## Remove a peer from edgeFilterSubStates for the given shard,
|
||||||
|
## update health, and wake the sub loop to dial a replacement.
|
||||||
|
## Best-effort unsubscribe so the service peer stops pushing to us.
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
var peer: RemotePeerInfo
|
||||||
|
var found = false
|
||||||
|
for p in state.peers:
|
||||||
|
if p.peerId == peerId:
|
||||||
|
peer = p
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
if not found:
|
||||||
|
return
|
||||||
|
|
||||||
|
state.peers.keepItIf(it.peerId != peerId)
|
||||||
|
self.updateShardHealth(shard, state[])
|
||||||
|
self.edgeFilterWakeup.fire()
|
||||||
|
|
||||||
|
if not self.node.wakuFilterClient.isNil():
|
||||||
|
self.contentTopicSubs.withValue(shard, topics):
|
||||||
|
let ct = toSeq(topics[])
|
||||||
|
if ct.len > 0:
|
||||||
|
proc doUnsubscribe() {.async.} =
|
||||||
|
discard await self.node.wakuFilterClient.unsubscribe(peer, shard, ct)
|
||||||
|
|
||||||
|
asyncSpawn doUnsubscribe()
|
||||||
|
|
||||||
|
type SendChunkedFilterRpcKind = enum
|
||||||
|
FilterSubscribe
|
||||||
|
FilterUnsubscribe
|
||||||
|
|
||||||
|
proc sendChunkedFilterRpc(
|
||||||
|
self: SubscriptionManager,
|
||||||
|
peer: RemotePeerInfo,
|
||||||
|
shard: PubsubTopic,
|
||||||
|
topics: seq[ContentTopic],
|
||||||
|
kind: SendChunkedFilterRpcKind,
|
||||||
|
): Future[bool] {.async.} =
|
||||||
|
## Send a chunked filter subscribe or unsubscribe RPC. Returns true on
|
||||||
|
## success. On failure the peer is removed and false is returned.
|
||||||
|
try:
|
||||||
|
var i = 0
|
||||||
|
while i < topics.len:
|
||||||
|
let chunk =
|
||||||
|
topics[i ..< min(i + filter_protocol.MaxContentTopicsPerRequest, topics.len)]
|
||||||
|
let fut =
|
||||||
|
case kind
|
||||||
|
of FilterSubscribe:
|
||||||
|
self.node.wakuFilterClient.subscribe(peer, shard, chunk)
|
||||||
|
of FilterUnsubscribe:
|
||||||
|
self.node.wakuFilterClient.unsubscribe(peer, shard, chunk)
|
||||||
|
if not (await fut.withTimeout(EdgeFilterSubscribeTimeout)) or fut.read().isErr():
|
||||||
|
trace "sendChunkedFilterRpc: chunk failed",
|
||||||
|
op = kind, shard = shard, peer = peer.peerId
|
||||||
|
self.removePeer(shard, peer.peerId)
|
||||||
|
return false
|
||||||
|
i += filter_protocol.MaxContentTopicsPerRequest
|
||||||
|
except CatchableError as exc:
|
||||||
|
debug "sendChunkedFilterRpc: failed",
|
||||||
|
op = kind, shard = shard, peer = peer.peerId, err = exc.msg
|
||||||
|
self.removePeer(shard, peer.peerId)
|
||||||
|
return false
|
||||||
|
return true
|
||||||
|
|
||||||
|
proc syncFilterDeltas(
|
||||||
|
self: SubscriptionManager,
|
||||||
|
peer: RemotePeerInfo,
|
||||||
|
shard: PubsubTopic,
|
||||||
|
added: seq[ContentTopic],
|
||||||
|
removed: seq[ContentTopic],
|
||||||
|
) {.async.} =
|
||||||
|
## Push content topic changes (adds/removes) to an already-tracked peer.
|
||||||
|
if added.len > 0:
|
||||||
|
if not await self.sendChunkedFilterRpc(peer, shard, added, FilterSubscribe):
|
||||||
|
return
|
||||||
|
|
||||||
|
if removed.len > 0:
|
||||||
|
discard await self.sendChunkedFilterRpc(peer, shard, removed, FilterUnsubscribe)
|
||||||
|
|
||||||
|
proc dialFilterPeer(
|
||||||
|
self: SubscriptionManager,
|
||||||
|
peer: RemotePeerInfo,
|
||||||
|
shard: PubsubTopic,
|
||||||
|
contentTopics: seq[ContentTopic],
|
||||||
|
) {.async.} =
|
||||||
|
## Subscribe a new peer to all content topics on a shard and start tracking it.
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
state.pendingPeers.incl(peer.peerId)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if not await self.sendChunkedFilterRpc(peer, shard, contentTopics, FilterSubscribe):
|
||||||
|
return
|
||||||
|
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
if state.peers.anyIt(it.peerId == peer.peerId):
|
||||||
|
trace "dialFilterPeer: peer already tracked, skipping duplicate",
|
||||||
|
shard = shard, peer = peer.peerId
|
||||||
|
return
|
||||||
|
|
||||||
|
state.peers.add(peer)
|
||||||
|
self.updateShardHealth(shard, state[])
|
||||||
|
trace "dialFilterPeer: successfully subscribed to all chunks",
|
||||||
|
shard = shard, peer = peer.peerId, totalPeers = state.peers.len
|
||||||
|
do:
|
||||||
|
trace "dialFilterPeer: shard removed while subscribing, discarding result",
|
||||||
|
shard = shard, peer = peer.peerId
|
||||||
|
finally:
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
state.pendingPeers.excl(peer.peerId)
|
||||||
|
|
||||||
|
proc edgeFilterHealthLoop*(self: SubscriptionManager) {.async.} =
|
||||||
|
## Periodically pings all connected filter service peers to verify they are
|
||||||
|
## still alive at the application layer. Peers that fail the ping are removed.
|
||||||
|
while true:
|
||||||
|
await sleepAsync(EdgeFilterLoopInterval)
|
||||||
|
|
||||||
|
if self.node.wakuFilterClient.isNil():
|
||||||
|
warn "filter client is nil within edge filter health loop"
|
||||||
|
continue
|
||||||
|
|
||||||
|
var connected = initTable[PeerId, RemotePeerInfo]()
|
||||||
|
for state in self.edgeFilterSubStates.values:
|
||||||
|
for peer in state.peers:
|
||||||
|
if self.node.peerManager.switch.peerStore.isConnected(peer.peerId):
|
||||||
|
connected[peer.peerId] = peer
|
||||||
|
|
||||||
|
var alive = initHashSet[PeerId]()
|
||||||
|
|
||||||
|
if connected.len > 0:
|
||||||
|
var pingTasks: seq[(PeerId, Future[FilterSubscribeResult])] = @[]
|
||||||
|
for peer in connected.values:
|
||||||
|
pingTasks.add(
|
||||||
|
(peer.peerId, self.node.wakuFilterClient.ping(peer, EdgeFilterPingTimeout))
|
||||||
|
)
|
||||||
|
|
||||||
|
# extract future tasks from (PeerId, Future) tuples and await them
|
||||||
|
await allFutures(pingTasks.mapIt(it[1]))
|
||||||
|
|
||||||
|
for (peerId, task) in pingTasks:
|
||||||
|
if task.read().isOk():
|
||||||
|
alive.incl(peerId)
|
||||||
|
|
||||||
|
var changed = false
|
||||||
|
for shard, state in self.edgeFilterSubStates.mpairs:
|
||||||
|
let oldLen = state.peers.len
|
||||||
|
state.peers.keepItIf(it.peerId notin connected or alive.contains(it.peerId))
|
||||||
|
|
||||||
|
if state.peers.len < oldLen:
|
||||||
|
changed = true
|
||||||
|
self.updateShardHealth(shard, state)
|
||||||
|
trace "Edge Filter health degraded by Ping failure",
|
||||||
|
shard = shard, new = state.currentHealth
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
self.edgeFilterWakeup.fire()
|
||||||
|
|
||||||
|
proc edgeFilterSubLoop*(self: SubscriptionManager) {.async.} =
|
||||||
|
## Reconciles filter subscriptions with the desired state from SubscriptionManager.
|
||||||
|
var lastSynced = initTable[PubsubTopic, HashSet[ContentTopic]]()
|
||||||
|
|
||||||
|
while true:
|
||||||
|
await self.edgeFilterWakeup.wait()
|
||||||
|
await sleepAsync(EdgeFilterSubLoopDebounce)
|
||||||
|
self.edgeFilterWakeup.clear()
|
||||||
|
trace "edgeFilterSubLoop: woke up"
|
||||||
|
|
||||||
|
if isNil(self.node.wakuFilterClient):
|
||||||
|
trace "edgeFilterSubLoop: wakuFilterClient is nil, skipping"
|
||||||
|
continue
|
||||||
|
|
||||||
|
let desired = self.contentTopicSubs
|
||||||
|
|
||||||
|
trace "edgeFilterSubLoop: desired state", numShards = desired.len
|
||||||
|
|
||||||
|
let allShards = toHashSet(toSeq(desired.keys)) + toHashSet(toSeq(lastSynced.keys))
|
||||||
|
|
||||||
|
for shard in allShards:
|
||||||
|
let currTopics = desired.getOrDefault(shard)
|
||||||
|
let prevTopics = lastSynced.getOrDefault(shard)
|
||||||
|
|
||||||
|
if shard notin self.edgeFilterSubStates:
|
||||||
|
self.edgeFilterSubStates[shard] =
|
||||||
|
EdgeFilterSubState(currentHealth: TopicHealth.UNHEALTHY)
|
||||||
|
|
||||||
|
let addedTopics = toSeq(currTopics - prevTopics)
|
||||||
|
let removedTopics = toSeq(prevTopics - currTopics)
|
||||||
|
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
state.peers.keepItIf(
|
||||||
|
self.node.peerManager.switch.peerStore.isConnected(it.peerId)
|
||||||
|
)
|
||||||
|
state.pending.keepItIf(not it.finished)
|
||||||
|
|
||||||
|
if addedTopics.len > 0 or removedTopics.len > 0:
|
||||||
|
for peer in state.peers:
|
||||||
|
asyncSpawn self.syncFilterDeltas(peer, shard, addedTopics, removedTopics)
|
||||||
|
|
||||||
|
if currTopics.len == 0:
|
||||||
|
for fut in state.pending:
|
||||||
|
if not fut.finished:
|
||||||
|
await fut.cancelAndWait()
|
||||||
|
self.edgeFilterSubStates.del(shard)
|
||||||
|
# invalidates `state` — do not use after this
|
||||||
|
else:
|
||||||
|
self.updateShardHealth(shard, state[])
|
||||||
|
|
||||||
|
let needed = max(0, HealthyThreshold - state.peers.len - state.pending.len)
|
||||||
|
|
||||||
|
if needed > 0:
|
||||||
|
let tracked = state.peers.mapIt(it.peerId).toHashSet() + state.pendingPeers
|
||||||
|
var candidates = self.node.peerManager.selectPeers(
|
||||||
|
filter_common.WakuFilterSubscribeCodec, some(shard)
|
||||||
|
)
|
||||||
|
candidates.keepItIf(it.peerId notin tracked)
|
||||||
|
|
||||||
|
let toDial = min(needed, candidates.len)
|
||||||
|
|
||||||
|
trace "edgeFilterSubLoop: shard reconciliation",
|
||||||
|
shard = shard,
|
||||||
|
num_peers = state.peers.len,
|
||||||
|
num_pending = state.pending.len,
|
||||||
|
num_needed = needed,
|
||||||
|
num_available = candidates.len,
|
||||||
|
toDial = toDial
|
||||||
|
|
||||||
|
for i in 0 ..< toDial:
|
||||||
|
let fut = self.dialFilterPeer(candidates[i], shard, toSeq(currTopics))
|
||||||
|
state.pending.add(fut)
|
||||||
|
|
||||||
|
lastSynced = desired
|
||||||
|
|
||||||
|
proc startEdgeFilterLoops(self: SubscriptionManager): Result[void, string] =
|
||||||
|
## Start the edge filter orchestration loops.
|
||||||
|
## Caller must ensure this is only called in edge mode (relay nil, filter client present).
|
||||||
|
self.edgeFilterWakeup = newAsyncEvent()
|
||||||
|
|
||||||
|
self.peerEventListener = WakuPeerEvent.listen(
|
||||||
|
self.node.brokerCtx,
|
||||||
|
proc(evt: WakuPeerEvent) {.async: (raises: []), gcsafe.} =
|
||||||
|
if evt.kind == WakuPeerEventKind.EventDisconnected or
|
||||||
|
evt.kind == WakuPeerEventKind.EventMetadataUpdated:
|
||||||
|
self.edgeFilterWakeup.fire()
|
||||||
|
,
|
||||||
|
).valueOr:
|
||||||
|
return err("Failed to listen to peer events for edge filter: " & error)
|
||||||
|
|
||||||
|
self.edgeFilterSubLoopFut = self.edgeFilterSubLoop()
|
||||||
|
self.edgeFilterHealthLoopFut = self.edgeFilterHealthLoop()
|
||||||
|
return ok()
|
||||||
|
|
||||||
|
proc stopEdgeFilterLoops(self: SubscriptionManager) {.async: (raises: []).} =
|
||||||
|
## Stop the edge filter orchestration loops and clean up pending futures.
|
||||||
|
if not isNil(self.edgeFilterSubLoopFut):
|
||||||
|
await self.edgeFilterSubLoopFut.cancelAndWait()
|
||||||
|
self.edgeFilterSubLoopFut = nil
|
||||||
|
|
||||||
|
if not isNil(self.edgeFilterHealthLoopFut):
|
||||||
|
await self.edgeFilterHealthLoopFut.cancelAndWait()
|
||||||
|
self.edgeFilterHealthLoopFut = nil
|
||||||
|
|
||||||
|
for shard, state in self.edgeFilterSubStates:
|
||||||
|
for fut in state.pending:
|
||||||
|
if not fut.finished:
|
||||||
|
await fut.cancelAndWait()
|
||||||
|
|
||||||
|
WakuPeerEvent.dropListener(self.node.brokerCtx, self.peerEventListener)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SubscriptionManager Lifecycle (calls Edge behavior above)
|
||||||
|
#
|
||||||
|
# startSubscriptionManager and stopSubscriptionManager orchestrate both the
|
||||||
|
# core (relay) and edge (filter) paths, and register/clear broker providers.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
proc startSubscriptionManager*(self: SubscriptionManager): Result[void, string] =
|
||||||
|
# Register edge filter broker providers. The shard/content health providers
|
||||||
|
# in WakuNode query these via the broker as a fallback when relay health is
|
||||||
|
# not available. If edge mode is not active, these providers simply return
|
||||||
|
# NOT_SUBSCRIBED / strength 0, which is harmless.
|
||||||
|
RequestEdgeShardHealth.setProvider(
|
||||||
|
self.node.brokerCtx,
|
||||||
|
proc(shard: PubsubTopic): Result[RequestEdgeShardHealth, string] =
|
||||||
|
self.edgeFilterSubStates.withValue(shard, state):
|
||||||
|
return ok(RequestEdgeShardHealth(health: state.currentHealth))
|
||||||
|
return ok(RequestEdgeShardHealth(health: TopicHealth.NOT_SUBSCRIBED)),
|
||||||
|
).isOkOr:
|
||||||
|
error "Can't set provider for RequestEdgeShardHealth", error = error
|
||||||
|
|
||||||
|
RequestEdgeFilterPeerCount.setProvider(
|
||||||
|
self.node.brokerCtx,
|
||||||
|
proc(): Result[RequestEdgeFilterPeerCount, string] =
|
||||||
|
var minPeers = high(int)
|
||||||
|
for state in self.edgeFilterSubStates.values:
|
||||||
|
minPeers = min(minPeers, state.peers.len)
|
||||||
|
if minPeers == high(int):
|
||||||
|
minPeers = 0
|
||||||
|
return ok(RequestEdgeFilterPeerCount(peerCount: minPeers)),
|
||||||
|
).isOkOr:
|
||||||
|
error "Can't set provider for RequestEdgeFilterPeerCount", error = error
|
||||||
|
|
||||||
|
if self.node.wakuRelay.isNil():
|
||||||
|
return self.startEdgeFilterLoops()
|
||||||
|
|
||||||
|
# Core mode: auto-subscribe relay to all shards in autosharding.
|
||||||
|
if self.node.wakuAutoSharding.isSome():
|
||||||
|
let autoSharding = self.node.wakuAutoSharding.get()
|
||||||
|
let clusterId = autoSharding.clusterId
|
||||||
|
let numShards = autoSharding.shardCountGenZero
|
||||||
|
|
||||||
|
if numShards > 0:
|
||||||
|
var clusterPubsubTopics = newSeqOfCap[PubsubTopic](numShards)
|
||||||
|
|
||||||
|
for i in 0 ..< numShards:
|
||||||
|
let shardObj = RelayShard(clusterId: clusterId, shardId: uint16(i))
|
||||||
|
clusterPubsubTopics.add(PubsubTopic($shardObj))
|
||||||
|
|
||||||
|
self.subscribePubsubTopics(clusterPubsubTopics).isOkOr:
|
||||||
|
error "Failed to auto-subscribe Relay to cluster shards: ", error = error
|
||||||
|
else:
|
||||||
|
info "SubscriptionManager has no AutoSharding configured; skipping auto-subscribe."
|
||||||
|
|
||||||
|
return ok()
|
||||||
|
|
||||||
|
proc stopSubscriptionManager*(self: SubscriptionManager) {.async: (raises: []).} =
|
||||||
|
if self.node.wakuRelay.isNil():
|
||||||
|
await self.stopEdgeFilterLoops()
|
||||||
|
RequestEdgeShardHealth.clearProvider(self.node.brokerCtx)
|
||||||
|
RequestEdgeFilterPeerCount.clearProvider(self.node.brokerCtx)
|
||||||
|
|||||||
@ -2,6 +2,9 @@ import chronos, results, std/strutils, ../../api/types
|
|||||||
|
|
||||||
export ConnectionStatus
|
export ConnectionStatus
|
||||||
|
|
||||||
|
const HealthyThreshold* = 2
|
||||||
|
## Minimum peers required per service protocol for a "Connected" status (excluding Relay).
|
||||||
|
|
||||||
proc init*(
|
proc init*(
|
||||||
t: typedesc[ConnectionStatus], strRep: string
|
t: typedesc[ConnectionStatus], strRep: string
|
||||||
): Result[ConnectionStatus, string] =
|
): Result[ConnectionStatus, string] =
|
||||||
|
|||||||
@ -21,6 +21,7 @@ import
|
|||||||
node/health_monitor/health_report,
|
node/health_monitor/health_report,
|
||||||
node/health_monitor/connection_status,
|
node/health_monitor/connection_status,
|
||||||
node/health_monitor/protocol_health,
|
node/health_monitor/protocol_health,
|
||||||
|
requests/health_requests,
|
||||||
]
|
]
|
||||||
|
|
||||||
## This module is aimed to check the state of the "self" Waku Node
|
## This module is aimed to check the state of the "self" Waku Node
|
||||||
@ -29,9 +30,6 @@ import
|
|||||||
# if not called, the outcome of randomization procedures will be the same in every run
|
# if not called, the outcome of randomization procedures will be the same in every run
|
||||||
random.randomize()
|
random.randomize()
|
||||||
|
|
||||||
const HealthyThreshold* = 2
|
|
||||||
## minimum peers required for all services for a Connected status, excluding Relay
|
|
||||||
|
|
||||||
type NodeHealthMonitor* = ref object
|
type NodeHealthMonitor* = ref object
|
||||||
nodeHealth: HealthStatus
|
nodeHealth: HealthStatus
|
||||||
node: WakuNode
|
node: WakuNode
|
||||||
@ -48,7 +46,8 @@ type NodeHealthMonitor* = ref object
|
|||||||
## latest known connectivity strength (e.g. connected peer count) metric for each protocol.
|
## latest known connectivity strength (e.g. connected peer count) metric for each protocol.
|
||||||
## if it doesn't make sense for the protocol in question, this is set to zero.
|
## if it doesn't make sense for the protocol in question, this is set to zero.
|
||||||
relayObserver: PubSubObserver
|
relayObserver: PubSubObserver
|
||||||
peerEventListener: EventWakuPeerListener
|
peerEventListener: WakuPeerEventListener
|
||||||
|
shardHealthListener: EventShardTopicHealthChangeListener
|
||||||
|
|
||||||
func getHealth*(report: HealthReport, kind: WakuProtocol): ProtocolHealth =
|
func getHealth*(report: HealthReport, kind: WakuProtocol): ProtocolHealth =
|
||||||
for h in report.protocolsHealth:
|
for h in report.protocolsHealth:
|
||||||
@ -198,6 +197,17 @@ proc getFilterClientHealth(hm: NodeHealthMonitor): ProtocolHealth =
|
|||||||
hm.strength[WakuProtocol.FilterClientProtocol] = 0
|
hm.strength[WakuProtocol.FilterClientProtocol] = 0
|
||||||
return p.notMounted()
|
return p.notMounted()
|
||||||
|
|
||||||
|
if isNil(hm.node.wakuRelay):
|
||||||
|
let edgeRes = RequestEdgeFilterPeerCount.request(hm.node.brokerCtx)
|
||||||
|
if edgeRes.isOk():
|
||||||
|
let peerCount = edgeRes.get().peerCount
|
||||||
|
if peerCount > 0:
|
||||||
|
hm.strength[WakuProtocol.FilterClientProtocol] = peerCount
|
||||||
|
return p.ready()
|
||||||
|
else:
|
||||||
|
error "Failed to request edge filter peer count", error = edgeRes.error
|
||||||
|
return p.notReady("Failed to request edge filter peer count: " & edgeRes.error)
|
||||||
|
|
||||||
let peerCount = countCapablePeers(hm, WakuFilterSubscribeCodec)
|
let peerCount = countCapablePeers(hm, WakuFilterSubscribeCodec)
|
||||||
hm.strength[WakuProtocol.FilterClientProtocol] = peerCount
|
hm.strength[WakuProtocol.FilterClientProtocol] = peerCount
|
||||||
|
|
||||||
@ -663,14 +673,23 @@ proc startHealthMonitor*(hm: NodeHealthMonitor): Result[void, string] =
|
|||||||
)
|
)
|
||||||
hm.node.wakuRelay.addObserver(hm.relayObserver)
|
hm.node.wakuRelay.addObserver(hm.relayObserver)
|
||||||
|
|
||||||
hm.peerEventListener = EventWakuPeer.listen(
|
hm.peerEventListener = WakuPeerEvent.listen(
|
||||||
hm.node.brokerCtx,
|
hm.node.brokerCtx,
|
||||||
proc(evt: EventWakuPeer): Future[void] {.async: (raises: []), gcsafe.} =
|
proc(evt: WakuPeerEvent): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
## Recompute health on any peer changing anything (join, leave, identify, metadata update)
|
## Recompute health on any peer changing anything (join, leave, identify, metadata update)
|
||||||
hm.healthUpdateEvent.fire(),
|
hm.healthUpdateEvent.fire(),
|
||||||
).valueOr:
|
).valueOr:
|
||||||
return err("Failed to subscribe to peer events: " & error)
|
return err("Failed to subscribe to peer events: " & error)
|
||||||
|
|
||||||
|
hm.shardHealthListener = EventShardTopicHealthChange.listen(
|
||||||
|
hm.node.brokerCtx,
|
||||||
|
proc(
|
||||||
|
evt: EventShardTopicHealthChange
|
||||||
|
): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
|
hm.healthUpdateEvent.fire(),
|
||||||
|
).valueOr:
|
||||||
|
return err("Failed to subscribe to shard health events: " & error)
|
||||||
|
|
||||||
hm.healthUpdateEvent = newAsyncEvent()
|
hm.healthUpdateEvent = newAsyncEvent()
|
||||||
hm.healthUpdateEvent.fire()
|
hm.healthUpdateEvent.fire()
|
||||||
|
|
||||||
@ -690,8 +709,8 @@ proc stopHealthMonitor*(hm: NodeHealthMonitor) {.async.} =
|
|||||||
if not isNil(hm.healthLoopFut):
|
if not isNil(hm.healthLoopFut):
|
||||||
await hm.healthLoopFut.cancelAndWait()
|
await hm.healthLoopFut.cancelAndWait()
|
||||||
|
|
||||||
if hm.peerEventListener.id != 0:
|
WakuPeerEvent.dropListener(hm.node.brokerCtx, hm.peerEventListener)
|
||||||
EventWakuPeer.dropListener(hm.node.brokerCtx, hm.peerEventListener)
|
EventShardTopicHealthChange.dropListener(hm.node.brokerCtx, hm.shardHealthListener)
|
||||||
|
|
||||||
if not isNil(hm.node.wakuRelay) and not isNil(hm.relayObserver):
|
if not isNil(hm.node.wakuRelay) and not isNil(hm.relayObserver):
|
||||||
hm.node.wakuRelay.removeObserver(hm.relayObserver)
|
hm.node.wakuRelay.removeObserver(hm.relayObserver)
|
||||||
|
|||||||
@ -215,31 +215,34 @@ proc loadFromStorage(pm: PeerManager) {.gcsafe.} =
|
|||||||
|
|
||||||
trace "recovered peers from storage", amount = amount
|
trace "recovered peers from storage", amount = amount
|
||||||
|
|
||||||
proc selectPeer*(
|
proc selectPeers*(
|
||||||
pm: PeerManager, proto: string, shard: Option[PubsubTopic] = none(PubsubTopic)
|
pm: PeerManager, proto: string, shard: Option[PubsubTopic] = none(PubsubTopic)
|
||||||
): Option[RemotePeerInfo] =
|
): seq[RemotePeerInfo] =
|
||||||
# Selects the best peer for a given protocol
|
## Returns all peers that support the given protocol (and optionally shard),
|
||||||
|
## shuffled randomly. Callers can further filter or pick from this list.
|
||||||
var peers = pm.switch.peerStore.getPeersByProtocol(proto)
|
var peers = pm.switch.peerStore.getPeersByProtocol(proto)
|
||||||
trace "Selecting peer from peerstore",
|
trace "Selecting peers from peerstore",
|
||||||
protocol = proto, peers, address = cast[uint](pm.switch.peerStore)
|
protocol = proto, num_peers = peers.len, address = cast[uint](pm.switch.peerStore)
|
||||||
|
|
||||||
if shard.isSome():
|
if shard.isSome():
|
||||||
# Parse the shard from the pubsub topic to get cluster and shard ID
|
|
||||||
let shardInfo = RelayShard.parse(shard.get()).valueOr:
|
let shardInfo = RelayShard.parse(shard.get()).valueOr:
|
||||||
trace "Failed to parse shard from pubsub topic", topic = shard.get()
|
trace "Failed to parse shard from pubsub topic", topic = shard.get()
|
||||||
return none(RemotePeerInfo)
|
return @[]
|
||||||
|
|
||||||
# Filter peers that support the requested shard
|
|
||||||
# Check both ENR (if present) and the shards field on RemotePeerInfo
|
|
||||||
peers.keepItIf(
|
peers.keepItIf(
|
||||||
# Check ENR if available
|
|
||||||
(it.enr.isSome() and it.enr.get().containsShard(shard.get())) or
|
(it.enr.isSome() and it.enr.get().containsShard(shard.get())) or
|
||||||
# Otherwise check the shards field directly
|
(it.shards.len > 0 and it.shards.contains(shardInfo.shardId))
|
||||||
(it.shards.len > 0 and it.shards.contains(shardInfo.shardId))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
shuffle(peers)
|
shuffle(peers)
|
||||||
|
return peers
|
||||||
|
|
||||||
|
proc selectPeer*(
|
||||||
|
pm: PeerManager, proto: string, shard: Option[PubsubTopic] = none(PubsubTopic)
|
||||||
|
): Option[RemotePeerInfo] =
|
||||||
|
## Selects a single peer for a given protocol, checking service slots first
|
||||||
|
## (for non-relay protocols).
|
||||||
|
let peers = pm.selectPeers(proto, shard)
|
||||||
|
|
||||||
# No criteria for selecting a peer for WakuRelay, random one
|
# No criteria for selecting a peer for WakuRelay, random one
|
||||||
if proto == WakuRelayCodec:
|
if proto == WakuRelayCodec:
|
||||||
@ -742,7 +745,7 @@ proc refreshPeerMetadata(pm: PeerManager, peerId: PeerId) {.async.} =
|
|||||||
# TODO: should only trigger an event if metadata actually changed
|
# TODO: should only trigger an event if metadata actually changed
|
||||||
# should include the shard subscription delta in the event when
|
# should include the shard subscription delta in the event when
|
||||||
# it is a MetadataUpdated event
|
# it is a MetadataUpdated event
|
||||||
EventWakuPeer.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventMetadataUpdated)
|
WakuPeerEvent.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventMetadataUpdated)
|
||||||
return
|
return
|
||||||
|
|
||||||
info "disconnecting from peer", peerId = peerId, reason = reason
|
info "disconnecting from peer", peerId = peerId, reason = reason
|
||||||
@ -787,7 +790,7 @@ proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
|
|||||||
asyncSpawn(pm.switch.disconnect(peerId))
|
asyncSpawn(pm.switch.disconnect(peerId))
|
||||||
peerStore.delete(peerId)
|
peerStore.delete(peerId)
|
||||||
|
|
||||||
EventWakuPeer.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventConnected)
|
WakuPeerEvent.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventConnected)
|
||||||
|
|
||||||
if not pm.onConnectionChange.isNil():
|
if not pm.onConnectionChange.isNil():
|
||||||
# we don't want to await for the callback to finish
|
# we don't want to await for the callback to finish
|
||||||
@ -804,7 +807,7 @@ proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
|
|||||||
pm.ipTable.del(ip)
|
pm.ipTable.del(ip)
|
||||||
break
|
break
|
||||||
|
|
||||||
EventWakuPeer.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventDisconnected)
|
WakuPeerEvent.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventDisconnected)
|
||||||
|
|
||||||
if not pm.onConnectionChange.isNil():
|
if not pm.onConnectionChange.isNil():
|
||||||
# we don't want to await for the callback to finish
|
# we don't want to await for the callback to finish
|
||||||
@ -812,7 +815,7 @@ proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
|
|||||||
of PeerEventKind.Identified:
|
of PeerEventKind.Identified:
|
||||||
info "event identified", peerId = peerId
|
info "event identified", peerId = peerId
|
||||||
|
|
||||||
EventWakuPeer.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventIdentified)
|
WakuPeerEvent.emit(pm.brokerCtx, peerId, WakuPeerEventKind.EventIdentified)
|
||||||
|
|
||||||
peerStore[ConnectionBook][peerId] = connectedness
|
peerStore[ConnectionBook][peerId] = connectedness
|
||||||
peerStore[DirectionBook][peerId] = direction
|
peerStore[DirectionBook][peerId] = direction
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
{.push raises: [].}
|
{.push raises: [].}
|
||||||
|
|
||||||
import
|
import
|
||||||
std/[options, tables, strutils, sequtils, os, net, random],
|
std/[options, tables, strutils, sequtils, os, net, random, sets],
|
||||||
chronos,
|
chronos,
|
||||||
chronicles,
|
chronicles,
|
||||||
metrics,
|
metrics,
|
||||||
@ -38,7 +38,6 @@ import
|
|||||||
waku_store/resume,
|
waku_store/resume,
|
||||||
waku_store_sync,
|
waku_store_sync,
|
||||||
waku_filter_v2,
|
waku_filter_v2,
|
||||||
waku_filter_v2/common as filter_common,
|
|
||||||
waku_filter_v2/client as filter_client,
|
waku_filter_v2/client as filter_client,
|
||||||
waku_metadata,
|
waku_metadata,
|
||||||
waku_rendezvous/protocol,
|
waku_rendezvous/protocol,
|
||||||
@ -60,7 +59,7 @@ import
|
|||||||
requests/node_requests,
|
requests/node_requests,
|
||||||
requests/health_requests,
|
requests/health_requests,
|
||||||
events/health_events,
|
events/health_events,
|
||||||
events/peer_events,
|
events/message_events,
|
||||||
],
|
],
|
||||||
waku/discovery/waku_kademlia,
|
waku/discovery/waku_kademlia,
|
||||||
./net_config,
|
./net_config,
|
||||||
@ -95,9 +94,6 @@ const clientId* = "Nimbus Waku v2 node"
|
|||||||
|
|
||||||
const WakuNodeVersionString* = "version / git commit hash: " & git_version
|
const WakuNodeVersionString* = "version / git commit hash: " & git_version
|
||||||
|
|
||||||
const EdgeTopicHealthyThreshold = 2
|
|
||||||
## Lightpush server and filter server requirement for a healthy topic in edge mode
|
|
||||||
|
|
||||||
# key and crypto modules different
|
# key and crypto modules different
|
||||||
type
|
type
|
||||||
# TODO: Move to application instance (e.g., `WakuNode2`)
|
# TODO: Move to application instance (e.g., `WakuNode2`)
|
||||||
@ -142,10 +138,6 @@ type
|
|||||||
legacyAppHandlers*: Table[PubsubTopic, WakuRelayHandler]
|
legacyAppHandlers*: Table[PubsubTopic, WakuRelayHandler]
|
||||||
## Kernel API Relay appHandlers (if any)
|
## Kernel API Relay appHandlers (if any)
|
||||||
wakuMix*: WakuMix
|
wakuMix*: WakuMix
|
||||||
edgeTopicsHealth*: Table[PubsubTopic, TopicHealth]
|
|
||||||
edgeHealthEvent*: AsyncEvent
|
|
||||||
edgeHealthLoop: Future[void]
|
|
||||||
peerEventListener*: EventWakuPeerListener
|
|
||||||
kademliaDiscoveryLoop*: Future[void]
|
kademliaDiscoveryLoop*: Future[void]
|
||||||
wakuKademlia*: WakuKademlia
|
wakuKademlia*: WakuKademlia
|
||||||
|
|
||||||
@ -498,52 +490,7 @@ proc updateAnnouncedAddrWithPrimaryIpAddr*(node: WakuNode): Result[void, string]
|
|||||||
|
|
||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
proc calculateEdgeTopicHealth(node: WakuNode, shard: PubsubTopic): TopicHealth =
|
|
||||||
let filterPeers =
|
|
||||||
node.peerManager.getPeersForShard(filter_common.WakuFilterSubscribeCodec, shard)
|
|
||||||
let lightpushPeers =
|
|
||||||
node.peerManager.getPeersForShard(lightpush_protocol.WakuLightPushCodec, shard)
|
|
||||||
|
|
||||||
if filterPeers >= EdgeTopicHealthyThreshold and
|
|
||||||
lightpushPeers >= EdgeTopicHealthyThreshold:
|
|
||||||
return TopicHealth.SUFFICIENTLY_HEALTHY
|
|
||||||
elif filterPeers > 0 and lightpushPeers > 0:
|
|
||||||
return TopicHealth.MINIMALLY_HEALTHY
|
|
||||||
|
|
||||||
return TopicHealth.UNHEALTHY
|
|
||||||
|
|
||||||
proc loopEdgeHealth(node: WakuNode) {.async.} =
|
|
||||||
while node.started:
|
|
||||||
await node.edgeHealthEvent.wait()
|
|
||||||
node.edgeHealthEvent.clear()
|
|
||||||
|
|
||||||
try:
|
|
||||||
for shard in node.edgeTopicsHealth.keys:
|
|
||||||
if not node.wakuRelay.isNil and node.wakuRelay.isSubscribed(shard):
|
|
||||||
continue
|
|
||||||
|
|
||||||
let oldHealth = node.edgeTopicsHealth.getOrDefault(shard, TopicHealth.UNHEALTHY)
|
|
||||||
let newHealth = node.calculateEdgeTopicHealth(shard)
|
|
||||||
if newHealth != oldHealth:
|
|
||||||
node.edgeTopicsHealth[shard] = newHealth
|
|
||||||
EventShardTopicHealthChange.emit(node.brokerCtx, shard, newHealth)
|
|
||||||
except CancelledError:
|
|
||||||
break
|
|
||||||
except CatchableError as e:
|
|
||||||
warn "Error in edge health check", error = e.msg
|
|
||||||
|
|
||||||
# safety cooldown to protect from edge cases
|
|
||||||
await sleepAsync(100.milliseconds)
|
|
||||||
|
|
||||||
proc startProvidersAndListeners*(node: WakuNode) =
|
proc startProvidersAndListeners*(node: WakuNode) =
|
||||||
node.peerEventListener = EventWakuPeer.listen(
|
|
||||||
node.brokerCtx,
|
|
||||||
proc(evt: EventWakuPeer) {.async: (raises: []), gcsafe.} =
|
|
||||||
node.edgeHealthEvent.fire(),
|
|
||||||
).valueOr:
|
|
||||||
error "Failed to listen to peer events", error = error
|
|
||||||
return
|
|
||||||
|
|
||||||
RequestRelayShard.setProvider(
|
RequestRelayShard.setProvider(
|
||||||
node.brokerCtx,
|
node.brokerCtx,
|
||||||
proc(
|
proc(
|
||||||
@ -561,14 +508,23 @@ proc startProvidersAndListeners*(node: WakuNode) =
|
|||||||
var response: RequestShardTopicsHealth
|
var response: RequestShardTopicsHealth
|
||||||
|
|
||||||
for shard in topics:
|
for shard in topics:
|
||||||
var healthStatus = TopicHealth.UNHEALTHY
|
# Health resolution order:
|
||||||
|
# 1. Relay topicsHealth (computed from gossipsub mesh state)
|
||||||
|
# 2. If relay is active but topicsHealth hasn't computed yet, UNHEALTHY
|
||||||
|
# 3. Otherwise, ask edge filter (via broker; no-op if no provider set)
|
||||||
|
var healthStatus = TopicHealth.NOT_SUBSCRIBED
|
||||||
|
|
||||||
if not node.wakuRelay.isNil:
|
if not node.wakuRelay.isNil:
|
||||||
healthStatus =
|
healthStatus =
|
||||||
node.wakuRelay.topicsHealth.getOrDefault(shard, TopicHealth.NOT_SUBSCRIBED)
|
node.wakuRelay.topicsHealth.getOrDefault(shard, TopicHealth.NOT_SUBSCRIBED)
|
||||||
|
|
||||||
if healthStatus == TopicHealth.NOT_SUBSCRIBED:
|
if healthStatus == TopicHealth.NOT_SUBSCRIBED:
|
||||||
healthStatus = node.calculateEdgeTopicHealth(shard)
|
if not node.wakuRelay.isNil and node.wakuRelay.isSubscribed(shard):
|
||||||
|
healthStatus = TopicHealth.UNHEALTHY
|
||||||
|
else:
|
||||||
|
let edgeRes = RequestEdgeShardHealth.request(node.brokerCtx, shard)
|
||||||
|
if edgeRes.isOk():
|
||||||
|
healthStatus = edgeRes.get().health
|
||||||
|
|
||||||
response.topicHealth.add((shard, healthStatus))
|
response.topicHealth.add((shard, healthStatus))
|
||||||
|
|
||||||
@ -594,9 +550,10 @@ proc startProvidersAndListeners*(node: WakuNode) =
|
|||||||
pubsubTopic, TopicHealth.NOT_SUBSCRIBED
|
pubsubTopic, TopicHealth.NOT_SUBSCRIBED
|
||||||
)
|
)
|
||||||
|
|
||||||
if topicHealth == TopicHealth.NOT_SUBSCRIBED and
|
if topicHealth == TopicHealth.NOT_SUBSCRIBED:
|
||||||
pubsubTopic in node.edgeTopicsHealth:
|
let edgeRes = RequestEdgeShardHealth.request(node.brokerCtx, pubsubTopic)
|
||||||
topicHealth = node.calculateEdgeTopicHealth(pubsubTopic)
|
if edgeRes.isOk():
|
||||||
|
topicHealth = edgeRes.get().health
|
||||||
|
|
||||||
response.contentTopicHealth.add((topic: contentTopic, health: topicHealth))
|
response.contentTopicHealth.add((topic: contentTopic, health: topicHealth))
|
||||||
|
|
||||||
@ -605,7 +562,6 @@ proc startProvidersAndListeners*(node: WakuNode) =
|
|||||||
error "Can't set provider for RequestContentTopicsHealth", error = error
|
error "Can't set provider for RequestContentTopicsHealth", error = error
|
||||||
|
|
||||||
proc stopProvidersAndListeners*(node: WakuNode) =
|
proc stopProvidersAndListeners*(node: WakuNode) =
|
||||||
EventWakuPeer.dropListener(node.brokerCtx, node.peerEventListener)
|
|
||||||
RequestRelayShard.clearProvider(node.brokerCtx)
|
RequestRelayShard.clearProvider(node.brokerCtx)
|
||||||
RequestContentTopicsHealth.clearProvider(node.brokerCtx)
|
RequestContentTopicsHealth.clearProvider(node.brokerCtx)
|
||||||
RequestShardTopicsHealth.clearProvider(node.brokerCtx)
|
RequestShardTopicsHealth.clearProvider(node.brokerCtx)
|
||||||
@ -658,13 +614,16 @@ proc start*(node: WakuNode) {.async.} =
|
|||||||
## The switch will update addresses after start using the addressMapper
|
## The switch will update addresses after start using the addressMapper
|
||||||
await node.switch.start()
|
await node.switch.start()
|
||||||
|
|
||||||
node.edgeHealthEvent = newAsyncEvent()
|
node.started = true
|
||||||
node.edgeHealthLoop = loopEdgeHealth(node)
|
|
||||||
|
if not node.wakuFilterClient.isNil():
|
||||||
|
node.wakuFilterClient.registerPushHandler(
|
||||||
|
proc(pubsubTopic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} =
|
||||||
|
MessageSeenEvent.emit(node.brokerCtx, pubsubTopic, msg)
|
||||||
|
)
|
||||||
|
|
||||||
node.startProvidersAndListeners()
|
node.startProvidersAndListeners()
|
||||||
|
|
||||||
node.started = true
|
|
||||||
|
|
||||||
if not zeroPortPresent:
|
if not zeroPortPresent:
|
||||||
updateAnnouncedAddrWithPrimaryIpAddr(node).isOkOr:
|
updateAnnouncedAddrWithPrimaryIpAddr(node).isOkOr:
|
||||||
error "failed update announced addr", error = $error
|
error "failed update announced addr", error = $error
|
||||||
@ -678,10 +637,6 @@ proc stop*(node: WakuNode) {.async.} =
|
|||||||
|
|
||||||
node.stopProvidersAndListeners()
|
node.stopProvidersAndListeners()
|
||||||
|
|
||||||
if not node.edgeHealthLoop.isNil:
|
|
||||||
await node.edgeHealthLoop.cancelAndWait()
|
|
||||||
node.edgeHealthLoop = nil
|
|
||||||
|
|
||||||
await node.switch.stop()
|
await node.switch.stop()
|
||||||
|
|
||||||
node.peerManager.stop()
|
node.peerManager.stop()
|
||||||
|
|||||||
@ -37,3 +37,15 @@ RequestBroker:
|
|||||||
healthStatus*: ProtocolHealth
|
healthStatus*: ProtocolHealth
|
||||||
|
|
||||||
proc signature(protocol: WakuProtocol): Future[Result[RequestProtocolHealth, string]]
|
proc signature(protocol: WakuProtocol): Future[Result[RequestProtocolHealth, string]]
|
||||||
|
|
||||||
|
# Get edge filter health for a single shard (set by DeliveryService when edge mode is active)
|
||||||
|
RequestBroker(sync):
|
||||||
|
type RequestEdgeShardHealth* = object
|
||||||
|
health*: TopicHealth
|
||||||
|
|
||||||
|
proc signature(shard: PubsubTopic): Result[RequestEdgeShardHealth, string]
|
||||||
|
|
||||||
|
# Get edge filter confirmed peer count (set by DeliveryService when edge mode is active)
|
||||||
|
RequestBroker(sync):
|
||||||
|
type RequestEdgeFilterPeerCount* = object
|
||||||
|
peerCount*: int
|
||||||
|
|||||||
@ -1,3 +1,3 @@
|
|||||||
import ./subscription/subscription_manager, ./subscription/push_handler
|
import ./subscription/push_handler
|
||||||
|
|
||||||
export subscription_manager, push_handler
|
export push_handler
|
||||||
|
|||||||
@ -1,52 +0,0 @@
|
|||||||
{.push raises: [].}
|
|
||||||
|
|
||||||
import std/tables, results, chronicles, chronos
|
|
||||||
|
|
||||||
import ./push_handler, ../topics, ../message
|
|
||||||
|
|
||||||
## Subscription manager
|
|
||||||
type LegacySubscriptionManager* = object
|
|
||||||
subscriptions: TableRef[(string, ContentTopic), FilterPushHandler]
|
|
||||||
|
|
||||||
proc init*(T: type LegacySubscriptionManager): T =
|
|
||||||
LegacySubscriptionManager(
|
|
||||||
subscriptions: newTable[(string, ContentTopic), FilterPushHandler]()
|
|
||||||
)
|
|
||||||
|
|
||||||
proc clear*(m: var LegacySubscriptionManager) =
|
|
||||||
m.subscriptions.clear()
|
|
||||||
|
|
||||||
proc registerSubscription*(
|
|
||||||
m: LegacySubscriptionManager,
|
|
||||||
pubsubTopic: PubsubTopic,
|
|
||||||
contentTopic: ContentTopic,
|
|
||||||
handler: FilterPushHandler,
|
|
||||||
) =
|
|
||||||
try:
|
|
||||||
# TODO: Handle over subscription surprises
|
|
||||||
m.subscriptions[(pubsubTopic, contentTopic)] = handler
|
|
||||||
except CatchableError:
|
|
||||||
error "failed to register filter subscription", error = getCurrentExceptionMsg()
|
|
||||||
|
|
||||||
proc removeSubscription*(
|
|
||||||
m: LegacySubscriptionManager, pubsubTopic: PubsubTopic, contentTopic: ContentTopic
|
|
||||||
) =
|
|
||||||
m.subscriptions.del((pubsubTopic, contentTopic))
|
|
||||||
|
|
||||||
proc notifySubscriptionHandler*(
|
|
||||||
m: LegacySubscriptionManager,
|
|
||||||
pubsubTopic: PubsubTopic,
|
|
||||||
contentTopic: ContentTopic,
|
|
||||||
message: WakuMessage,
|
|
||||||
) =
|
|
||||||
if not m.subscriptions.hasKey((pubsubTopic, contentTopic)):
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
let handler = m.subscriptions[(pubsubTopic, contentTopic)]
|
|
||||||
asyncSpawn handler(pubsubTopic, message)
|
|
||||||
except CatchableError:
|
|
||||||
discard
|
|
||||||
|
|
||||||
proc getSubscriptionsCount*(m: LegacySubscriptionManager): int =
|
|
||||||
m.subscriptions.len()
|
|
||||||
@ -101,12 +101,20 @@ proc sendSubscribeRequest(
|
|||||||
return ok()
|
return ok()
|
||||||
|
|
||||||
proc ping*(
|
proc ping*(
|
||||||
wfc: WakuFilterClient, servicePeer: RemotePeerInfo
|
wfc: WakuFilterClient, servicePeer: RemotePeerInfo, timeout = chronos.seconds(0)
|
||||||
): Future[FilterSubscribeResult] {.async.} =
|
): Future[FilterSubscribeResult] {.async.} =
|
||||||
info "sending ping", servicePeer = shortLog($servicePeer)
|
info "sending ping", servicePeer = shortLog($servicePeer)
|
||||||
let requestId = generateRequestId(wfc.rng)
|
let requestId = generateRequestId(wfc.rng)
|
||||||
let filterSubscribeRequest = FilterSubscribeRequest.ping(requestId)
|
let filterSubscribeRequest = FilterSubscribeRequest.ping(requestId)
|
||||||
|
|
||||||
|
if timeout > chronos.seconds(0):
|
||||||
|
let fut = wfc.sendSubscribeRequest(servicePeer, filterSubscribeRequest)
|
||||||
|
if not await fut.withTimeout(timeout):
|
||||||
|
return err(
|
||||||
|
FilterSubscribeError.parse(uint32(FilterSubscribeErrorKind.PEER_DIAL_FAILURE))
|
||||||
|
)
|
||||||
|
return fut.read()
|
||||||
|
|
||||||
return await wfc.sendSubscribeRequest(servicePeer, filterSubscribeRequest)
|
return await wfc.sendSubscribeRequest(servicePeer, filterSubscribeRequest)
|
||||||
|
|
||||||
proc subscribe*(
|
proc subscribe*(
|
||||||
|
|||||||
@ -157,7 +157,7 @@ type
|
|||||||
): Future[ValidationResult] {.gcsafe, raises: [Defect].}
|
): Future[ValidationResult] {.gcsafe, raises: [Defect].}
|
||||||
WakuRelay* = ref object of GossipSub
|
WakuRelay* = ref object of GossipSub
|
||||||
brokerCtx: BrokerContext
|
brokerCtx: BrokerContext
|
||||||
peerEventListener: EventWakuPeerListener
|
peerEventListener: WakuPeerEventListener
|
||||||
# seq of tuples: the first entry in the tuple contains the validators are called for every topic
|
# seq of tuples: the first entry in the tuple contains the validators are called for every topic
|
||||||
# the second entry contains the error messages to be returned when the validator fails
|
# the second entry contains the error messages to be returned when the validator fails
|
||||||
wakuValidators: seq[tuple[handler: WakuValidatorHandler, errorMessage: string]]
|
wakuValidators: seq[tuple[handler: WakuValidatorHandler, errorMessage: string]]
|
||||||
@ -376,9 +376,9 @@ proc new*(
|
|||||||
w.initProtocolHandler()
|
w.initProtocolHandler()
|
||||||
w.initRelayObservers()
|
w.initRelayObservers()
|
||||||
|
|
||||||
w.peerEventListener = EventWakuPeer.listen(
|
w.peerEventListener = WakuPeerEvent.listen(
|
||||||
w.brokerCtx,
|
w.brokerCtx,
|
||||||
proc(evt: EventWakuPeer): Future[void] {.async: (raises: []), gcsafe.} =
|
proc(evt: WakuPeerEvent): Future[void] {.async: (raises: []), gcsafe.} =
|
||||||
if evt.kind == WakuPeerEventKind.EventDisconnected:
|
if evt.kind == WakuPeerEventKind.EventDisconnected:
|
||||||
w.topicHealthCheckAll = true
|
w.topicHealthCheckAll = true
|
||||||
w.topicHealthUpdateEvent.fire()
|
w.topicHealthUpdateEvent.fire()
|
||||||
@ -524,8 +524,7 @@ method stop*(w: WakuRelay) {.async, base.} =
|
|||||||
info "stop"
|
info "stop"
|
||||||
await procCall GossipSub(w).stop()
|
await procCall GossipSub(w).stop()
|
||||||
|
|
||||||
if w.peerEventListener.id != 0:
|
WakuPeerEvent.dropListener(w.brokerCtx, w.peerEventListener)
|
||||||
EventWakuPeer.dropListener(w.brokerCtx, w.peerEventListener)
|
|
||||||
|
|
||||||
if not w.topicHealthLoopHandle.isNil():
|
if not w.topicHealthLoopHandle.isNil():
|
||||||
await w.topicHealthLoopHandle.cancelAndWait()
|
await w.topicHealthLoopHandle.cancelAndWait()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user