diff --git a/apps/liteprotocoltester/liteprotocoltester.nim b/apps/liteprotocoltester/liteprotocoltester.nim index 46c85e910..a41c83400 100644 --- a/apps/liteprotocoltester/liteprotocoltester.nim +++ b/apps/liteprotocoltester/liteprotocoltester.nim @@ -96,7 +96,7 @@ when isMainModule: wakuNodeConf.shards = @[conf.shard] wakuNodeConf.contentTopics = conf.contentTopics - wakuNodeConf.clusterId = conf.clusterId + wakuNodeConf.clusterId = some(conf.clusterId) ## TODO: Depending on the tester needs we might extend here with shards, clusterId, etc... wakuNodeConf.metricsServer = true @@ -123,7 +123,7 @@ when isMainModule: error "Waku initialization failed", error = error quit(QuitFailure) - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: error "Starting waku failed", error = error quit(QuitFailure) diff --git a/apps/wakunode2/wakunode2.nim b/apps/wakunode2/wakunode2.nim index 484adf68f..be3a83f57 100644 --- a/apps/wakunode2/wakunode2.nim +++ b/apps/wakunode2/wakunode2.nim @@ -55,7 +55,7 @@ when isMainModule: error "Waku initialization failed", error = error quit(QuitFailure) - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: error "Starting waku failed", error = error quit(QuitFailure) diff --git a/channels/events.nim b/channels/events.nim index 904a34dc6..3e271976e 100644 --- a/channels/events.nim +++ b/channels/events.nim @@ -1,7 +1,7 @@ ## Reliable Channel event types emitted to API consumers. ## ## Lifecycle events for individual segments (sent / propagated / errored) -## are the same as the network-level ones the DeliveryService already +## are the same as the network-level ones the MessagingClient already ## emits — `requestId` is shared across layers — so we just re-export ## `waku/events/message_events` and avoid declaring duplicates. ## diff --git a/channels/reliable_channel.nim b/channels/reliable_channel.nim index e32b57e36..6aa7086e5 100644 --- a/channels/reliable_channel.nim +++ b/channels/reliable_channel.nim @@ -20,8 +20,7 @@ import bearssl/rand import stew/byteutils import libp2p/crypto/crypto as libp2p_crypto -import waku/api/api -import waku/factory/waku as waku_factory +import waku/api/types import waku/node/delivery_service/send_service import waku/waku_core/topics @@ -32,7 +31,7 @@ import ./rate_limit_manager/rate_limit_manager import ./encryption/encryption export - api, waku_factory, events, segmentation, scalable_data_sync, rate_limit_manager, + types, send_service, events, segmentation, scalable_data_sync, rate_limit_manager, encryption const LipWireReliableChannelVersion* = "RELIABLE-CHANNEL-API/1" @@ -47,9 +46,10 @@ type SendHandler* = proc(envelope: MessageEnvelope): Future[Result[RequestId, string]] {. async: (raises: [CatchableError]), gcsafe .} - ## Egress dispatch boundary. Defaults to `waku.send`; tests inject a - ## fake that records calls and returns canned `RequestId`s so the - ## send state machine can be exercised end-to-end without a network. + ## Egress dispatch boundary. Typically wraps `MessagingClient.send`; + ## tests inject a fake that records calls and returns canned + ## `RequestId`s so the send state machine can be exercised end-to-end + ## without a network. MessagePersistence {.pure.} = enum Persistent @@ -264,20 +264,20 @@ proc onReadyToSend( meta: LipWireReliableChannelVersion.toBytes(), ) - ## `waku.send` is not annotated `(raises: [])`, but this listener is. + ## `sendHandler` is not annotated `(raises: [])`, but this listener is. ## Convert any raise to a Result error so the state machine handles ## both failure modes (Result.err and exception) through one path. let sendRes = try: await self.sendHandler(envelope) except CatchableError as e: - Result[RequestId, string].err("waku send raised: " & e.msg) + Result[RequestId, string].err("messaging send raised: " & e.msg) let messagingReqId = sendRes.valueOr: MessageErrorEvent.emit( self.brokerCtx, MessageErrorEvent( - requestId: channelReqId, messageHash: "", error: "waku send failed: " & error + requestId: channelReqId, messageHash: "", error: "messaging send failed: " & error ), ) self.markSegmentFailed(channelReqId) @@ -374,7 +374,7 @@ proc onMessageReceived( proc new*( T: type ReliableChannel, - waku: Waku, + sendHandler: SendHandler, channelId: ChannelId, contentTopic: ContentTopic, senderId: SdsParticipantID, @@ -382,7 +382,6 @@ proc new*( sdsConfig: SdsConfig, rateConfig: RateLimitConfig, brokerCtx: BrokerContext = globalBrokerContext(), - sendHandler: SendHandler = nil, ): T = ## Pipeline handlers (segmentation/SDS/rate-limit) are constructed ## inside the channel rather than handed in by the caller — they are @@ -391,19 +390,11 @@ proc new*( ## `Decrypt` request brokers, so the channel keeps no per-instance ## encryption state either. ## - ## `sendHandler` defaults to `waku.send`; tests pass a fake to drive - ## the send state machine without touching the network. - let resolvedSendHandler = - if sendHandler.isNil(): - proc( - envelope: MessageEnvelope - ): Future[Result[RequestId, string]] {.async: (raises: [CatchableError]), gcsafe.} = - return await waku.send(envelope) - else: - sendHandler - + ## `sendHandler` is the egress dispatch. The owning `ReliableChannelManager` + ## typically constructs it as a closure over `MessagingClient.send`. Tests + ## pass a fake to drive the send state machine without touching the network. let chn = T( - sendHandler: resolvedSendHandler, + sendHandler: sendHandler, channelId: channelId, contentTopic: contentTopic, senderId: senderId, diff --git a/channels/reliable_channel_manager.nim b/channels/reliable_channel_manager.nim index 747f755b4..68ae82388 100644 --- a/channels/reliable_channel_manager.nim +++ b/channels/reliable_channel_manager.nim @@ -10,11 +10,10 @@ import results import chronos import stew/byteutils -import waku/api/api -import waku/api/api_conf +import brokers/broker_context + import waku/events/message_events as waku_message_events -import waku/factory/waku as waku_factory -import waku/node/delivery_service/delivery_service +import waku/messaging_client import waku/waku_core/topics import ./reliable_channel @@ -24,40 +23,43 @@ export reliable_channel type ReliableChannelManager* = ref object channels: Table[ChannelId, ReliableChannel] - waku: Waku - ## Owned by the manager. The channel layer reaches the messaging - ## API through `waku.send(envelope)`; constructing DeliveryTasks - ## directly would breach the layer boundary. + messagingClient: MessagingClient + ## Borrowed from the owning `Waku`. + sendHandler: SendHandler + ## Default egress dispatch for channels created through this manager. + ## Constructed at mount time as a closure over `MessagingClient.send` + ## so the channel layer itself stays callable-only. brokerCtx: BrokerContext proc new*( T: type ReliableChannelManager, - conf: WakuNodeConf, + messagingClient: MessagingClient, + sendHandler: SendHandler, brokerCtx: BrokerContext = globalBrokerContext(), -): Future[Result[T, string]] {.async.} = - ## TODO !! The proper ownership chain is: - ## ReliableChannelManager -> DeliveryService (MessagingClient) -> Waku (Kernel/Protocols) -> WakuNode, - ## and this will be implemented in the future. For now, `createNode` - ## is called here to get a Waku instance, and the WakuNode is immediately discarded. - ## This is a temporary workaround to get the API - - let waku = ?(await createNode(conf)) - - let manager = T( - channels: initTable[ChannelId, ReliableChannel](), waku: waku, brokerCtx: brokerCtx +): Result[T, string] = + if messagingClient.isNil(): + return err("messaging client is required") + if sendHandler.isNil(): + return err("sendHandler is required") + return ok( + T( + channels: initTable[ChannelId, ReliableChannel](), + messagingClient: messagingClient, + sendHandler: sendHandler, + brokerCtx: brokerCtx, + ) ) - return ok(manager) - proc start*(self: ReliableChannelManager): Result[void, string] = - ## Bring the owned DeliveryService up. Separated from `new` so callers - ## can register encryption providers / create channels before traffic - ## starts flowing. - self.waku.deliveryService.startDeliveryService() + ## Placeholder: per-channel listeners are installed in `ReliableChannel.new`, + ## so the manager has nothing to start at this layer. Kept for symmetry + ## with the `Waku` mount/start lifecycle and as a hook for future state. + discard + ok() proc stop*(self: ReliableChannelManager) {.async.} = - if not self.waku.isNil(): - await self.waku.deliveryService.stopDeliveryService() + ## Placeholder mirror of `start`. + discard proc createReliableChannel*( self: ReliableChannelManager, @@ -66,17 +68,17 @@ proc createReliableChannel*( senderId: SdsParticipantID, sendHandler: SendHandler = nil, ): Result[ChannelId, string] = - ## Spec entry point. The `DeliveryService` and `rng` the channel needs - ## are sourced from the owning `ReliableChannelManager` rather than - ## passed per call. Encryption is wired up through the `Encrypt`/ - ## `Decrypt` request brokers — the application installs its own - ## providers (or `setNoopEncryption()`) before traffic flows. + ## Spec entry point. The `sendHandler` and `rng` the channel needs are + ## sourced from the owning `ReliableChannelManager` rather than passed + ## per call. Encryption is wired up through the `Encrypt`/`Decrypt` + ## request brokers — the application installs its own providers + ## (or `setNoopEncryption()`) before traffic flows. ## ## Segmentation, SDS and rate-limit configs will eventually be read ## from the node's `NodeConfig`. Defaults for now. ## - ## `sendHandler` is left `nil` in production so the channel uses the - ## owned `waku.send`; tests pass a fake to bypass the network. + ## `sendHandler` defaults to the manager's default (constructed at mount + ## from `MessagingClient.send`); tests pass a fake to bypass the network. if self.channels.hasKey(channelId): return err("channel already exists: " & channelId) @@ -95,8 +97,14 @@ proc createReliableChannel*( epochPeriodSec: DefaultEpochPeriodSec, messagesPerEpoch: DefaultMessagesPerEpoch ) + let effectiveSendHandler = + if sendHandler.isNil(): + self.sendHandler + else: + sendHandler + let chn = ReliableChannel.new( - waku = self.waku, + sendHandler = effectiveSendHandler, channelId = channelId, contentTopic = contentTopic, senderId = senderId, @@ -104,7 +112,6 @@ proc createReliableChannel*( sdsConfig = sdsConfig, rateConfig = rateConfig, brokerCtx = self.brokerCtx, - sendHandler = sendHandler, ) self.channels[channelId] = chn @@ -137,5 +144,5 @@ proc send*( ## `ReliableChannel` installs its own `MessageReceivedEvent` listener ## in `ReliableChannel.new`, filters by spec marker and `contentTopic`, ## and routes to its private `onMessageReceived`. This keeps the lower -## layer (MessagingAPI/Waku) unaware of the existence of ReliableChannel +## layer (MessagingClient/Waku) unaware of the existence of ReliableChannel ## and keeps the manager out of per-channel event dispatch. diff --git a/examples/api_example/api_example.nim b/examples/api_example/api_example.nim index 2093a81c0..207e83429 100644 --- a/examples/api_example/api_example.nim +++ b/examples/api_example/api_example.nim @@ -82,8 +82,12 @@ when isMainModule: echo("Waku node created successfully!") + node.mountMessagingClient().isOkOr: + echo "Failed to mount messaging: ", error + quit(QuitFailure) + # Start the node - (waitFor startWaku(addr node)).isOkOr: + (waitFor node.start()).isOkOr: echo "Failed to start node: ", error quit(QuitFailure) diff --git a/examples/wakustealthcommitments/node_spec.nim b/examples/wakustealthcommitments/node_spec.nim index d85e83a5b..572a32316 100644 --- a/examples/wakustealthcommitments/node_spec.nim +++ b/examples/wakustealthcommitments/node_spec.nim @@ -30,25 +30,25 @@ proc setup*(): Waku = # Override configuration conf.maxMessageSize = twnNetworkConf.maxMessageSize - conf.clusterId = twnNetworkConf.clusterId + conf.clusterId = some(twnNetworkConf.clusterId) conf.rlnRelayEthContractAddress = twnNetworkConf.rlnRelayEthContractAddress - conf.rlnRelayDynamic = twnNetworkConf.rlnRelayDynamic - conf.discv5Discovery = twnNetworkConf.discv5Discovery + conf.rlnRelayDynamic = some(twnNetworkConf.rlnRelayDynamic) + conf.discv5Discovery = some(twnNetworkConf.discv5Discovery) conf.discv5BootstrapNodes = conf.discv5BootstrapNodes & twnNetworkConf.discv5BootstrapNodes - conf.rlnEpochSizeSec = twnNetworkConf.rlnEpochSizeSec - conf.rlnRelayUserMessageLimit = twnNetworkConf.rlnRelayUserMessageLimit + conf.rlnEpochSizeSec = some(twnNetworkConf.rlnEpochSizeSec) + conf.rlnRelayUserMessageLimit = some(twnNetworkConf.rlnRelayUserMessageLimit) # Only set rlnRelay to true if relay is configured if conf.relay: - conf.rlnRelay = twnNetworkConf.rlnRelay + conf.rlnRelay = some(twnNetworkConf.rlnRelay) info "Starting node" var waku = (waitFor Waku.new(conf)).valueOr: error "Waku initialization failed", error = error quit(QuitFailure) - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: error "Starting waku failed", error = error quit(QuitFailure) diff --git a/liblogosdelivery/logos_delivery_api/node_api.nim b/liblogosdelivery/logos_delivery_api/node_api.nim index 2e30d1b43..b7f6f4321 100644 --- a/liblogosdelivery/logos_delivery_api/node_api.nim +++ b/liblogosdelivery/logos_delivery_api/node_api.nim @@ -1,11 +1,11 @@ -import std/[json, strutils, tables] -import chronos, chronicles, results, confutils, confutils/std/net, ffi +import std/json +import chronos, chronicles, results, ffi import waku/factory/waku, waku/node/waku_node, waku/api/[api, types], waku/events/[message_events, health_events], - tools/confutils/cli_args, + tools/confutils/conf_from_json, ../declare_lib, ../json_event @@ -15,59 +15,11 @@ proc `%`*(id: RequestId): JsonNode = registerReqFFI(CreateNodeRequest, ctx: ptr FFIContext[Waku]): proc(configJson: cstring): Future[Result[string, string]] {.async.} = - ## Parse the JSON configuration using fieldPairs approach (WakuNodeConf) - var conf = defaultWakuNodeConf().valueOr: - return err("Failed creating default conf: " & error) + let conf = parseConfJson($configJson).valueOr: + error "Failed to assemble WakuNodeConf from JSON", + error = error, configJson = $configJson + return err(error) - var jsonNode: JsonNode - try: - jsonNode = parseJson($configJson) - except Exception: - let exceptionMsg = getCurrentExceptionMsg() - error "Failed to parse config JSON", - error = exceptionMsg, configJson = $configJson - return err( - "Failed to parse config JSON: " & exceptionMsg & " configJson string: " & - $configJson - ) - - var jsonFields: Table[string, (string, JsonNode)] - for key, value in jsonNode: - let lowerKey = key.toLowerAscii() - - if jsonFields.hasKey(lowerKey): - error "Duplicate configuration option found when normalized to lowercase", - key = key - return err( - "Duplicate configuration option found when normalized to lowercase: '" & key & - "'" - ) - - jsonFields[lowerKey] = (key, value) - - for confField, confValue in fieldPairs(conf): - let lowerField = confField.toLowerAscii() - if jsonFields.hasKey(lowerField): - let (jsonKey, jsonValue) = jsonFields[lowerField] - let formattedString = ($jsonValue).strip(chars = {'\"'}) - try: - confValue = parseCmdArg(typeof(confValue), formattedString) - except Exception: - return err( - "Failed to parse field '" & confField & "' from JSON key '" & jsonKey & "': " & - getCurrentExceptionMsg() & ". Value: " & formattedString - ) - - jsonFields.del(lowerField) - - if jsonFields.len > 0: - var unknownKeys = newSeq[string]() - for _, (jsonKey, _) in pairs(jsonFields): - unknownKeys.add(jsonKey) - error "Unrecognized configuration option(s) found", option = unknownKeys - return err("Unrecognized configuration option(s) found: " & $unknownKeys) - - # Create the node ctx.myLib[] = (await api.createNode(conf)).valueOr: let errMsg = $error chronicles.error "CreateNodeRequest failed", err = errMsg @@ -96,7 +48,7 @@ proc logosdelivery_create_node( ): pointer {.dynlib, exportc, cdecl.} = initializeLibrary() - if isNil(callback): + if callback.isNil(): echo "error: missing callback in logosdelivery_create_node" return nil @@ -172,7 +124,17 @@ proc logosdelivery_start_node( chronicles.error "ConnectionStatusChange.listen failed", err = $error return err("ConnectionStatusChange.listen failed: " & $error) - (await startWaku(addr ctx.myLib[])).isOkOr: + ctx.myLib[].mountMessagingClient().isOkOr: + let errMsg = $error + chronicles.error "mountMessagingClient failed", error = errMsg + return err("failed to mount messaging: " & errMsg) + + ctx.myLib[].mountReliableChannelManager().isOkOr: + let errMsg = $error + chronicles.error "mountReliableChannelManager failed", err = errMsg + return err("failed to mount reliable channel manager: " & errMsg) + + (await ctx.myLib[].start()).isOkOr: let errMsg = $error chronicles.error "START_NODE failed", err = errMsg return err("failed to start: " & errMsg) diff --git a/library/kernel_api/node_lifecycle_api.nim b/library/kernel_api/node_lifecycle_api.nim index 8f3e99b24..55dd7cd55 100644 --- a/library/kernel_api/node_lifecycle_api.nim +++ b/library/kernel_api/node_lifecycle_api.nim @@ -71,7 +71,7 @@ registerReqFFI(CreateNodeRequest, ctx: ptr FFIContext[Waku]): proc waku_start( ctx: ptr FFIContext[Waku], callback: FFICallBack, userData: pointer ) {.ffi.} = - (await startWaku(ctx[].myLib)).isOkOr: + (await ctx.myLib[].start()).isOkOr: error "START_NODE failed", error = error return err("failed to start: " & $error) return ok("") diff --git a/nix/default.nix b/nix/default.nix index ec9e0542c..dfe537f24 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -30,7 +30,7 @@ let # while others use the repo root. Pass both so the compiler finds either layout. pathArgs = builtins.concatStringsSep " " - (builtins.concatMap (p: [ "--path:${p}" "--path:${p}/src" ]) + (builtins.concatMap (p: [ "--path:${p}" "--path:${p}/src" "--path:${p}/sds" ]) (builtins.attrValues otherDeps)); libExt = diff --git a/tests/api/test_api_health.nim b/tests/api/test_api_health.nim index d949db24f..856d18897 100644 --- a/tests/api/test_api_health.nim +++ b/tests/api/test_api_health.nim @@ -97,13 +97,15 @@ suite "LM API health checking": conf.listenAddress = parseIpAddress("0.0.0.0") conf.tcpPort = Port(0) conf.discv5UdpPort = Port(0) - conf.clusterId = 3'u16 + conf.clusterId = some(3'u16) conf.numShardsInNetwork = 1 conf.rest = false client = (await createNode(conf)).valueOr: raiseAssert error - (await startWaku(addr client)).isOkOr: + client.mountMessagingClient().isOkOr: + raiseAssert error + (await client.start()).isOkOr: raiseAssert error asyncTeardown: @@ -274,14 +276,16 @@ suite "LM API health checking": edgeConf.listenAddress = parseIpAddress("0.0.0.0") edgeConf.tcpPort = Port(0) edgeConf.discv5UdpPort = Port(0) - edgeConf.clusterId = 3'u16 + edgeConf.clusterId = some(3'u16) edgeConf.maxMessageSize = "150 KiB" edgeConf.rest = false edgeWaku = (await createNode(edgeConf)).valueOr: raiseAssert "Failed to create edge node: " & error - (await startWaku(addr edgeWaku)).isOkOr: + edgeWaku.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount edge messaging: " & error + (await edgeWaku.start()).isOkOr: raiseAssert "Failed to start edge waku: " & error let relayReq = await RequestProtocolHealth.request( diff --git a/tests/api/test_api_receive.nim b/tests/api/test_api_receive.nim index d6aa954a4..8466a52ab 100644 --- a/tests/api/test_api_receive.nim +++ b/tests/api/test_api_receive.nim @@ -6,6 +6,7 @@ import libp2p/[peerid, peerinfo, crypto/crypto] import brokers/broker_context import ../testlib/[common, wakucore, wakunode, testasync] import ../waku_archive/archive_utils +import waku/messaging_client import waku, @@ -16,7 +17,6 @@ import waku_relay/protocol, waku_archive, waku_archive/common as archive_common, - node/delivery_service/delivery_service, node/delivery_service/recv_service, ] import waku/factory/waku_conf @@ -67,9 +67,9 @@ proc createApiNodeConf(numShards: uint16 = 1): WakuNodeConf = conf.listenAddress = parseIpAddress("0.0.0.0") conf.tcpPort = Port(0) conf.discv5UdpPort = Port(0) - conf.clusterId = 3'u16 + conf.clusterId = some(3'u16) conf.numShardsInNetwork = numShards - conf.reliabilityEnabled = true + conf.reliabilityEnabled = some(true) conf.rest = false result = conf @@ -147,7 +147,8 @@ suite "Messaging API, Receive Service (store recovery)": subscriber = (await createNode(createApiNodeConf(numShards))).expect( "Failed to create subscriber" ) - (await startWaku(addr subscriber)).expect("Failed to start subscriber") + subscriber.mountMessagingClient().expect("Failed to mount messaging") + (await subscriber.start()).expect("Failed to start subscriber") # publish after the subscriber exists but before it connects to the # store; the message reaches the archive but the subscriber doesn't @@ -185,7 +186,7 @@ suite "Messaging API, Receive Service (store recovery)": await eventManager.teardown() # trigger store check, should recover and deliver via MessageReceivedEvent - await subscriber.deliveryService.recvService.checkStore() + await subscriber.messagingClient.recvService.checkStore() let received = await eventManager.waitForEvents(TestTimeout) check received diff --git a/tests/api/test_api_send.nim b/tests/api/test_api_send.nim index 084119041..9380fbfdd 100644 --- a/tests/api/test_api_send.nim +++ b/tests/api/test_api_send.nim @@ -126,9 +126,9 @@ proc createApiNodeConf(mode: cli_args.WakuMode = cli_args.WakuMode.Core): WakuNo conf.listenAddress = parseIpAddress("0.0.0.0") conf.tcpPort = Port(0) conf.discv5UdpPort = Port(0) - conf.clusterId = 3'u16 + conf.clusterId = some(3'u16) conf.numShardsInNetwork = 1 - conf.reliabilityEnabled = true + conf.reliabilityEnabled = some(true) conf.rest = false result = conf @@ -241,7 +241,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf())).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error # node is not connected ! @@ -263,7 +265,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf())).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error await node.node.connectToNodes( @@ -297,7 +301,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf())).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error await node.node.connectToNodes(@[relayNode1PeerInfo]) @@ -327,7 +333,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf())).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error await node.node.connectToNodes(@[lightpushNodePeerInfo]) @@ -357,7 +365,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf())).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error await node.node.connectToNodes(@[lightpushNodePeerInfo, storeNodePeerInfo]) @@ -411,7 +421,9 @@ suite "Waku API - Send": lockNewGlobalBrokerContext: node = (await createNode(createApiNodeConf(cli_args.WakuMode.Edge))).valueOr: raiseAssert error - (await startWaku(addr node)).isOkOr: + node.mountMessagingClient().isOkOr: + raiseAssert "Failed to mount messaging: " & error + (await node.start()).isOkOr: raiseAssert "Failed to start Waku node: " & error await node.node.connectToNodes(@[fakeLightpushNodePeerInfo]) diff --git a/tests/api/test_api_subscription.nim b/tests/api/test_api_subscription.nim index 32d4e742f..984a5c91e 100644 --- a/tests/api/test_api_subscription.nim +++ b/tests/api/test_api_subscription.nim @@ -5,6 +5,7 @@ import chronos, testutils/unittests, stew/byteutils import libp2p/[peerid, peerinfo, multiaddress, crypto/crypto] import brokers/broker_context import ../testlib/[common, wakucore, wakunode, testasync] +import waku/messaging_client import waku, @@ -14,13 +15,14 @@ import events/message_events, waku_relay/protocol, node/kernel_api/filter, - node/delivery_service/subscription_manager, + node/subscription_manager, ] import waku/factory/waku_conf import tools/confutils/cli_args const TestTimeout = chronos.seconds(10) const NegativeTestTimeout = chronos.seconds(2) +const EdgeWaitTimeout = chronos.seconds(60) type ReceiveEventListenerManager = ref object brokerCtx: BrokerContext @@ -75,9 +77,9 @@ proc createApiNodeConf( conf.listenAddress = parseIpAddress("0.0.0.0") conf.tcpPort = Port(0) conf.discv5UdpPort = Port(0) - conf.clusterId = 3'u16 + conf.clusterId = some(3'u16) conf.numShardsInNetwork = numShards - conf.reliabilityEnabled = true + conf.reliabilityEnabled = some(true) conf.rest = false result = conf @@ -85,7 +87,8 @@ proc setupSubscriberNode(conf: WakuNodeConf): Future[Waku] {.async.} = var node: Waku lockNewGlobalBrokerContext: node = (await createNode(conf)).expect("Failed to create subscriber node") - (await startWaku(addr node)).expect("Failed to start subscriber node") + node.mountMessagingClient().expect("Failed to mount messaging") + (await node.start()).expect("Failed to start subscriber node") return node proc setupNetwork( @@ -161,20 +164,39 @@ proc getRelayShard(node: WakuNode, contentTopic: ContentTopic): PubsubTopic = return PubsubTopic($shardObj) proc waitForMesh(node: WakuNode, shard: PubsubTopic) {.async.} = - for _ in 0 ..< 50: + let deadline = Moment.now() + EdgeWaitTimeout + while Moment.now() < deadline: if node.wakuRelay.getNumPeersInMesh(shard).valueOr(0) > 0: return await sleepAsync(100.milliseconds) raise newException(ValueError, "GossipSub Mesh failed to stabilize on " & shard) proc waitForEdgeSubs(w: Waku, shard: PubsubTopic) {.async.} = - let sm = w.deliveryService.subscriptionManager - for _ in 0 ..< 50: - if sm.edgeFilterPeerCount(shard) > 0: + let deadline = Moment.now() + EdgeWaitTimeout + while Moment.now() < deadline: + if w.node.subscriptionManager.edgeFilterPeerCount(shard) > 0: return await sleepAsync(100.milliseconds) raise newException(ValueError, "Edge filter subscription failed on " & shard) +proc edgePeersReached(w: Waku, shard: PubsubTopic, n: int): Future[bool] {.async.} = + let deadline = Moment.now() + EdgeWaitTimeout + while Moment.now() < deadline: + if w.node.subscriptionManager.edgeFilterPeerCount(shard) >= n: + return true + await sleepAsync(100.milliseconds) + return false + +proc edgePeersDroppedBelow( + w: Waku, shard: PubsubTopic, n: int +): Future[bool] {.async.} = + let deadline = Moment.now() + EdgeWaitTimeout + while Moment.now() < deadline: + if w.node.subscriptionManager.edgeFilterPeerCount(shard) < n: + return true + await sleepAsync(100.milliseconds) + return false + proc publishToMesh( net: TestNetwork, contentTopic: ContentTopic, payload: seq[byte] ): Future[Result[int, string]] {.async.} = @@ -621,7 +643,8 @@ suite "Messaging API, SubscriptionManager": var subscriber: Waku lockNewGlobalBrokerContext: subscriber = (await createNode(conf)).expect("Failed to create edge subscriber") - (await startWaku(addr subscriber)).expect("Failed to start edge subscriber") + subscriber.mountMessagingClient().expect("Failed to mount messaging") + (await subscriber.start()).expect("Failed to start edge subscriber") # Connect edge subscriber to both filter servers so selectPeers finds both await subscriber.node.connectToNodes(@[publisherPeerInfo, meshBuddyPeerInfo]) @@ -632,12 +655,7 @@ suite "Messaging API, SubscriptionManager": (await subscriber.subscribe(testTopic)).expect("Failed to subscribe") # Wait for dialing both filter servers (HealthyThreshold = 2) - for _ in 0 ..< 100: - if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2: - break - await sleepAsync(100.milliseconds) - - check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2 + check await edgePeersReached(subscriber, shard, 2) # Verify message delivery with both servers alive await waitForMesh(publisher, shard) @@ -659,12 +677,8 @@ suite "Messaging API, SubscriptionManager": await subscriber.node.disconnectNode(meshBuddyPeerInfo) # Wait for the dead peer to be pruned - for _ in 0 ..< 50: - if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) < 2: - break - await sleepAsync(100.milliseconds) - - check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 1 + check await edgePeersDroppedBelow(subscriber, shard, 2) + check subscriber.node.subscriptionManager.edgeFilterPeerCount(shard) >= 1 # Verify messages still arrive through the surviving filter server (publisher) eventManager = newReceiveEventListenerManager(subscriber.brokerCtx, 1) @@ -758,7 +772,8 @@ suite "Messaging API, SubscriptionManager": var subscriber: Waku lockNewGlobalBrokerContext: subscriber = (await createNode(conf)).expect("Failed to create edge subscriber") - (await startWaku(addr subscriber)).expect("Failed to start edge subscriber") + subscriber.mountMessagingClient().expect("Failed to mount messaging") + (await subscriber.start()).expect("Failed to start edge subscriber") await subscriber.node.connectToNodes( @[publisherPeerInfo, meshBuddyPeerInfo, sparePeerInfo] @@ -770,23 +785,13 @@ suite "Messaging API, SubscriptionManager": (await subscriber.subscribe(testTopic)).expect("Failed to subscribe") # Wait for 2 confirmed peers (HealthyThreshold). The 3rd is available but not dialed. - for _ in 0 ..< 100: - if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2: - break - await sleepAsync(100.milliseconds) - - require subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) == - 2 + check await edgePeersReached(subscriber, shard, 2) + require subscriber.node.subscriptionManager.edgeFilterPeerCount(shard) == 2 await subscriber.node.disconnectNode(meshBuddyPeerInfo) # Wait for the sub loop to detect the loss and dial a replacement - for _ in 0 ..< 100: - if subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2: - break - await sleepAsync(100.milliseconds) - - check subscriber.deliveryService.subscriptionManager.edgeFilterPeerCount(shard) >= 2 + check await edgePeersReached(subscriber, shard, 2) await waitForMesh(publisher, shard) diff --git a/tests/api/test_node_conf.nim b/tests/api/test_node_conf.nim index 8798c5cc5..fb8d97708 100644 --- a/tests/api/test_node_conf.nim +++ b/tests/api/test_node_conf.nim @@ -37,7 +37,7 @@ suite "WakuNodeConf - mode-driven toWakuConf": var conf = defaultWakuNodeConf().valueOr: raiseAssert error conf.mode = Core - conf.clusterId = 1 + conf.clusterId = some(1'u16) ## When let wakuConfRes = conf.toWakuConf() @@ -58,7 +58,7 @@ suite "WakuNodeConf - mode-driven toWakuConf": var conf = defaultWakuNodeConf().valueOr: raiseAssert error conf.mode = Edge - conf.clusterId = 1 + conf.clusterId = some(1'u16) ## When let wakuConfRes = conf.toWakuConf() @@ -81,7 +81,7 @@ suite "WakuNodeConf - mode-driven toWakuConf": conf.mode = cli_args.WakuMode.noMode conf.relay = true conf.lightpush = false - conf.clusterId = 5 + conf.clusterId = some(5'u16) ## When let wakuConfRes = conf.toWakuConf() @@ -122,7 +122,7 @@ suite "WakuNodeConf - JSON parsing with fieldPairs": let conf = confRes.get() check: conf.mode == cli_args.WakuMode.noMode - conf.clusterId == 0 + conf.clusterId.isNone() conf.logLevel == logging.LogLevel.INFO test "JSON with mode and clusterId": @@ -134,7 +134,7 @@ suite "WakuNodeConf - JSON parsing with fieldPairs": let conf = confRes.get() check: conf.mode == Core - conf.clusterId == 42 + conf.clusterId == some(42'u16) test "JSON with Edge mode": ## Given / When @@ -165,7 +165,7 @@ suite "WakuNodeConf - JSON parsing with fieldPairs": require confRes.isOk() let conf = confRes.get() check: - conf.clusterId == 99 + conf.clusterId == some(99'u16) conf.numShardsInNetwork == 16 test "JSON with unknown fields is silently ignored": @@ -177,7 +177,7 @@ suite "WakuNodeConf - JSON parsing with fieldPairs": require confRes.isOk() let conf = confRes.get() check: - conf.clusterId == 5 + conf.clusterId == some(5'u16) test "Invalid JSON syntax returns error": ## Given / When diff --git a/tests/channels/test_reliable_channel_send_receive.nim b/tests/channels/test_reliable_channel_send_receive.nim index 5ea300eb3..bc889c299 100644 --- a/tests/channels/test_reliable_channel_send_receive.nim +++ b/tests/channels/test_reliable_channel_send_receive.nim @@ -1,6 +1,6 @@ {.used.} -import std/[net] +import std/[net, options] import chronos, testutils/unittests, stew/byteutils import brokers/broker_context @@ -24,9 +24,9 @@ proc createApiNodeConf(): WakuNodeConf = conf.listenAddress = parseIpAddress("0.0.0.0") conf.tcpPort = Port(0) conf.discv5UdpPort = Port(0) - conf.clusterId = 3'u16 + conf.clusterId = some(3'u16) conf.numShardsInNetwork = 1 - conf.reliabilityEnabled = true + conf.reliabilityEnabled = some(true) conf.rest = false return conf @@ -35,7 +35,7 @@ suite "Reliable Channel - ingress": ## Unit test for the receive side of the API: instead of standing ## up two libp2p nodes and a relay mesh, we drive the manager ## directly by emitting a `MessageReceivedEvent` (the exact event - ## the DeliveryService emits when a `WakuMessage` arrives off the + ## the MessagingClient emits when a `WakuMessage` arrives off the ## wire). The manager must: ## - drop traffic missing the Reliable Channel spec marker ## - dispatch the matching channel's `onMessageReceived` @@ -45,13 +45,15 @@ suite "Reliable Channel - ingress": contentTopic = ContentTopic("/reliable-channel/test/proto") let appPayload = "hello reliable channel".toBytes() + var waku: Waku var manager: ReliableChannelManager var brokerCtx: BrokerContext lockNewGlobalBrokerContext: brokerCtx = globalBrokerContext() - manager = (await ReliableChannelManager.new(createApiNodeConf())).expect( - "Failed to create manager" - ) + waku = (await createNode(createApiNodeConf())).expect("createNode") + waku.mountMessagingClient().expect("mountMessagingClient") + waku.mountReliableChannelManager().expect("mountReliableChannelManager") + manager = waku.reliableChannelManager ## Noop encryption providers so the Encrypt/Decrypt brokers have ## something to dispatch to; without this the channel falls back to @@ -95,7 +97,7 @@ suite "Reliable Channel - ingress": if arrived: check received.read() == appPayload - await manager.stop() + (await waku.stop()).expect("stop") asyncTest "manager drops unmarked WakuMessage": ## Mirror of the above: same content topic, but `meta` is empty @@ -105,13 +107,15 @@ suite "Reliable Channel - ingress": contentTopic = ContentTopic("/reliable-channel/test/proto") let appPayload = "foreign payload".toBytes() + var waku: Waku var manager: ReliableChannelManager var brokerCtx: BrokerContext lockNewGlobalBrokerContext: brokerCtx = globalBrokerContext() - manager = (await ReliableChannelManager.new(createApiNodeConf())).expect( - "Failed to create manager" - ) + waku = (await createNode(createApiNodeConf())).expect("createNode") + waku.mountMessagingClient().expect("mountMessagingClient") + waku.mountReliableChannelManager().expect("mountReliableChannelManager") + manager = waku.reliableChannelManager setNoopEncryption() @@ -146,7 +150,7 @@ suite "Reliable Channel - ingress": await sleepAsync(100.milliseconds) check not fired - await manager.stop() + (await waku.stop()).expect("stop") suite "Reliable Channel - send state machine": asyncTest "MessageSentEvent finalises the channelReqId as Sent": @@ -162,13 +166,15 @@ suite "Reliable Channel - send state machine": contentTopic = ContentTopic("/reliable-channel/test/sm-success") fakeMsgReqId = RequestId("fake-msg-req-1") + var waku: Waku var manager: ReliableChannelManager var brokerCtx: BrokerContext lockNewGlobalBrokerContext: brokerCtx = globalBrokerContext() - manager = (await ReliableChannelManager.new(createApiNodeConf())).expect( - "Failed to create manager" - ) + waku = (await createNode(createApiNodeConf())).expect("createNode") + waku.mountMessagingClient().expect("mountMessagingClient") + waku.mountReliableChannelManager().expect("mountReliableChannelManager") + manager = waku.reliableChannelManager setNoopEncryption() @@ -213,7 +219,7 @@ suite "Reliable Channel - send state machine": if finalised: check sentFut.read() == channelReqId - await manager.stop() + (await waku.stop()).expect("stop") asyncTest "two independent channelReqIds are finalised independently": ## Two `send()` calls -> two independent `channelReqId`s, each with @@ -227,13 +233,15 @@ suite "Reliable Channel - send state machine": channelId = ChannelId("sm-multi-channel") contentTopic = ContentTopic("/reliable-channel/test/sm-multi") + var waku: Waku var manager: ReliableChannelManager var brokerCtx: BrokerContext lockNewGlobalBrokerContext: brokerCtx = globalBrokerContext() - manager = (await ReliableChannelManager.new(createApiNodeConf())).expect( - "Failed to create manager" - ) + waku = (await createNode(createApiNodeConf())).expect("createNode") + waku.mountMessagingClient().expect("mountMessagingClient") + waku.mountReliableChannelManager().expect("mountReliableChannelManager") + manager = waku.reliableChannelManager setNoopEncryption() @@ -303,7 +311,7 @@ suite "Reliable Channel - send state machine": if erroredArrived: check erroredFut.read() == channelReqId2 - await manager.stop() + (await waku.stop()).expect("stop") asyncTest "TODO: channelReqId not pruned until ALL its segments are final": ## Placeholder for the multi-sibling prune rule. Today's diff --git a/tests/factory/test_waku_conf.nim b/tests/factory/test_waku_conf.nim index 885e22867..a458e17db 100644 --- a/tests/factory/test_waku_conf.nim +++ b/tests/factory/test_waku_conf.nim @@ -206,8 +206,9 @@ suite "Waku Conf - build with cluster conf": assert conf.rlnRelayConf.isSome let rlnRelayConf = conf.rlnRelayConf.get() - check rlnRelayConf.ethContractAddress.string == - networkConf.rlnRelayEthContractAddress + # actually match the explicit contractAddress, which is the value set on the builder above + # this proves that an explicit builder call wins over the same field set via the preset + check rlnRelayConf.ethContractAddress.string == contractAddress check rlnRelayConf.dynamic == networkConf.rlnRelayDynamic check rlnRelayConf.chainId == networkConf.rlnRelayChainId check rlnRelayConf.epochSizeSec == networkConf.rlnEpochSizeSec @@ -247,10 +248,6 @@ suite "Waku Conf - build with cluster conf": let networkConf = NetworkConf.LogosDevConf() var builder = WakuConfBuilder.init() builder.withNetworkConf(networkConf) - # Note: builder.withNumShardsInCluster() is not called when the - # value that comes from the CLI path is 0 (which means it was - # either set to 0 or was left unset). - builder.withShardingConf(StaticSharding) ## When let conf = builder.build().expect("build should succeed") diff --git a/tests/node/test_wakunode_health_monitor.nim b/tests/node/test_wakunode_health_monitor.nim index 08f641a75..a85056d51 100644 --- a/tests/node/test_wakunode_health_monitor.nim +++ b/tests/node/test_wakunode_health_monitor.nim @@ -15,8 +15,7 @@ import node/health_monitor/protocol_health, node/health_monitor/topic_health, node/health_monitor/node_health_monitor, - node/delivery_service/delivery_service, - node/delivery_service/subscription_manager, + messaging_client, node/kernel_api/relay, node/kernel_api/store, node/kernel_api/lightpush, @@ -27,6 +26,7 @@ import ] import ../testlib/[wakunode, wakucore], ../waku_archive/archive_utils +import waku/node/subscription_manager const MockDLow = 4 # Mocked GossipSub DLow value @@ -229,8 +229,8 @@ suite "Health Monitor - events": await nodeA.start() let ds = - DeliveryService.new(false, nodeA).expect("Failed to create DeliveryService") - ds.startDeliveryService().expect("Failed to start DeliveryService") + MessagingClient.new(false, nodeA).expect("Failed to create MessagingClient") + ds.start().expect("Failed to start MessagingClient") let monitorA = NodeHealthMonitor.new(nodeA) @@ -317,7 +317,7 @@ suite "Health Monitor - events": lastStatus == ConnectionStatus.Disconnected await monitorA.stopHealthMonitor() - await ds.stopDeliveryService() + await ds.stop() await nodeA.stop() asyncTest "Edge health driven by confirmed filter subscriptions": @@ -333,9 +333,9 @@ suite "Health Monitor - events": await nodeA.start() let ds = - DeliveryService.new(false, nodeA).expect("Failed to create DeliveryService") - ds.startDeliveryService().expect("Failed to start DeliveryService") - let subMgr = ds.subscriptionManager + MessagingClient.new(false, nodeA).expect("Failed to create MessagingClient") + ds.start().expect("Failed to start MessagingClient") + let subMgr = nodeA.subscriptionManager var nodeB: WakuNode lockNewGlobalBrokerContext: @@ -416,7 +416,7 @@ suite "Health Monitor - events": await EventShardTopicHealthChange.dropListener(nodeA.brokerCtx, shardHealthLis) check shardHealthOk == true - check subMgr.edgeFilterSubStates.len > 0 + check nodeA.subscriptionManager.edgeFilterSubStates.len > 0 healthSignal.clear() deadline = Moment.now() + TestConnectivityTimeLimit @@ -428,7 +428,7 @@ suite "Health Monitor - events": check lastStatus == ConnectionStatus.PartiallyConnected - await ds.stopDeliveryService() + await ds.stop() await monitorA.stopHealthMonitor() await nodeB.stop() await nodeA.stop() diff --git a/tests/node/test_wakunode_peer_exchange.nim b/tests/node/test_wakunode_peer_exchange.nim index e6649c455..82ca25868 100644 --- a/tests/node/test_wakunode_peer_exchange.nim +++ b/tests/node/test_wakunode_peer_exchange.nim @@ -9,7 +9,8 @@ import libp2p/peerId, libp2p/crypto/crypto, eth/keys, - eth/p2p/discoveryv5/enr + eth/p2p/discoveryv5/enr, + brokers/broker_context import waku/[ @@ -184,114 +185,115 @@ suite "Waku Peer Exchange": suite "Waku Peer Exchange with discv5": asyncTest "Node successfully exchanges px peers with real discv5": - ## Given (copied from test_waku_discv5.nim) - let - # todo: px flag - flags = CapabilitiesBitfield.init( - lightpush = false, filter = false, store = false, relay = true - ) - bindIp = parseIpAddress("0.0.0.0") - extIp = parseIpAddress("127.0.0.1") + lockNewGlobalBrokerContext: + ## Given (copied from test_waku_discv5.nim) + let + # todo: px flag + flags = CapabilitiesBitfield.init( + lightpush = false, filter = false, store = false, relay = true + ) + bindIp = parseIpAddress("0.0.0.0") + extIp = parseIpAddress("127.0.0.1") - nodeKey1 = generateSecp256k1Key() - nodeTcpPort1 = Port(64010) - nodeUdpPort1 = Port(9000) - node1 = newTestWakuNode( - nodeKey1, - bindIp, - nodeTcpPort1, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort1), + nodeKey1 = generateSecp256k1Key() + nodeTcpPort1 = Port(64010) + nodeUdpPort1 = Port(9000) + node1 = newTestWakuNode( + nodeKey1, + bindIp, + nodeTcpPort1, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort1), + ) + + nodeKey2 = generateSecp256k1Key() + nodeTcpPort2 = Port(64012) + nodeUdpPort2 = Port(9002) + node2 = newTestWakuNode( + nodeKey2, + bindIp, + nodeTcpPort2, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort2), + ) + + nodeKey3 = generateSecp256k1Key() + nodeTcpPort3 = Port(64014) + nodeUdpPort3 = Port(9004) + node3 = newTestWakuNode( + nodeKey3, + bindIp, + nodeTcpPort3, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort3), + ) + + # discv5 + let conf1 = WakuDiscoveryV5Config( + discv5Config: none(DiscoveryConfig), + address: bindIp, + port: nodeUdpPort1, + privateKey: keys.PrivateKey(nodeKey1.skkey), + bootstrapRecords: @[], + autoupdateRecord: true, ) - nodeKey2 = generateSecp256k1Key() - nodeTcpPort2 = Port(64012) - nodeUdpPort2 = Port(9002) - node2 = newTestWakuNode( - nodeKey2, - bindIp, - nodeTcpPort2, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort2), + let disc1 = + WakuDiscoveryV5.new(node1.rng, conf1, some(node1.enr), some(node1.peerManager)) + + let conf2 = WakuDiscoveryV5Config( + discv5Config: none(DiscoveryConfig), + address: bindIp, + port: nodeUdpPort2, + privateKey: keys.PrivateKey(nodeKey2.skkey), + bootstrapRecords: @[disc1.protocol.getRecord()], + autoupdateRecord: true, ) - nodeKey3 = generateSecp256k1Key() - nodeTcpPort3 = Port(64014) - nodeUdpPort3 = Port(9004) - node3 = newTestWakuNode( - nodeKey3, - bindIp, - nodeTcpPort3, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort3), + let disc2 = + WakuDiscoveryV5.new(node2.rng, conf2, some(node2.enr), some(node2.peerManager)) + + await allFutures(node1.start(), node2.start(), node3.start()) + let resultDisc1StartRes = await disc1.start() + assert resultDisc1StartRes.isOk(), resultDisc1StartRes.error + let resultDisc2StartRes = await disc2.start() + assert resultDisc2StartRes.isOk(), resultDisc2StartRes.error + + ## When + var attempts = 10 + while (disc1.protocol.nodesDiscovered < 1 or disc2.protocol.nodesDiscovered < 1) and + attempts > 0: + await sleepAsync(1.seconds) + attempts -= 1 + + # node2 can be connected, so will be returned by peer exchange + require ( + await node1.peerManager.connectPeer(node2.switch.peerInfo.toRemotePeerInfo()) ) - # discv5 - let conf1 = WakuDiscoveryV5Config( - discv5Config: none(DiscoveryConfig), - address: bindIp, - port: nodeUdpPort1, - privateKey: keys.PrivateKey(nodeKey1.skkey), - bootstrapRecords: @[], - autoupdateRecord: true, - ) + # Mount peer exchange + await node1.mountPeerExchange() + await node3.mountPeerExchange() + await node3.mountPeerExchangeClient() - let disc1 = - WakuDiscoveryV5.new(node1.rng, conf1, some(node1.enr), some(node1.peerManager)) + let dialResponse = + await node3.dialForPeerExchange(node1.switch.peerInfo.toRemotePeerInfo()) - let conf2 = WakuDiscoveryV5Config( - discv5Config: none(DiscoveryConfig), - address: bindIp, - port: nodeUdpPort2, - privateKey: keys.PrivateKey(nodeKey2.skkey), - bootstrapRecords: @[disc1.protocol.getRecord()], - autoupdateRecord: true, - ) + check dialResponse.isOk - let disc2 = - WakuDiscoveryV5.new(node2.rng, conf2, some(node2.enr), some(node2.peerManager)) + let + requestPeers = 1 + currentPeers = node3.peerManager.switch.peerStore.peers.len + let res = await node3.fetchPeerExchangePeers(1) + check res.tryGet() == 1 - await allFutures(node1.start(), node2.start(), node3.start()) - let resultDisc1StartRes = await disc1.start() - assert resultDisc1StartRes.isOk(), resultDisc1StartRes.error - let resultDisc2StartRes = await disc2.start() - assert resultDisc2StartRes.isOk(), resultDisc2StartRes.error + # Then node3 has received 1 peer from node1 + check: + node3.peerManager.switch.peerStore.peers.len == currentPeers + requestPeers - ## When - var attempts = 10 - while (disc1.protocol.nodesDiscovered < 1 or disc2.protocol.nodesDiscovered < 1) and - attempts > 0: - await sleepAsync(1.seconds) - attempts -= 1 - - # node2 can be connected, so will be returned by peer exchange - require ( - await node1.peerManager.connectPeer(node2.switch.peerInfo.toRemotePeerInfo()) - ) - - # Mount peer exchange - await node1.mountPeerExchange() - await node3.mountPeerExchange() - await node3.mountPeerExchangeClient() - - let dialResponse = - await node3.dialForPeerExchange(node1.switch.peerInfo.toRemotePeerInfo()) - - check dialResponse.isOk - - let - requestPeers = 1 - currentPeers = node3.peerManager.switch.peerStore.peers.len - let res = await node3.fetchPeerExchangePeers(1) - check res.tryGet() == 1 - - # Then node3 has received 1 peer from node1 - check: - node3.peerManager.switch.peerStore.peers.len == currentPeers + requestPeers - - await allFutures( - [node1.stop(), node2.stop(), node3.stop(), disc1.stop(), disc2.stop()] - ) + await allFutures( + [node1.stop(), node2.stop(), node3.stop(), disc1.stop(), disc2.stop()] + ) diff --git a/tests/test_waku.nim b/tests/test_waku.nim index cf5675716..65a15199d 100644 --- a/tests/test_waku.nim +++ b/tests/test_waku.nim @@ -1,24 +1,28 @@ {.used.} -import chronos, testutils/unittests, std/options +import std/[net, options] + +import chronos, testutils/unittests import waku import tools/confutils/cli_args +import waku/factory/networks_config +import waku/factory/conf_builder/conf_builder suite "Waku API - Create node": asyncTest "Create node with minimal configuration": ## Given var nodeConf = defaultWakuNodeConf().valueOr: - raiseAssert error + raiseAssert "defaultWakuNodeConf failed: " & error nodeConf.mode = Core - nodeConf.clusterId = 3'u16 + nodeConf.clusterId = some(3'u16) nodeConf.rest = false # This is the actual minimal config but as the node auto-start, it is not suitable for tests ## When let node = (await createNode(nodeConf)).valueOr: - raiseAssert error + raiseAssert "createNode (minimal config) failed: " & error ## Then check: @@ -29,9 +33,9 @@ suite "Waku API - Create node": asyncTest "Create node with full configuration": ## Given var nodeConf = defaultWakuNodeConf().valueOr: - raiseAssert error + raiseAssert "defaultWakuNodeConf failed: " & error nodeConf.mode = Core - nodeConf.clusterId = 99'u16 + nodeConf.clusterId = some(99'u16) nodeConf.rest = false nodeConf.numShardsInNetwork = 16 nodeConf.maxMessageSize = "1024 KiB" @@ -44,7 +48,7 @@ suite "Waku API - Create node": ## When let node = (await createNode(nodeConf)).valueOr: - raiseAssert error + raiseAssert "createNode (full config) failed: " & error ## Then check: @@ -61,9 +65,9 @@ suite "Waku API - Create node": asyncTest "Create node with mixed entry nodes (enrtree, multiaddr)": ## Given var nodeConf = defaultWakuNodeConf().valueOr: - raiseAssert error + raiseAssert "defaultWakuNodeConf failed: " & error nodeConf.mode = Core - nodeConf.clusterId = 42'u16 + nodeConf.clusterId = some(42'u16) nodeConf.rest = false nodeConf.entryNodes = @[ "enrtree://AIRVQ5DDA4FFWLRBCHJWUWOO6X6S4ZTZ5B667LQ6AJU6PEYDLRD5O@sandbox.waku.nodes.status.im", @@ -72,7 +76,7 @@ suite "Waku API - Create node": ## When let node = (await createNode(nodeConf)).valueOr: - raiseAssert error + raiseAssert "createNode (mixed entry nodes) failed: " & error ## Then check: diff --git a/tests/waku_discv5/test_waku_discv5.nim b/tests/waku_discv5/test_waku_discv5.nim index 936c01826..36d34058c 100644 --- a/tests/waku_discv5/test_waku_discv5.nim +++ b/tests/waku_discv5/test_waku_discv5.nim @@ -431,7 +431,7 @@ suite "Waku Discovery v5": let waku0 = (await Waku.new(conf)).valueOr: raiseAssert error - (waitFor startWaku(addr waku0)).isOkOr: + (waitFor waku0.start()).isOkOr: raiseAssert error confBuilder.withNodeKey(crypto.PrivateKey.random(Secp256k1, myRng[])[]) @@ -445,7 +445,7 @@ suite "Waku Discovery v5": let waku1 = (await Waku.new(conf1)).valueOr: raiseAssert error - (waitFor startWaku(addr waku1)).isOkOr: + (waitFor waku1.start()).isOkOr: raiseAssert error await waku1.node.mountPeerExchange() @@ -461,7 +461,7 @@ suite "Waku Discovery v5": let waku2 = (await Waku.new(conf2)).valueOr: raiseAssert error - (waitFor startWaku(addr waku2)).isOkOr: + (waitFor waku2.start()).isOkOr: raiseAssert error # leave some time for discv5 to act diff --git a/tests/waku_peer_exchange/test_protocol.nim b/tests/waku_peer_exchange/test_protocol.nim index 74cdba110..29ec45d1e 100644 --- a/tests/waku_peer_exchange/test_protocol.nim +++ b/tests/waku_peer_exchange/test_protocol.nim @@ -5,7 +5,8 @@ import testutils/unittests, chronos, libp2p/[switch, peerId, crypto/crypto], - eth/[keys, p2p/discoveryv5/enr] + eth/[keys, p2p/discoveryv5/enr], + brokers/broker_context import waku/[ @@ -31,110 +32,113 @@ suite "Waku Peer Exchange": suite "request": asyncTest "Retrieve and provide peer exchange peers from discv5": - ## Given (copied from test_waku_discv5.nim) - let - # todo: px flag - flags = CapabilitiesBitfield.init( - lightpush = false, filter = false, store = false, relay = true - ) - bindIp = parseIpAddress("0.0.0.0") - extIp = parseIpAddress("127.0.0.1") + lockNewGlobalBrokerContext: + ## Given (copied from test_waku_discv5.nim) + let + # todo: px flag + flags = CapabilitiesBitfield.init( + lightpush = false, filter = false, store = false, relay = true + ) + bindIp = parseIpAddress("0.0.0.0") + extIp = parseIpAddress("127.0.0.1") - nodeKey1 = generateSecp256k1Key() - nodeTcpPort1 = Port(64010) - nodeUdpPort1 = Port(9000) - node1 = newTestWakuNode( - nodeKey1, - bindIp, - nodeTcpPort1, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort1), + nodeKey1 = generateSecp256k1Key() + nodeTcpPort1 = Port(64010) + nodeUdpPort1 = Port(9000) + node1 = newTestWakuNode( + nodeKey1, + bindIp, + nodeTcpPort1, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort1), + ) + + nodeKey2 = generateSecp256k1Key() + nodeTcpPort2 = Port(64012) + nodeUdpPort2 = Port(9002) + node2 = newTestWakuNode( + nodeKey2, + bindIp, + nodeTcpPort2, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort2), + ) + + nodeKey3 = generateSecp256k1Key() + nodeTcpPort3 = Port(64014) + nodeUdpPort3 = Port(9004) + node3 = newTestWakuNode( + nodeKey3, + bindIp, + nodeTcpPort3, + some(extIp), + wakuFlags = some(flags), + discv5UdpPort = some(nodeUdpPort3), + ) + + # discv5 + let conf1 = WakuDiscoveryV5Config( + discv5Config: none(DiscoveryConfig), + address: bindIp, + port: nodeUdpPort1, + privateKey: keys.PrivateKey(nodeKey1.skkey), + bootstrapRecords: @[], + autoupdateRecord: true, ) - nodeKey2 = generateSecp256k1Key() - nodeTcpPort2 = Port(64012) - nodeUdpPort2 = Port(9002) - node2 = newTestWakuNode( - nodeKey2, - bindIp, - nodeTcpPort2, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort2), + let disc1 = WakuDiscoveryV5.new( + node1.rng, conf1, some(node1.enr), some(node1.peerManager) ) - nodeKey3 = generateSecp256k1Key() - nodeTcpPort3 = Port(64014) - nodeUdpPort3 = Port(9004) - node3 = newTestWakuNode( - nodeKey3, - bindIp, - nodeTcpPort3, - some(extIp), - wakuFlags = some(flags), - discv5UdpPort = some(nodeUdpPort3), + let conf2 = WakuDiscoveryV5Config( + discv5Config: none(DiscoveryConfig), + address: bindIp, + port: nodeUdpPort2, + privateKey: keys.PrivateKey(nodeKey2.skkey), + bootstrapRecords: @[disc1.protocol.getRecord()], + autoupdateRecord: true, ) - # discv5 - let conf1 = WakuDiscoveryV5Config( - discv5Config: none(DiscoveryConfig), - address: bindIp, - port: nodeUdpPort1, - privateKey: keys.PrivateKey(nodeKey1.skkey), - bootstrapRecords: @[], - autoupdateRecord: true, - ) + let disc2 = WakuDiscoveryV5.new( + node2.rng, conf2, some(node2.enr), some(node2.peerManager) + ) - let disc1 = - WakuDiscoveryV5.new(node1.rng, conf1, some(node1.enr), some(node1.peerManager)) + await allFutures(node1.start(), node2.start(), node3.start()) + let resultDisc1StartRes = await disc1.start() + assert resultDisc1StartRes.isOk(), resultDisc1StartRes.error + let resultDisc2StartRes = await disc2.start() + assert resultDisc2StartRes.isOk(), resultDisc2StartRes.error - let conf2 = WakuDiscoveryV5Config( - discv5Config: none(DiscoveryConfig), - address: bindIp, - port: nodeUdpPort2, - privateKey: keys.PrivateKey(nodeKey2.skkey), - bootstrapRecords: @[disc1.protocol.getRecord()], - autoupdateRecord: true, - ) + ## When + var attempts = 10 + while (disc1.protocol.nodesDiscovered < 1 or disc2.protocol.nodesDiscovered < 1) and + attempts > 0: + await sleepAsync(1.seconds) + attempts -= 1 - let disc2 = - WakuDiscoveryV5.new(node2.rng, conf2, some(node2.enr), some(node2.peerManager)) + # node2 can be connected, so will be returned by peer exchange + require ( + await node1.peerManager.connectPeer(node2.switch.peerInfo.toRemotePeerInfo()) + ) - await allFutures(node1.start(), node2.start(), node3.start()) - let resultDisc1StartRes = await disc1.start() - assert resultDisc1StartRes.isOk(), resultDisc1StartRes.error - let resultDisc2StartRes = await disc2.start() - assert resultDisc2StartRes.isOk(), resultDisc2StartRes.error + # Mount peer exchange + await node1.mountPeerExchange() + await node3.mountPeerExchange() - ## When - var attempts = 10 - while (disc1.protocol.nodesDiscovered < 1 or disc2.protocol.nodesDiscovered < 1) and - attempts > 0: - await sleepAsync(1.seconds) - attempts -= 1 + let dialResponse = + await node3.dialForPeerExchange(node1.switch.peerInfo.toRemotePeerInfo()) + let response = dialResponse.get() - # node2 can be connected, so will be returned by peer exchange - require ( - await node1.peerManager.connectPeer(node2.switch.peerInfo.toRemotePeerInfo()) - ) + ## Then + check: + response.get().peerInfos.len == 1 + response.get().peerInfos[0].enr == disc2.protocol.localNode.record.raw - # Mount peer exchange - await node1.mountPeerExchange() - await node3.mountPeerExchange() - - let dialResponse = - await node3.dialForPeerExchange(node1.switch.peerInfo.toRemotePeerInfo()) - let response = dialResponse.get() - - ## Then - check: - response.get().peerInfos.len == 1 - response.get().peerInfos[0].enr == disc2.protocol.localNode.record.raw - - await allFutures( - [node1.stop(), node2.stop(), node3.stop(), disc1.stop(), disc2.stop()] - ) + await allFutures( + [node1.stop(), node2.stop(), node3.stop(), disc1.stop(), disc2.stop()] + ) asyncTest "Request returns some discovered peers": let diff --git a/tests/wakunode2/test_app.nim b/tests/wakunode2/test_app.nim index 7621ab1e7..8dc9e3582 100644 --- a/tests/wakunode2/test_app.nim +++ b/tests/wakunode2/test_app.nim @@ -46,7 +46,7 @@ suite "Wakunode2 - Waku initialization": var waku = (waitFor Waku.new(conf)).valueOr: raiseAssert error - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: raiseAssert error ## Then @@ -71,7 +71,7 @@ suite "Wakunode2 - Waku initialization": var waku = (waitFor Waku.new(conf)).valueOr: raiseAssert error - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: raiseAssert error ## Then @@ -128,7 +128,7 @@ suite "Wakunode2 - Waku initialization": (waitFor waku.stop()).isOkOr: raiseAssert error - (waitFor startWaku(addr waku)).isOkOr: + (waitFor waku.start()).isOkOr: raiseAssert error let portsJson = waku.stateInfo.getNodeInfoItem(NodeInfoId.MyBoundPorts) diff --git a/tools/confutils/cli_args.nim b/tools/confutils/cli_args.nim index f965c3a06..d4b7185f2 100644 --- a/tools/confutils/cli_args.nim +++ b/tools/confutils/cli_args.nim @@ -117,20 +117,23 @@ type WakuNodeConf* = object name: "rln-relay-eth-private-key" .}: string - # TODO: Remove "Default is" when it's already visible on the CLI + # Option-typed; desc states the default since the CLI can't auto-show it for none(). rlnRelayUserMessageLimit* {. desc: - "Set a user message limit for the rln membership registration. Must be a positive integer. Default is 1.", - defaultValue: 1, + "Set a user message limit for the rln membership registration. Must be a positive integer. Default is " & + $DefaultRlnRelayUserMessageLimit & ".", + defaultValue: none(uint64), name: "rln-relay-user-message-limit" - .}: uint64 + .}: Option[uint64] + # Option-typed; desc states the default since the CLI can't auto-show it for none(). rlnEpochSizeSec* {. desc: - "Epoch size in seconds used to rate limit RLN memberships. Default is 1 second.", - defaultValue: 1, + "Epoch size in seconds used to rate limit RLN memberships. Default is " & + $DefaultRlnRelayEpochSizeSec & " second.", + defaultValue: none(uint64), name: "rln-relay-epoch-sec" - .}: uint64 + .}: Option[uint64] maxMessageSize* {. desc: @@ -170,12 +173,15 @@ type WakuNodeConf* = object name: "preset" .}: string + # Option-typed; desc states the default since the CLI can't auto-show it for none(). clusterId* {. - desc: - "Cluster id that the node is running in. Node in a different cluster id is disconnected.", - defaultValue: 0, + desc: static( + "Cluster id that the node is running in. Node in a different cluster id is disconnected. Default is " & + $DefaultClusterId & "." + ), + defaultValue: none(uint16), name: "cluster-id" - .}: uint16 + .}: Option[uint16] agentString* {. defaultValue: "logos-delivery-" & cli_args.git_version, @@ -291,11 +297,14 @@ hence would have reachability issues.""", name: "relay-shard-manager" .}: bool + # Option-typed; desc states the default since the CLI can't auto-show it for none(). rlnRelay* {. - desc: "Enable spam protection through rln-relay: true|false.", - defaultValue: false, + desc: + "Enable spam protection through rln-relay: true|false. Default is " & + $DefaultRlnRelayEnabled & ".", + defaultValue: none(bool), name: "rln-relay" - .}: bool + .}: Option[bool] rlnRelayCredIndex* {. desc: "the index of the onchain commitment to use", @@ -304,9 +313,9 @@ hence would have reachability issues.""", rlnRelayDynamic* {. desc: "Enable waku-rln-relay with on-chain dynamic group management: true|false.", - defaultValue: false, + defaultValue: none(bool), name: "rln-relay-dynamic" - .}: bool + .}: Option[bool] entryNodes* {. desc: @@ -466,13 +475,14 @@ hence would have reachability issues.""", .}: string ## Reliability config + # Option-typed; desc states the default since the CLI can't auto-show it for none(). reliabilityEnabled* {. desc: - """Adds an extra effort in the delivery/reception of messages by leveraging store-v3 requests. -with the drawback of consuming some more bandwidth.""", - defaultValue: true, + """Adds an extra effort in the delivery/reception of messages by leveraging store-v3 requests, with the drawback of consuming some more bandwidth. Default is """ & + $DefaultP2pReliability & ".", + defaultValue: none(bool), name: "reliability" - .}: bool + .}: Option[bool] ## REST HTTP config rest* {. @@ -557,8 +567,11 @@ with the drawback of consuming some more bandwidth.""", .}: string ## Discovery v5 config + # Option-typed; desc states the default since the CLI can't auto-show it for none(). discv5Discovery* {. - desc: "Enable discovering nodes via Node Discovery v5.", + desc: + "Enable discovering nodes via Node Discovery v5. Default is " & + $DefaultDiscv5Enabled & ".", defaultValue: none(bool), name: "discv5-discovery" .}: Option[bool] @@ -627,8 +640,12 @@ with the drawback of consuming some more bandwidth.""", .}: bool #Mix config - mix* {.desc: "Enable mix protocol: true|false", defaultValue: false, name: "mix".}: - bool + # Option-typed; desc states the default since the CLI can't auto-show it for none(). + mix* {. + desc: "Enable mix protocol: true|false. Default is " & $DefaultMix & ".", + defaultValue: none(bool), + name: "mix" + .}: Option[bool] mixkey* {. desc: @@ -643,12 +660,14 @@ with the drawback of consuming some more bandwidth.""", .}: seq[MixNodePubInfo] # Kademlia Discovery config + # Option-typed; desc states the default since the CLI can't auto-show it for none(). enableKadDiscovery* {. desc: - "Enable extended kademlia discovery. Can be enabled without bootstrap nodes for the first node in the network.", - defaultValue: false, + "Enable extended kademlia discovery. Can be enabled without bootstrap nodes for the first node in the network. Default is " & + $DefaultKadEnabled & ".", + defaultValue: none(bool), name: "enable-kad-discovery" - .}: bool + .}: Option[bool] kadBootstrapNodes* {. desc: @@ -919,7 +938,7 @@ proc toKeystoreGeneratorConf*(n: WakuNodeConf): RlnKeystoreGeneratorConf = chainId: UInt256.fromBytesBE(n.rlnRelayChainId.toBytesBE()), ethClientUrls: n.ethClientUrls.mapIt(string(it)), ethContractAddress: n.rlnRelayEthContractAddress, - userMessageLimit: n.rlnRelayUserMessageLimit, + userMessageLimit: n.rlnRelayUserMessageLimit.get(DefaultRlnRelayUserMessageLimit), ethPrivateKey: n.rlnRelayEthPrivateKey, credPath: n.rlnRelayCredPath, credPassword: n.rlnRelayCredPassword, @@ -955,7 +974,7 @@ proc toNetworkConf( proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = var b = WakuConfBuilder.init() - let networkConf = toNetworkConf(n.preset, some(n.clusterId)).valueOr: + let networkConf = toNetworkConf(n.preset, n.clusterId).valueOr: return err("Error determining cluster from preset: " & $error) if networkConf.isSome(): @@ -964,7 +983,8 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = b.withLogLevel(n.logLevel) b.withLogFormat(n.logFormat) - b.rlnRelayConf.withEnabled(n.rlnRelay) + if n.rlnRelay.isSome(): + b.rlnRelayConf.withEnabled(n.rlnRelay.get()) if n.rlnRelayCredPath != "": b.rlnRelayConf.withCredPath(n.rlnRelayCredPath) if n.rlnRelayCredPassword != "": @@ -976,18 +996,22 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = if n.rlnRelayChainId != 0: b.rlnRelayConf.withChainId(n.rlnRelayChainId) - b.rlnRelayConf.withUserMessageLimit(n.rlnRelayUserMessageLimit) - b.rlnRelayConf.withEpochSizeSec(n.rlnEpochSizeSec) + if n.rlnRelayUserMessageLimit.isSome(): + b.rlnRelayConf.withUserMessageLimit(n.rlnRelayUserMessageLimit.get()) + if n.rlnEpochSizeSec.isSome(): + b.rlnRelayConf.withEpochSizeSec(n.rlnEpochSizeSec.get()) if n.rlnRelayCredIndex.isSome(): b.rlnRelayConf.withCredIndex(n.rlnRelayCredIndex.get()) - b.rlnRelayConf.withDynamic(n.rlnRelayDynamic) + if n.rlnRelayDynamic.isSome(): + b.rlnRelayConf.withDynamic(n.rlnRelayDynamic.get()) if n.maxMessageSize != "": b.withMaxMessageSize(n.maxMessageSize) b.withProtectedShards(n.protectedShards) - b.withClusterId(n.clusterId) + if n.clusterId.isSome(): + b.withClusterId(n.clusterId.get()) b.withAgentString(n.agentString) @@ -1041,7 +1065,7 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = if n.numShardsInNetwork != 0: b.withNumShardsInCluster(n.numShardsInNetwork) b.withShardingConf(AutoSharding) - else: + elif networkConf.isNone(): b.withShardingConf(StaticSharding) # It is not possible to pass an empty sequence on the CLI @@ -1074,9 +1098,10 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = b.storeServiceConf.storeSyncConf.withRangeSec(n.storeSyncRange) b.storeServiceConf.storeSyncConf.withRelayJitterSec(n.storeSyncRelayJitter) - b.mixConf.withEnabled(n.mix) + if n.mix.isSome(): + b.mixConf.withEnabled(n.mix.get()) + b.withMix(n.mix.get()) b.mixConf.withMixNodes(n.mixnodes) - b.withMix(n.mix) if n.mixkey.isSome(): b.mixConf.withMixKey(n.mixkey.get()) @@ -1086,7 +1111,8 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = b.filterServiceConf.withMaxCriteria(n.filterMaxCriteria) b.withLightPush(n.lightpush) - b.withP2pReliability(n.reliabilityEnabled) + if n.reliabilityEnabled.isSome(): + b.withP2pReliability(n.reliabilityEnabled.get()) b.restServerConf.withEnabled(n.rest) b.restServerConf.withListenAddress(n.restAddress) @@ -1129,7 +1155,8 @@ proc toWakuConf*(n: WakuNodeConf): ConfResult[WakuConf] = b.withLocalStoragePath(n.localStoragePath) - b.kademliaDiscoveryConf.withEnabled(n.enableKadDiscovery) + if n.enableKadDiscovery.isSome(): + b.kademliaDiscoveryConf.withEnabled(n.enableKadDiscovery.get()) b.kademliaDiscoveryConf.withBootstrapNodes(n.kadBootstrapNodes) # Mode-driven configuration overrides diff --git a/tools/confutils/conf_from_json.nim b/tools/confutils/conf_from_json.nim new file mode 100644 index 000000000..d269faadc --- /dev/null +++ b/tools/confutils/conf_from_json.nim @@ -0,0 +1,121 @@ +import std/[json, strutils, tables] +import confutils, confutils/std/net, results +import ./cli_args + +proc collectJsonFields*( + jsonNode: JsonNode +): Result[Table[string, (string, JsonNode)], string] = + ## Walk the top-level JSON object and key it by lowercased names. + if jsonNode.kind != JObject: + return err("config JSON must be a JSON object, got " & $jsonNode.kind) + var jsonFields: Table[string, (string, JsonNode)] + for key, value in jsonNode: + let lowerKey = key.toLowerAscii() + if jsonFields.hasKey(lowerKey): + let firstKey = jsonFields[lowerKey][0] + return err( + "Duplicate configuration option (case-insensitive): '" & firstKey & "' and '" & + key & "'" + ) + jsonFields[lowerKey] = (key, value) + return ok(jsonFields) + +proc unknownKeysError( + jsonFields: Table[string, (string, JsonNode)], prefix: string +): string = + ## Format leftover JSON keys as an error message. + var keys = newSeq[string]() + for _, (jsonKey, _) in pairs(jsonFields): + keys.add(jsonKey) + return prefix & ": " & $keys + +proc jsonScalarToString(node: JsonNode): Result[string, string] = + ## Convert a scalar JSON value to its string form. + case node.kind + of JString: + return ok(node.getStr()) + of JInt: + return ok($node.getInt()) + of JFloat: + return ok($node.getFloat()) + of JBool: + return ok($node.getBool()) + of JNull: + return ok("") + else: + return err("expected scalar JSON value, got " & $node.kind) + +proc applyJsonFieldsToConf( + conf: var WakuNodeConf, + jsonFields: var Table[string, (string, JsonNode)], + parseErrPrefix: string, + unknownErrPrefix: string, +): Result[void, string] = + ## Walk `conf`'s fields and write each one matched (case-insensitive) by + ## `jsonFields`. seq fields take a JArray (full replace); scalar fields + ## take any scalar JSON kind. Errors on leftover unknown keys. + for confField, confValue in fieldPairs(conf): + let lowerField = confField.toLowerAscii() + if jsonFields.hasKey(lowerField): + let (jsonKey, jsonValue) = jsonFields[lowerField] + when confValue is seq: + if jsonValue.kind != JArray: + return err( + parseErrPrefix & " '" & confField & "' from JSON key '" & jsonKey & + "' must be a JSON array" + ) + var newSeq: typeof(confValue) = @[] + for item in jsonValue: + let formattedItem = jsonScalarToString(item).valueOr: + return err( + parseErrPrefix & " '" & confField & "' from JSON key '" & jsonKey & "': " & + error + ) + try: + type ElemType = typeof(confValue[0]) + newSeq.add(parseCmdArg(ElemType, formattedItem)) + except CatchableError as e: + return err( + parseErrPrefix & " '" & confField & "' from JSON key '" & jsonKey & "': " & + e.msg & ". Value: " & formattedItem + ) + confValue = newSeq + else: + let formattedString = jsonScalarToString(jsonValue).valueOr: + return err( + parseErrPrefix & " '" & confField & "' from JSON key '" & jsonKey & "': " & + error + ) + try: + confValue = parseCmdArg(typeof(confValue), formattedString) + except CatchableError as e: + return err( + parseErrPrefix & " '" & confField & "' from JSON key '" & jsonKey & "': " & + e.msg & ". Value: " & formattedString + ) + jsonFields.del(lowerField) + if jsonFields.len > 0: + return err(unknownKeysError(jsonFields, unknownErrPrefix)) + return ok() + +proc assembleFullConf*( + jsonFields: Table[string, (string, JsonNode)] +): Result[WakuNodeConf, string] = + ## Build a WakuNodeConf from a flat JSON object whose keys are WakuNodeConf field names. + var conf = ?defaultWakuNodeConf() + var fields = jsonFields + ?applyJsonFieldsToConf( + conf, fields, "Failed to parse field", "Unrecognized configuration option(s) found" + ) + return ok(conf) + +proc parseConfJson*(jsonStr: string): Result[WakuNodeConf, string] = + ## Parse a flat JSON config whose keys are WakuNodeConf field names. + var jsonNode: JsonNode + try: + jsonNode = parseJson(jsonStr) + except CatchableError as e: + return err("Failed to parse config JSON: " & e.msg) + + let jsonFields = ?collectJsonFields(jsonNode) + return assembleFullConf(jsonFields) diff --git a/waku/api/api.nim b/waku/api/api.nim index 1eee982fd..03f176a72 100644 --- a/waku/api/api.nim +++ b/waku/api/api.nim @@ -1,10 +1,12 @@ -import chronicles, chronos, results +import std/[net, options] + +import chronicles, chronos, libp2p/peerid, results import waku/factory/waku +import waku/messaging_client import waku/[requests/health_requests, waku_core, waku_node] import waku/node/delivery_service/send_service -import waku/node/delivery_service/subscription_manager -import libp2p/peerid +import waku/node/subscription_manager import ../../tools/confutils/cli_args import ./[api_conf, types] @@ -38,39 +40,15 @@ proc subscribe*( ): Future[Result[void, string]] {.async.} = ?checkApiAvailability(w) - return w.deliveryService.subscriptionManager.subscribe(contentTopic) + return w.node.subscriptionManager.subscribe(contentTopic) proc unsubscribe*(w: Waku, contentTopic: ContentTopic): Result[void, string] = ?checkApiAvailability(w) - return w.deliveryService.subscriptionManager.unsubscribe(contentTopic) + return w.node.subscriptionManager.unsubscribe(contentTopic) proc send*( w: Waku, envelope: MessageEnvelope ): Future[Result[RequestId, string]] {.async.} = ?checkApiAvailability(w) - - let isSubbed = w.deliveryService.subscriptionManager - .isSubscribed(envelope.contentTopic) - .valueOr(false) - if not isSubbed: - info "Auto-subscribing to topic on send", contentTopic = envelope.contentTopic - w.deliveryService.subscriptionManager.subscribe(envelope.contentTopic).isOkOr: - warn "Failed to auto-subscribe", error = error - return err("Failed to auto-subscribe before sending: " & error) - - let requestId = RequestId.new(w.rng) - - let deliveryTask = DeliveryTask.new(requestId, envelope, w.brokerCtx).valueOr: - return err("API send: Failed to create delivery task: " & error) - - info "API send: scheduling delivery task", - requestId = $requestId, - pubsubTopic = deliveryTask.pubsubTopic, - contentTopic = deliveryTask.msg.contentTopic, - msgHash = deliveryTask.msgHash.to0xHex(), - myPeerId = w.node.peerId() - - asyncSpawn w.deliveryService.sendService.send(deliveryTask) - - return ok(requestId) + return await w.messagingClient.send(envelope) diff --git a/waku/factory/conf_builder/discv5_conf_builder.nim b/waku/factory/conf_builder/discv5_conf_builder.nim index 5dd269d23..c2a40132f 100644 --- a/waku/factory/conf_builder/discv5_conf_builder.nim +++ b/waku/factory/conf_builder/discv5_conf_builder.nim @@ -4,7 +4,13 @@ import ../waku_conf logScope: topics = "waku conf builder discv5" -const DefaultDiscv5UdpPort*: Port = Port(9000) +const + DefaultDiscv5Enabled*: bool = false + DefaultDiscv5BitsPerHop*: int = 1 + DefaultDiscv5BucketIpLimit*: uint = 2 + DefaultDiscv5EnrAutoUpdate*: bool = true + DefaultDiscv5TableIpLimit*: uint = 10 + DefaultDiscv5UdpPort*: Port = Port(9000) ########################### ## Discv5 Config Builder ## @@ -48,17 +54,17 @@ proc withBootstrapNodes*(b: var Discv5ConfBuilder, bootstrapNodes: seq[string]) b.bootstrapNodes = concat(b.bootstrapNodes, bootstrapNodes) proc build*(b: Discv5ConfBuilder): Result[Option[Discv5Conf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultDiscv5Enabled): return ok(none(Discv5Conf)) return ok( some( Discv5Conf( bootstrapNodes: b.bootstrapNodes, - bitsPerHop: b.bitsPerHop.get(1), - bucketIpLimit: b.bucketIpLimit.get(2), - enrAutoUpdate: b.enrAutoUpdate.get(true), - tableIpLimit: b.tableIpLimit.get(10), + bitsPerHop: b.bitsPerHop.get(DefaultDiscv5BitsPerHop), + bucketIpLimit: b.bucketIpLimit.get(DefaultDiscv5BucketIpLimit), + enrAutoUpdate: b.enrAutoUpdate.get(DefaultDiscv5EnrAutoUpdate), + tableIpLimit: b.tableIpLimit.get(DefaultDiscv5TableIpLimit), udpPort: b.udpPort.get(DefaultDiscv5UdpPort), ) ) diff --git a/waku/factory/conf_builder/filter_service_conf_builder.nim b/waku/factory/conf_builder/filter_service_conf_builder.nim index 0a6617430..11efc84db 100644 --- a/waku/factory/conf_builder/filter_service_conf_builder.nim +++ b/waku/factory/conf_builder/filter_service_conf_builder.nim @@ -4,6 +4,12 @@ import ../waku_conf logScope: topics = "waku conf builder filter service" +const + DefaultFilterEnabled*: bool = false + DefaultFilterMaxPeersToServe*: uint32 = 500 + DefaultFilterSubscriptionTimeout*: uint16 = 300 + DefaultFilterMaxCriteria*: uint32 = 1000 + ################################### ## Filter Service Config Builder ## ################################### @@ -37,15 +43,15 @@ proc withMaxCriteria*(b: var FilterServiceConfBuilder, maxCriteria: uint32) = b.maxCriteria = some(maxCriteria) proc build*(b: FilterServiceConfBuilder): Result[Option[FilterServiceConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultFilterEnabled): return ok(none(FilterServiceConf)) return ok( some( FilterServiceConf( - maxPeersToServe: b.maxPeersToServe.get(500), - subscriptionTimeout: b.subscriptionTimeout.get(300), - maxCriteria: b.maxCriteria.get(1000), + maxPeersToServe: b.maxPeersToServe.get(DefaultFilterMaxPeersToServe), + subscriptionTimeout: b.subscriptionTimeout.get(DefaultFilterSubscriptionTimeout), + maxCriteria: b.maxCriteria.get(DefaultFilterMaxCriteria), ) ) ) diff --git a/waku/factory/conf_builder/kademlia_discovery_conf_builder.nim b/waku/factory/conf_builder/kademlia_discovery_conf_builder.nim index 916d71be1..135663086 100644 --- a/waku/factory/conf_builder/kademlia_discovery_conf_builder.nim +++ b/waku/factory/conf_builder/kademlia_discovery_conf_builder.nim @@ -5,18 +5,20 @@ import waku/factory/waku_conf logScope: topics = "waku conf builder kademlia discovery" +const DefaultKadEnabled*: bool = false + ####################################### ## Kademlia Discovery Config Builder ## ####################################### type KademliaDiscoveryConfBuilder* = object - enabled*: bool + enabled*: Option[bool] bootstrapNodes*: seq[string] proc init*(T: type KademliaDiscoveryConfBuilder): KademliaDiscoveryConfBuilder = KademliaDiscoveryConfBuilder() proc withEnabled*(b: var KademliaDiscoveryConfBuilder, enabled: bool) = - b.enabled = enabled + b.enabled = some(enabled) proc withBootstrapNodes*( b: var KademliaDiscoveryConfBuilder, bootstrapNodes: seq[string] @@ -27,7 +29,7 @@ proc build*( b: KademliaDiscoveryConfBuilder ): Result[Option[KademliaDiscoveryConf], string] = # Kademlia is enabled if explicitly enabled OR if bootstrap nodes are provided - let enabled = b.enabled or b.bootstrapNodes.len > 0 + let enabled = b.enabled.get(DefaultKadEnabled) or b.bootstrapNodes.len > 0 if not enabled: return ok(none(KademliaDiscoveryConf)) diff --git a/waku/factory/conf_builder/metrics_server_conf_builder.nim b/waku/factory/conf_builder/metrics_server_conf_builder.nim index 8b2ea4eb8..473b04d9e 100644 --- a/waku/factory/conf_builder/metrics_server_conf_builder.nim +++ b/waku/factory/conf_builder/metrics_server_conf_builder.nim @@ -4,7 +4,11 @@ import ../waku_conf logScope: topics = "waku conf builder metrics server" -const DefaultMetricsHttpPort*: Port = Port(8008) +const + DefaultMetricsEnabled*: bool = false + DefaultMetricsHttpAddress*: IpAddress = static parseIpAddress("127.0.0.1") + DefaultMetricsHttpPort*: Port = Port(8008) + DefaultMetricsLogging*: bool = false ################################### ## Metrics Server Config Builder ## @@ -35,15 +39,15 @@ proc withLogging*(b: var MetricsServerConfBuilder, logging: bool) = b.logging = some(logging) proc build*(b: MetricsServerConfBuilder): Result[Option[MetricsServerConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultMetricsEnabled): return ok(none(MetricsServerConf)) return ok( some( MetricsServerConf( - httpAddress: b.httpAddress.get(static parseIpAddress("127.0.0.1")), + httpAddress: b.httpAddress.get(DefaultMetricsHttpAddress), httpPort: b.httpPort.get(DefaultMetricsHttpPort), - logging: b.logging.get(false), + logging: b.logging.get(DefaultMetricsLogging), ) ) ) diff --git a/waku/factory/conf_builder/mix_conf_builder.nim b/waku/factory/conf_builder/mix_conf_builder.nim index 145ccb76e..1a832d352 100644 --- a/waku/factory/conf_builder/mix_conf_builder.nim +++ b/waku/factory/conf_builder/mix_conf_builder.nim @@ -5,6 +5,8 @@ import ../waku_conf, waku/waku_mix logScope: topics = "waku conf builder mix" +const DefaultMixEnabled*: bool = false + ################################## ## Mix Config Builder ## ################################## @@ -26,7 +28,7 @@ proc withMixNodes*(b: var MixConfBuilder, mixNodes: seq[MixNodePubInfo]) = b.mixNodes = mixNodes proc build*(b: MixConfBuilder): Result[Option[MixConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultMixEnabled): return ok(none[MixConf]()) else: if b.mixKey.isSome(): diff --git a/waku/factory/conf_builder/rest_server_conf_builder.nim b/waku/factory/conf_builder/rest_server_conf_builder.nim index dcafbb56a..88446c08b 100644 --- a/waku/factory/conf_builder/rest_server_conf_builder.nim +++ b/waku/factory/conf_builder/rest_server_conf_builder.nim @@ -4,7 +4,10 @@ import ../waku_conf logScope: topics = "waku conf builder rest server" -const DefaultRestPort*: Port = Port(8645) +const + DefaultRestEnabled*: bool = false + DefaultRestPort*: Port = Port(8645) + DefaultRestAdmin*: bool = false ################################ ## REST Server Config Builder ## @@ -43,7 +46,7 @@ proc withRelayCacheCapacity*(b: var RestServerConfBuilder, relayCacheCapacity: u b.relayCacheCapacity = some(relayCacheCapacity) proc build*(b: RestServerConfBuilder): Result[Option[RestServerConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultRestEnabled): return ok(none(RestServerConf)) if b.listenAddress.isNone(): @@ -57,7 +60,7 @@ proc build*(b: RestServerConfBuilder): Result[Option[RestServerConf], string] = allowOrigin: b.allowOrigin, listenAddress: b.listenAddress.get(), port: b.port.get(DefaultRestPort), - admin: b.admin.get(false), + admin: b.admin.get(DefaultRestAdmin), relayCacheCapacity: b.relayCacheCapacity.get(), ) ) diff --git a/waku/factory/conf_builder/rln_relay_conf_builder.nim b/waku/factory/conf_builder/rln_relay_conf_builder.nim index 4cdcf8324..f4e70767e 100644 --- a/waku/factory/conf_builder/rln_relay_conf_builder.nim +++ b/waku/factory/conf_builder/rln_relay_conf_builder.nim @@ -4,6 +4,11 @@ import ../waku_conf logScope: topics = "waku conf builder rln relay" +const + DefaultRlnRelayEnabled*: bool = false + DefaultRlnRelayEpochSizeSec*: uint64 = 1 + DefaultRlnRelayUserMessageLimit*: uint64 = 1 + ############################## ## RLN Relay Config Builder ## ############################## @@ -56,7 +61,7 @@ proc withUserMessageLimit*(b: var RlnRelayConfBuilder, userMessageLimit: uint64) b.userMessageLimit = some(userMessageLimit) proc build*(b: RlnRelayConfBuilder): Result[Option[RlnRelayConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultRlnRelayEnabled): return ok(none(RlnRelayConf)) if b.chainId.isNone(): @@ -78,11 +83,6 @@ proc build*(b: RlnRelayConfBuilder): Result[Option[RlnRelayConf], string] = return err("rlnRelay.ethClientUrls is not specified") if b.ethContractAddress.get("") == "": return err("rlnRelay.ethContractAddress is not specified") - if b.epochSizeSec.isNone(): - return err("rlnRelay.epochSizeSec is not specified") - if b.userMessageLimit.isNone(): - return err("rlnRelay.userMessageLimit is not specified") - return ok( some( RlnRelayConf( @@ -92,8 +92,8 @@ proc build*(b: RlnRelayConfBuilder): Result[Option[RlnRelayConf], string] = dynamic: b.dynamic.get(), ethClientUrls: b.ethClientUrls.get(), ethContractAddress: b.ethContractAddress.get(), - epochSizeSec: b.epochSizeSec.get(), - userMessageLimit: b.userMessageLimit.get(), + epochSizeSec: b.epochSizeSec.get(DefaultRlnRelayEpochSizeSec), + userMessageLimit: b.userMessageLimit.get(DefaultRlnRelayUserMessageLimit), ) ) ) diff --git a/waku/factory/conf_builder/store_service_conf_builder.nim b/waku/factory/conf_builder/store_service_conf_builder.nim index f1b0b1402..d1b51c9e5 100644 --- a/waku/factory/conf_builder/store_service_conf_builder.nim +++ b/waku/factory/conf_builder/store_service_conf_builder.nim @@ -5,6 +5,14 @@ import ../waku_conf, ./store_sync_conf_builder logScope: topics = "waku conf builder store service" +const + DefaultStoreEnabled*: bool = false + DefaultStoreDbMigration*: bool = true + DefaultStoreDbVacuum*: bool = false + DefaultStoreMaxNumDbConnections*: int = 50 + DefaultStoreResume*: bool = false + DefaultStoreRetentionPolicy*: string = "time:" & $2.days.seconds + ################################## ## Store Service Config Builder ## ################################## @@ -77,7 +85,7 @@ proc validateRetentionPolicies(policies: seq[string]): Result[void, string] = return ok() proc build*(b: StoreServiceConfBuilder): Result[Option[StoreServiceConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultStoreEnabled): return ok(none(StoreServiceConf)) if b.dbUrl.get("") == "": @@ -88,7 +96,7 @@ proc build*(b: StoreServiceConfBuilder): Result[Option[StoreServiceConf], string let retentionPolicies = if b.retentionPolicies.len == 0: - @["time:" & $2.days.seconds] + @[DefaultStoreRetentionPolicy] else: validateRetentionPolicies(b.retentionPolicies).isOkOr: return err("invalid retention policies: " & error) @@ -97,12 +105,12 @@ proc build*(b: StoreServiceConfBuilder): Result[Option[StoreServiceConf], string return ok( some( StoreServiceConf( - dbMigration: b.dbMigration.get(true), + dbMigration: b.dbMigration.get(DefaultStoreDbMigration), dbURl: b.dbUrl.get(), - dbVacuum: b.dbVacuum.get(false), - maxNumDbConnections: b.maxNumDbConnections.get(50), + dbVacuum: b.dbVacuum.get(DefaultStoreDbVacuum), + maxNumDbConnections: b.maxNumDbConnections.get(DefaultStoreMaxNumDbConnections), retentionPolicies: retentionPolicies, - resume: b.resume.get(false), + resume: b.resume.get(DefaultStoreResume), storeSyncConf: storeSyncConf, ) ) diff --git a/waku/factory/conf_builder/store_sync_conf_builder.nim b/waku/factory/conf_builder/store_sync_conf_builder.nim index 4c7177b71..d47c199a4 100644 --- a/waku/factory/conf_builder/store_sync_conf_builder.nim +++ b/waku/factory/conf_builder/store_sync_conf_builder.nim @@ -4,6 +4,8 @@ import ../waku_conf logScope: topics = "waku conf builder store sync" +const DefaultStoreSyncEnabled*: bool = false + ################################## ## Store Sync Config Builder ## ################################## @@ -30,7 +32,7 @@ proc withRelayJitterSec*(b: var StoreSyncConfBuilder, relayJitterSec: uint32) = b.relayJitterSec = some(relayJitterSec) proc build*(b: StoreSyncConfBuilder): Result[Option[StoreSyncConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultStoreSyncEnabled): return ok(none(StoreSyncConf)) if b.rangeSec.isNone(): diff --git a/waku/factory/conf_builder/waku_conf_builder.nim b/waku/factory/conf_builder/waku_conf_builder.nim index 96e34eeed..ceecf7f00 100644 --- a/waku/factory/conf_builder/waku_conf_builder.nim +++ b/waku/factory/conf_builder/waku_conf_builder.nim @@ -13,6 +13,9 @@ import factory/networks_config, common/logging, common/utils/parse_size_units, + node/peer_manager, + waku_core/message/default_values, + waku_core/topics/pubsub_topic, waku_enr/capabilities, persistency/persistency, ], @@ -35,9 +38,35 @@ import logScope: topics = "waku conf builder" +# Picks up the same -d:git_version=... build flag that cli_args.nim defines. +const git_version {.strdefine.} = "(unknown)" + const DefaultMaxConnections* = 150 + DefaultRelay*: bool = false + DefaultLightPush*: bool = false + DefaultPeerExchange*: bool = false + DefaultStoreSyncMount*: bool = false + DefaultRendezvous*: bool = false + DefaultMix*: bool = false + DefaultRelayPeerExchange*: bool = false + DefaultLogLevel*: logging.LogLevel = logging.LogLevel.INFO + DefaultLogFormat*: logging.LogFormat = logging.LogFormat.TEXT + DefaultNatStrategy*: string = "none" DefaultP2pTcpPort*: Port = Port(60000) + DefaultP2pListenAddress*: IpAddress = static parseIpAddress("0.0.0.0") + DefaultPortsShift*: uint16 = 0 + DefaultExtMultiAddrsOnly*: bool = false + DefaultDnsAddrsNameServers*: seq[IpAddress] = + @[static parseIpAddress("1.1.1.1"), static parseIpAddress("1.0.0.1")] + DefaultPeerPersistence*: bool = false + DefaultAgentString*: string = "logos-delivery " & git_version + DefaultRelayShardedPeerManagement*: bool = false + DefaultRelayServiceRatio*: string = "50:50" + DefaultCircuitRelayClient*: bool = false + DefaultP2pReliability*: bool = true + DefaultNumShardsInCluster*: uint16 = 1 + DefaultShardingConfKind*: ShardingConfKind = AutoSharding type MaxMessageSizeKind* = enum mmskNone @@ -309,117 +338,124 @@ proc buildShardingConf( bNumShardsInCluster: Option[uint16], bSubscribeShards: Option[seq[uint16]], ): (ShardingConf, seq[uint16]) = - case bShardingConfKind.get(AutoSharding) + case bShardingConfKind.get(DefaultShardingConfKind) of StaticSharding: (ShardingConf(kind: StaticSharding), bSubscribeShards.get(@[])) of AutoSharding: - let numShardsInCluster = bNumShardsInCluster.get(1) + let numShardsInCluster = bNumShardsInCluster.get(DefaultNumShardsInCluster) let shardingConf = ShardingConf(kind: AutoSharding, numShardsInCluster: numShardsInCluster) let upperShard = uint16(numShardsInCluster - 1) (shardingConf, bSubscribeShards.get(toSeq(0.uint16 .. upperShard))) +template checkSetPresetValueToField[T]( + field: var Option[T], presetVal: T, msg: static string +) = + ## Set the field to the preset's value, unless the field is already set + ## (explicit wins). Warn iff the field's existing value differs from the + ## preset's. No-op if they agree. + + if field.isSome(): + if field.get() != presetVal: + warn msg, used = field.get(), discarded = presetVal + else: + field = some(presetVal) + +proc checkAddPresetValueToField[T](field: var seq[T], presetVals: seq[T]) = + ## Append the preset's list values to the field's existing list. Lists + ## concat rather than override; both the user's and the preset's entries + ## end up in the final list. + + field = field & presetVals + proc applyNetworkConf(builder: var WakuConfBuilder) = - # Apply network conf, overrides most values passed individually - # If you want to tweak values, don't use networkConf - # TODO: networkconf should be one field of the conf builder so that this function becomes unnecessary + ## NetworkConf = network presets. + ## Cascade the chosen preset's values onto builder fields the user hasn't set. + ## User-set fields stay; preset fills the gaps and warns on conflict (explicit wins). + ## List fields concat (preset's nodes appended to user's). + if builder.networkConf.isNone(): - return + return # If there is no preset given, then nothing to do. + let networkConf = builder.networkConf.get() - if builder.clusterId.isSome(): - warn "Cluster id was provided alongside a network conf", - used = networkConf.clusterId, discarded = builder.clusterId.get() - builder.clusterId = some(networkConf.clusterId) + checkSetPresetValueToField( + builder.clusterId, networkConf.clusterId, + "Cluster id was provided alongside a network conf", + ) # Apply relay parameters - if builder.relay.get(false) and networkConf.rlnRelay: - if builder.rlnRelayConf.enabled.isSome(): - warn "RLN Relay was provided alongside a network conf", - used = networkConf.rlnRelay, discarded = builder.rlnRelayConf.enabled - builder.rlnRelayConf.withEnabled(true) - - if builder.rlnRelayConf.ethContractAddress.get("") != "": - warn "RLN Relay ETH Contract Address was provided alongside a network conf", - used = networkConf.rlnRelayEthContractAddress.string, - discarded = builder.rlnRelayConf.ethContractAddress.get().string - builder.rlnRelayConf.withEthContractAddress(networkConf.rlnRelayEthContractAddress) - - if builder.rlnRelayConf.chainId.isSome(): - warn "RLN Relay Chain Id was provided alongside a network conf", - used = networkConf.rlnRelayChainId, discarded = builder.rlnRelayConf.chainId - builder.rlnRelayConf.withChainId(networkConf.rlnRelayChainId) - - if builder.rlnRelayConf.dynamic.isSome(): - warn "RLN Relay Dynamic was provided alongside a network conf", - used = networkConf.rlnRelayDynamic, discarded = builder.rlnRelayConf.dynamic - builder.rlnRelayConf.withDynamic(networkConf.rlnRelayDynamic) - - if builder.rlnRelayConf.epochSizeSec.isSome(): - warn "RLN Epoch Size in Seconds was provided alongside a network conf", - used = networkConf.rlnEpochSizeSec, - discarded = builder.rlnRelayConf.epochSizeSec - builder.rlnRelayConf.withEpochSizeSec(networkConf.rlnEpochSizeSec) - - if builder.rlnRelayConf.userMessageLimit.isSome(): - warn "RLN Relay User Message Limit was provided alongside a network conf", - used = networkConf.rlnRelayUserMessageLimit, - discarded = builder.rlnRelayConf.userMessageLimit - if builder.rlnRelayConf.userMessageLimit.get(0) == 0: - ## only override with the "preset" value if there was not explicit set value - builder.rlnRelayConf.withUserMessageLimit(networkConf.rlnRelayUserMessageLimit) - + if builder.relay.get(DefaultRelay) and networkConf.rlnRelay: + checkSetPresetValueToField( + builder.rlnRelayConf.enabled, + networkConf.rlnRelay, # true + "RLN Relay was provided alongside a network conf", + ) + checkSetPresetValueToField( + builder.rlnRelayConf.ethContractAddress, networkConf.rlnRelayEthContractAddress, + "RLN Relay ETH Contract Address was provided alongside a network conf", + ) + checkSetPresetValueToField( + builder.rlnRelayConf.chainId, networkConf.rlnRelayChainId, + "RLN Relay Chain Id was provided alongside a network conf", + ) + checkSetPresetValueToField( + builder.rlnRelayConf.dynamic, networkConf.rlnRelayDynamic, + "RLN Relay Dynamic was provided alongside a network conf", + ) + checkSetPresetValueToField( + builder.rlnRelayConf.epochSizeSec, networkConf.rlnEpochSizeSec, + "RLN Epoch Size in Seconds was provided alongside a network conf", + ) + checkSetPresetValueToField( + builder.rlnRelayConf.userMessageLimit, networkConf.rlnRelayUserMessageLimit, + "RLN Relay User Message Limit was provided alongside a network conf", + ) # End Apply relay parameters case builder.maxMessageSize.kind of mmskNone: - discard + builder.withMaxMessageSize(parseCorrectMsgSize(networkConf.maxMessageSize)) of mmskStr, mmskInt: warn "Max Message Size was provided alongside a network conf", - used = networkConf.maxMessageSize, discarded = $builder.maxMessageSize - builder.withMaxMessageSize(parseCorrectMsgSize(networkConf.maxMessageSize)) - - if builder.shardingConf.isSome(): - warn "Sharding Conf was provided alongside a network conf", - used = networkConf.shardingConf.kind, discarded = builder.shardingConf + used = $builder.maxMessageSize, discarded = networkConf.maxMessageSize + checkSetPresetValueToField( + builder.shardingConf, networkConf.shardingConf.kind, + "Sharding Conf was provided alongside a network conf", + ) case networkConf.shardingConf.kind - of StaticSharding: - builder.shardingConf = some(StaticSharding) of AutoSharding: - builder.shardingConf = some(AutoSharding) - if builder.numShardsInCluster.isSome(): - warn "Num Shards In Cluster overrides network conf preset", - used = builder.numShardsInCluster.get(), - ignored = networkConf.shardingConf.numShardsInCluster - else: - builder.numShardsInCluster = some(networkConf.shardingConf.numShardsInCluster) + checkSetPresetValueToField( + builder.numShardsInCluster, networkConf.shardingConf.numShardsInCluster, + "Num Shards In Cluster overrides network conf preset", + ) + of StaticSharding: + discard - if networkConf.discv5Discovery: - if builder.discv5Conf.enabled.isNone: - builder.discv5Conf.withEnabled(networkConf.discv5Discovery) + checkSetPresetValueToField( + builder.discv5Conf.enabled, networkConf.discv5Discovery, + "Discv5 Discovery was provided alongside a network conf", + ) + checkAddPresetValueToField( + builder.discv5Conf.bootstrapNodes, networkConf.discv5BootstrapNodes + ) - if builder.discv5Conf.bootstrapNodes.len == 0 and - networkConf.discv5BootstrapNodes.len > 0: - warn "Discv5 Bootstrap nodes were provided alongside a network conf", - used = networkConf.discv5BootstrapNodes, - discarded = builder.discv5Conf.bootstrapNodes - builder.discv5Conf.withBootstrapNodes(networkConf.discv5BootstrapNodes) + checkSetPresetValueToField( + builder.kademliaDiscoveryConf.enabled, networkConf.enableKadDiscovery, + "Kademlia Discovery was provided alongside a network conf", + ) + checkAddPresetValueToField( + builder.kademliaDiscoveryConf.bootstrapNodes, networkConf.kadBootstrapNodes + ) - if networkConf.enableKadDiscovery: - if not builder.kademliaDiscoveryConf.enabled: - builder.kademliaDiscoveryConf.withEnabled(networkConf.enableKadDiscovery) - - if builder.kademliaDiscoveryConf.bootstrapNodes.len == 0 and - networkConf.kadBootstrapNodes.len > 0: - builder.kademliaDiscoveryConf.withBootstrapNodes(networkConf.kadBootstrapNodes) - - if networkConf.mix: - if builder.mix.isNone: - builder.mix = some(networkConf.mix) - - if builder.p2pReliability.isNone: - builder.withP2pReliability(networkConf.p2pReliability) + checkSetPresetValueToField( + builder.mix, networkConf.mix, "Mix was provided alongside a network conf" + ) + checkSetPresetValueToField( + builder.p2pReliability, networkConf.p2pReliability, + "P2P Reliability was provided alongside a network conf", + ) # Process entry nodes from network config - classify and distribute if networkConf.entryNodes.len > 0: @@ -457,44 +493,44 @@ proc build*( builder.relay.get() else: warn "whether to mount relay is not specified, defaulting to not mounting" - false + DefaultRelay let lightPush = if builder.lightPush.isSome(): builder.lightPush.get() else: warn "whether to mount lightPush is not specified, defaulting to not mounting" - false + DefaultLightPush let peerExchange = if builder.peerExchange.isSome(): builder.peerExchange.get() else: warn "whether to mount peerExchange is not specified, defaulting to not mounting" - false + DefaultPeerExchange let storeSync = if builder.storeSync.isSome(): builder.storeSync.get() else: warn "whether to mount storeSync is not specified, defaulting to not mounting" - false + DefaultStoreSyncMount let rendezvous = if builder.rendezvous.isSome(): builder.rendezvous.get() else: warn "whether to mount rendezvous is not specified, defaulting to not mounting" - false + DefaultRendezvous let mix = if builder.mix.isSome(): builder.mix.get() else: warn "whether to mount mix is not specified, defaulting to not mounting" - false + DefaultMix - let relayPeerExchange = builder.relayPeerExchange.get(false) + let relayPeerExchange = builder.relayPeerExchange.get(DefaultRelayPeerExchange) let nodeKey = ?nodeKey(builder, rng) @@ -503,7 +539,7 @@ proc build*( # TODO: ClusterId should never be defaulted, instead, presets # should be defined and used warn("Cluster Id was not specified, defaulting to 0") - 0.uint16 + DefaultClusterId else: builder.clusterId.get().uint16 @@ -522,8 +558,9 @@ proc build*( of mmskStr: ?parseMsgSize(builder.maxMessageSize.str) else: - warn "Max Message Size not specified, defaulting to 150KiB" - parseCorrectMsgSize("150KiB") + warn "Max Message Size not specified, defaulting to DefaultMaxWakuMessageSize", + default = DefaultMaxWakuMessageSizeStr + DefaultMaxWakuMessageSize let contentTopics = builder.contentTopics.get(@[]) @@ -568,21 +605,21 @@ proc build*( builder.logLevel.get() else: warn "Log Level not specified, defaulting to INFO" - logging.LogLevel.INFO + DefaultLogLevel let logFormat = if builder.logFormat.isSome(): builder.logFormat.get() else: warn "Log Format not specified, defaulting to TEXT" - logging.LogFormat.TEXT + DefaultLogFormat let natStrategy = if builder.natStrategy.isSome(): builder.natStrategy.get() else: warn "Nat Strategy is not specified, defaulting to none" - "none" + DefaultNatStrategy let p2pTcpPort = builder.p2pTcpPort.get(DefaultP2pTcpPort) @@ -591,14 +628,14 @@ proc build*( builder.p2pListenAddress.get() else: warn "P2P listening address not specified, listening on 0.0.0.0" - (static parseIpAddress("0.0.0.0")) + DefaultP2pListenAddress let portsShift = if builder.portsShift.isSome(): builder.portsShift.get() else: warn "Ports Shift is not specified, defaulting to 0" - 0.uint16 + DefaultPortsShift let dns4DomainName = if builder.dns4DomainName.isSome(): @@ -621,21 +658,21 @@ proc build*( builder.extMultiAddrsOnly.get() else: warn "Whether to only announce external multiaddresses is not specified, defaulting to false" - false + DefaultExtMultiAddrsOnly let dnsAddrsNameServers = if builder.dnsAddrsNameServers.len != 0: builder.dnsAddrsNameServers else: warn "DNS name servers IPs not provided, defaulting to Cloudflare's." - @[static parseIpAddress("1.1.1.1"), static parseIpAddress("1.0.0.1")] + DefaultDnsAddrsNameServers let peerPersistence = if builder.peerPersistence.isSome(): builder.peerPersistence.get() else: warn "Peer persistence not specified, defaulting to false" - false + DefaultPeerPersistence let maxConnections = if builder.maxConnections.isSome(): @@ -649,15 +686,13 @@ proc build*( warn "max-connections less than DefaultMaxConnections; we suggest using DefaultMaxConnections or more for better connectivity", provided = maxConnections, recommended = DefaultMaxConnections - # TODO: Do the git version thing here - let agentString = builder.agentString.get("logos-delivery") + let agentString = builder.agentString.get(DefaultAgentString) - # TODO: use `DefaultColocationLimit`. the user of this value should - # probably be defining a config object - let colocationLimit = builder.colocationLimit.get(5) + let colocationLimit = builder.colocationLimit.get(DefaultColocationLimit) # TODO: is there a strategy for experimental features? delete vs promote - let relayShardedPeerManagement = builder.relayShardedPeerManagement.get(false) + let relayShardedPeerManagement = + builder.relayShardedPeerManagement.get(DefaultRelayShardedPeerManagement) let wakuFlags = CapabilitiesBitfield.init( lightpush = lightPush and relay, @@ -718,12 +753,12 @@ proc build*( agentString: agentString, colocationLimit: colocationLimit, maxRelayPeers: builder.maxRelayPeers, - relayServiceRatio: builder.relayServiceRatio.get("50:50"), + relayServiceRatio: builder.relayServiceRatio.get(DefaultRelayServiceRatio), rateLimit: rateLimit, - circuitRelayClient: builder.circuitRelayClient.get(false), + circuitRelayClient: builder.circuitRelayClient.get(DefaultCircuitRelayClient), staticNodes: builder.staticNodes, relayShardedPeerManagement: relayShardedPeerManagement, - p2pReliability: builder.p2pReliability.get(false), + p2pReliability: builder.p2pReliability.get(DefaultP2pReliability), wakuFlags: wakuFlags, localStoragePath: builder.localStoragePath.get(DefaultStoragePath), ) diff --git a/waku/factory/conf_builder/web_socket_conf_builder.nim b/waku/factory/conf_builder/web_socket_conf_builder.nim index 61334d958..797c6d036 100644 --- a/waku/factory/conf_builder/web_socket_conf_builder.nim +++ b/waku/factory/conf_builder/web_socket_conf_builder.nim @@ -4,7 +4,10 @@ import waku/factory/waku_conf logScope: topics = "waku conf builder websocket" -const DefaultWebSocketPort*: Port = Port(8000) +const + DefaultWebSocketEnabled*: bool = false + DefaultWebSocketSecureEnabled*: bool = false + DefaultWebSocketPort*: Port = Port(8000) ############################## ## WebSocket Config Builder ## @@ -40,10 +43,10 @@ proc withCertPath*(b: var WebSocketConfBuilder, certPath: string) = b.certPath = some(certPath) proc build*(b: WebSocketConfBuilder): Result[Option[WebSocketConf], string] = - if not b.enabled.get(false): + if not b.enabled.get(DefaultWebSocketEnabled): return ok(none(WebSocketConf)) - if not b.secureEnabled.get(false): + if not b.secureEnabled.get(DefaultWebSocketSecureEnabled): return ok( some( WebSocketConf( diff --git a/waku/factory/networks_config.nim b/waku/factory/networks_config.nim index 488f58464..1fb065a4b 100644 --- a/waku/factory/networks_config.nim +++ b/waku/factory/networks_config.nim @@ -1,6 +1,7 @@ {.push raises: [].} import chronicles, results, stint +import waku/waku_core/message/default_values logScope: topics = "waku networks conf" @@ -17,7 +18,7 @@ type of StaticSharding: discard -type NetworkConf* = object +type NetworkConf* = object ## A network "preset" (--preset=twn, --preset=logos.dev). maxMessageSize*: string # TODO: static convert to a uint64 clusterId*: uint16 rlnRelay*: bool @@ -41,7 +42,7 @@ type NetworkConf* = object proc TheWakuNetworkConf*(T: type NetworkConf): NetworkConf = const RelayChainId = 59141'u256 return NetworkConf( - maxMessageSize: "150KiB", + maxMessageSize: DefaultMaxWakuMessageSizeStr, clusterId: 1, rlnRelay: true, rlnRelayEthContractAddress: "0xB9cd878C90E49F797B4431fBF4fb333108CB90e6", @@ -68,7 +69,7 @@ proc TheWakuNetworkConf*(T: type NetworkConf): NetworkConf = proc LogosDevConf*(T: type NetworkConf): NetworkConf = const ZeroChainId = 0'u256 return NetworkConf( - maxMessageSize: "150KiB", + maxMessageSize: DefaultMaxWakuMessageSizeStr, clusterId: 2, rlnRelay: false, rlnRelayEthContractAddress: "", diff --git a/waku/factory/waku.nim b/waku/factory/waku.nim index 6a5567f8c..ee70cf713 100644 --- a/waku/factory/waku.nim +++ b/waku/factory/waku.nim @@ -30,12 +30,12 @@ import waku_enr/sharding, waku_enr/multiaddr, api/types, + messaging_client, common/logging, node/peer_manager, node/health_monitor, node/waku_metrics, - node/delivery_service/delivery_service, - node/delivery_service/subscription_manager, + node/subscription_manager, rest_api/message_cache, rest_api/endpoint/server, rest_api/endpoint/builder as rest_server_builder, @@ -48,6 +48,7 @@ import factory/app_callbacks, persistency/persistency, ], + channels/reliable_channel_manager, ./waku_conf, ./waku_state_info @@ -73,7 +74,9 @@ type Waku* = ref object healthMonitor*: NodeHealthMonitor - deliveryService*: DeliveryService + messagingClient*: MessagingClient + + reliableChannelManager*: ReliableChannelManager restServer*: WakuRestServerRef metricsServer*: MetricsHttpServerRef @@ -215,10 +218,6 @@ proc new*( error "Failed setting up app callbacks", error = error return err("Failed setting up app callbacks: " & $error) - ## Delivery Monitor - let deliveryService = DeliveryService.new(wakuConf.p2pReliability, node).valueOr: - return err("could not create delivery service: " & $error) - var waku = Waku( stateInfo: WakuStateInfo.init(node), conf: wakuConf, @@ -226,7 +225,6 @@ proc new*( key: wakuConf.nodeKey, node: node, healthMonitor: healthMonitor, - deliveryService: deliveryService, appCallbacks: appCallbacks, restServer: restServer, brokerCtx: brokerCtx, @@ -254,9 +252,9 @@ proc getPorts( return ok((tcpPort: tcpPort, websocketPort: websocketPort)) -proc getRunningNetConfig(waku: ptr Waku): Future[Result[NetConfig, string]] {.async.} = - let conf = waku[].conf - let (tcpPort, websocketPort) = getPorts(waku[].node.switch.peerInfo.listenAddrs).valueOr: +proc getRunningNetConfig(waku: Waku): Future[Result[NetConfig, string]] {.async.} = + let conf = waku.conf + let (tcpPort, websocketPort) = getPorts(waku.node.switch.peerInfo.listenAddrs).valueOr: return err("Could not retrieve ports: " & error) if tcpPort.isSome(): @@ -276,67 +274,67 @@ proc getRunningNetConfig(waku: ptr Waku): Future[Result[NetConfig, string]] {.as return ok(netConf) -proc updateEnr(waku: ptr Waku): Future[Result[void, string]] {.async.} = +proc updateEnr(waku: Waku): Future[Result[void, string]] {.async.} = let netConf: NetConfig = (await getRunningNetConfig(waku)).valueOr: return err("error calling updateNetConfig: " & $error) - let record = enrConfiguration(waku[].conf, netConf).valueOr: + let record = enrConfiguration(waku.conf, netConf).valueOr: return err("ENR setup failed: " & error) - if isClusterMismatched(record, waku[].conf.clusterId): + if isClusterMismatched(record, waku.conf.clusterId): return err("cluster-id mismatch configured shards") - waku[].node.enr = record + waku.node.enr = record # If TCP/WS was configured with port 0, node.announcedAddresses was built # pre-bind with a port value of 0. In any case, the resync is harmless. - waku[].node.announcedAddresses = netConf.announcedAddresses + waku.node.announcedAddresses = netConf.announcedAddresses return ok() -proc updateAddressInENR(waku: ptr Waku): Result[void, string] = - let addresses: seq[MultiAddress] = waku[].node.announcedAddresses +proc updateAddressInENR(waku: Waku): Result[void, string] = + let addresses: seq[MultiAddress] = waku.node.announcedAddresses let encodedAddrs = multiaddr.encodeMultiaddrs(addresses) ## First update the enr info contained in WakuNode - let keyBytes = waku[].key.getRawBytes().valueOr: + let keyBytes = waku.key.getRawBytes().valueOr: return err("failed to retrieve raw bytes from waku key: " & $error) let parsedPk = keys.PrivateKey.fromHex(keyBytes.toHex()).valueOr: return err("failed to parse the private key: " & $error) let enrFields = @[toFieldPair(MultiaddrEnrField, encodedAddrs)] - waku[].node.enr.update(parsedPk, extraFields = enrFields).isOkOr: + waku.node.enr.update(parsedPk, extraFields = enrFields).isOkOr: return err("failed to update multiaddress in ENR updateAddressInENR: " & $error) info "Waku node ENR updated successfully with new multiaddress", - enr = waku[].node.enr.toUri(), record = $(waku[].node.enr) + enr = waku.node.enr.toUri(), record = $(waku.node.enr) ## Now update the ENR infor in discv5 - if not waku[].wakuDiscv5.isNil(): - waku[].wakuDiscv5.protocol.localNode.record = waku[].node.enr - let enr = waku[].wakuDiscv5.protocol.localNode.record + if not waku.wakuDiscv5.isNil(): + waku.wakuDiscv5.protocol.localNode.record = waku.node.enr + let enr = waku.wakuDiscv5.protocol.localNode.record info "Waku discv5 ENR updated successfully with new multiaddress", enr = enr.toUri(), record = $(enr) return ok() -proc updateWaku(waku: ptr Waku): Future[Result[void, string]] {.async.} = +proc updateWaku(waku: Waku): Future[Result[void, string]] {.async.} = (await updateEnr(waku)).isOkOr: return err("error calling updateEnr: " & $error) - ?updateAnnouncedAddrWithPrimaryIpAddr(waku[].node) + ?updateAnnouncedAddrWithPrimaryIpAddr(waku.node) ?updateAddressInENR(waku) return ok() -proc startDnsDiscoveryRetryLoop(waku: ptr Waku): Future[void] {.async.} = +proc startDnsDiscoveryRetryLoop(waku: Waku): Future[void] {.async.} = while true: await sleepAsync(30.seconds) if waku.conf.dnsDiscoveryConf.isSome(): let dnsDiscoveryConf = waku.conf.dnsDiscoveryConf.get() - waku[].dynamicBootstrapNodes = ( + waku.dynamicBootstrapNodes = ( await waku_dnsdisc.retrieveDynamicBootstrapNodes( dnsDiscoveryConf.enrTreeUrl, dnsDiscoveryConf.nameServers ) @@ -344,35 +342,61 @@ proc startDnsDiscoveryRetryLoop(waku: ptr Waku): Future[void] {.async.} = error "Retrieving dynamic bootstrap nodes failed", error = error continue - if not waku[].wakuDiscv5.isNil(): - let dynamicBootstrapEnrs = waku[].dynamicBootstrapNodes - .filterIt(it.hasUdpPort()) - .mapIt(it.enr.get().toUri()) + if not waku.wakuDiscv5.isNil(): + let dynamicBootstrapEnrs = + waku.dynamicBootstrapNodes.filterIt(it.hasUdpPort()).mapIt(it.enr.get().toUri()) var discv5BootstrapEnrs: seq[enr.Record] # parse enrURIs from the configuration and add the resulting ENRs to the discv5BootstrapEnrs seq for enrUri in dynamicBootstrapEnrs: addBootstrapNode(enrUri, discv5BootstrapEnrs) - waku[].wakuDiscv5.updateBootstrapRecords( - waku[].wakuDiscv5.protocol.bootstrapRecords & discv5BootstrapEnrs + waku.wakuDiscv5.updateBootstrapRecords( + waku.wakuDiscv5.protocol.bootstrapRecords & discv5BootstrapEnrs ) info "Connecting to dynamic bootstrap peers" try: - await connectToNodes( - waku[].node, waku[].dynamicBootstrapNodes, "dynamic bootstrap" - ) + await connectToNodes(waku.node, waku.dynamicBootstrapNodes, "dynamic bootstrap") except CatchableError: error "failed to connect to dynamic bootstrap nodes: " & getCurrentExceptionMsg() return -proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: []).} = - if waku[].node.started: - warn "startWaku: waku node already started" +proc mountMessagingClient*(waku: Waku): Result[void, string] = + if not waku.messagingClient.isNil(): + return err("messaging client already mounted") + if waku.node.started: + return err("cannot mount messaging client on a started node") + waku.messagingClient = MessagingClient.new(waku.conf.p2pReliability, waku.node).valueOr: + return err("could not create messaging client: " & $error) + return ok() + +proc mountReliableChannelManager*(waku: Waku): Result[void, string] = + if not waku.reliableChannelManager.isNil(): + return err("reliable channel manager already mounted") + if waku.messagingClient.isNil(): + return err("reliable channel manager requires a mounted messaging client") + if waku.node.started: + return err("cannot mount reliable channel manager on a started node") + + let messagingClient = waku.messagingClient + let defaultSendHandler: SendHandler = proc( + envelope: MessageEnvelope + ): Future[Result[RequestId, string]] {.async: (raises: [CatchableError]), gcsafe.} = + return await messagingClient.send(envelope) + + waku.reliableChannelManager = ReliableChannelManager.new( + messagingClient, defaultSendHandler, waku.brokerCtx + ).valueOr: + return err("could not create reliable channel manager: " & $error) + return ok() + +proc start*(waku: Waku): Future[Result[void, string]] {.async: (raises: []).} = + if waku.node.started: + warn "start: waku node already started" return ok() info "Retrieve dynamic bootstrap nodes" - let conf = waku[].conf + let conf = waku.conf if conf.dnsDiscoveryConf.isSome(): let dnsDiscoveryConf = waku.conf.dnsDiscoveryConf.get() @@ -390,9 +414,9 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: error "Retrieving dynamic bootstrap nodes failed", error = dynamicBootstrapNodesRes.error # Start Dns Discovery retry loop - waku[].dnsRetryLoopHandle = waku.startDnsDiscoveryRetryLoop() + waku.dnsRetryLoopHandle = waku.startDnsDiscoveryRetryLoop() else: - waku[].dynamicBootstrapNodes = dynamicBootstrapNodesRes.get() + waku.dynamicBootstrapNodes = dynamicBootstrapNodesRes.get() ## Initialize persistency singleton instance - we don't need the instance itself here, ## but this ensures it's initialized before any store job starts. @@ -405,12 +429,12 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: let bound = getPorts(waku.node.switch.peerInfo.listenAddrs).valueOr: return err("failed to read bound ports from switch: " & $error) - waku[].node.ports.tcp = bound.tcpPort.get(Port(0)).uint16 - waku[].node.ports.webSocket = bound.websocketPort.get(Port(0)).uint16 + waku.node.ports.tcp = bound.tcpPort.get(Port(0)).uint16 + waku.node.ports.webSocket = bound.websocketPort.get(Port(0)).uint16 ## Discv5 if conf.discv5Conf.isSome(): - waku[].wakuDiscV5 = ( + waku.wakuDiscV5 = ( await waku_discv5.setupAndStartDiscv5( waku.node.enr, waku.node.peerManager, @@ -425,23 +449,21 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: ).valueOr: return err("failed to start waku discovery v5: " & error) - waku[].node.ports.discv5Udp = waku[].wakuDiscV5.udpPort.uint16 - waku[].conf.discv5Conf.get().udpPort = waku[].wakuDiscV5.udpPort + waku.node.ports.discv5Udp = waku.wakuDiscV5.udpPort.uint16 + waku.conf.discv5Conf.get().udpPort = waku.wakuDiscV5.udpPort ## Update waku data that is set dynamically on node start try: (await updateWaku(waku)).isOkOr: - return err("Error in startWaku: " & $error) + return err("Error in start: " & $error) except CatchableError: - return err("Caught exception in startWaku: " & getCurrentExceptionMsg()) + return err("Caught exception in start: " & getCurrentExceptionMsg()) - ## Reliability - if not waku[].deliveryService.isNil(): - waku[].deliveryService.startDeliveryService().isOkOr: - return err("failed to start delivery service: " & $error) + waku.node.subscriptionManager.subscribeAllAutoshards().isOkOr: + return err("failed to auto-subscribe autosharding shards: " & $error) ## Health Monitor - waku[].healthMonitor.startHealthMonitor().isOkOr: + waku.healthMonitor.startHealthMonitor().isOkOr: return err("failed to start health monitor: " & $error) ## Setup RequestConnectionStatus provider @@ -450,7 +472,7 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: globalBrokerContext(), proc(): Result[RequestConnectionStatus, string] = try: - let healthReport = waku[].healthMonitor.getSyncNodeHealthReport() + let healthReport = waku.healthMonitor.getSyncNodeHealthReport() return ok(RequestConnectionStatus(connectionStatus: healthReport.connectionStatus)) except CatchableError: @@ -467,7 +489,7 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: ): Future[Result[RequestProtocolHealth, string]] {.async.} = try: let protocolHealthStatus = - await waku[].healthMonitor.getProtocolHealthInfo(protocol) + await waku.healthMonitor.getProtocolHealthInfo(protocol) return ok(RequestProtocolHealth(healthStatus: protocolHealthStatus)) except CatchableError: return err("Failed to get protocol health: " & getCurrentExceptionMsg()), @@ -480,7 +502,7 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: globalBrokerContext(), proc(): Future[Result[RequestHealthReport, string]] {.async.} = try: - let report = await waku[].healthMonitor.getNodeHealthReport() + let report = await waku.healthMonitor.getNodeHealthReport() return ok(RequestHealthReport(healthReport: report)) except CatchableError: return err("Failed to get health report: " & getCurrentExceptionMsg()), @@ -489,9 +511,9 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: if conf.restServerConf.isSome(): rest_server_builder.startRestServerProtocolSupport( - waku[].restServer, - waku[].node, - waku[].wakuDiscv5, + waku.restServer, + waku.node, + waku.wakuDiscv5, conf.restServerConf.get(), conf.relay, conf.lightPush, @@ -509,21 +531,27 @@ proc startWaku*(waku: ptr Waku): Future[Result[void, string]] {.async: (raises: ) ).valueOr: return err("Starting monitoring and external interfaces failed: " & error) - waku[].metricsServer = server - waku[].node.ports.metrics = port.uint16 - waku[].conf.metricsServerConf.get().httpPort = port + waku.metricsServer = server + waku.node.ports.metrics = port.uint16 + waku.conf.metricsServerConf.get().httpPort = port except CatchableError: return err( "Caught exception starting monitoring and external interfaces failed: " & getCurrentExceptionMsg() ) - waku[].healthMonitor.setOverallHealth(HealthStatus.READY) + waku.healthMonitor.setOverallHealth(HealthStatus.READY) + + if not waku.messagingClient.isNil(): + waku.messagingClient.start().isOkOr: + return err("failed to start messaging client: " & $error) + + if not waku.reliableChannelManager.isNil(): + waku.reliableChannelManager.start().isOkOr: + return err("failed to start reliable channel manager: " & $error) return ok() proc stop*(waku: Waku): Future[Result[void, string]] {.async: (raises: []).} = - ## Waku shutdown - if not waku.node.started: warn "stop: attempting to stop node that isn't running" @@ -538,9 +566,11 @@ proc stop*(waku: Waku): Future[Result[void, string]] {.async: (raises: []).} = if not waku.wakuDiscv5.isNil(): await waku.wakuDiscv5.stop() - if not waku.deliveryService.isNil(): - await waku.deliveryService.stopDeliveryService() - waku.deliveryService = nil + if not waku.reliableChannelManager.isNil(): + await waku.reliableChannelManager.stop() + + if not waku.messagingClient.isNil(): + await waku.messagingClient.stop() if not waku.node.isNil(): await waku.node.stop() diff --git a/waku/messaging_client.nim b/waku/messaging_client.nim new file mode 100644 index 000000000..1fc4deb3c --- /dev/null +++ b/waku/messaging_client.nim @@ -0,0 +1,63 @@ +import results, chronos +import chronicles +import + ./api/types, + ./node/[ + waku_node, + subscription_manager, + delivery_service/recv_service, + delivery_service/send_service, + delivery_service/send_service/delivery_task, + ] + +type MessagingClient* = ref object + node: WakuNode + sendService*: SendService + recvService*: RecvService + started: bool + +proc new*( + T: type MessagingClient, useP2PReliability: bool, node: WakuNode +): Result[T, string] = + let sendService = ?SendService.new(useP2PReliability, node) + let recvService = RecvService.new(node) + ok(T(node: node, sendService: sendService, recvService: recvService)) + +proc start*(self: MessagingClient): Result[void, string] = + if self.started: + return ok() + self.recvService.startRecvService() + self.sendService.startSendService() + self.started = true + ok() + +proc stop*(self: MessagingClient) {.async.} = + if not self.started: + return + await self.sendService.stopSendService() + await self.recvService.stopRecvService() + self.started = false + +proc send*( + self: MessagingClient, envelope: MessageEnvelope +): Future[Result[RequestId, string]] {.async.} = + ## High-level messaging API send. Auto-subscribes to the content topic + ## (so the local node sees its own gossipsub broadcast), builds a + ## `DeliveryTask`, and hands it to the send service. Returns the request + ## id the caller can correlate with `MessageSentEvent` / `MessageErrorEvent`. + let isSubbed = + self.node.subscriptionManager.isSubscribed(envelope.contentTopic).valueOr(false) + if not isSubbed: + info "Auto-subscribing to topic on send", contentTopic = envelope.contentTopic + self.node.subscriptionManager.subscribe(envelope.contentTopic).isOkOr: + warn "Failed to auto-subscribe", error = error + return err("Failed to auto-subscribe before sending: " & error) + + let requestId = RequestId.new(self.node.rng) + + let deliveryTask = DeliveryTask.new(requestId, envelope, self.node.brokerCtx).valueOr: + return err("MessagingClient.send: Failed to create delivery task: " & error) + + asyncSpawn self.sendService.send(deliveryTask) + + return ok(requestId) diff --git a/waku/node/delivery_service/delivery_service.nim b/waku/node/delivery_service/delivery_service.nim deleted file mode 100644 index f3d78d98e..000000000 --- a/waku/node/delivery_service/delivery_service.nim +++ /dev/null @@ -1,44 +0,0 @@ -## This module helps to ensure the correct transmission and reception of messages - -import results -import chronos, chronicles -import - ./recv_service, - ./send_service, - ./subscription_manager, - waku/[ - waku_core, waku_node, waku_store/client, waku_relay/protocol, waku_lightpush/client - ] - -type DeliveryService* = ref object - sendService*: SendService - recvService*: RecvService - subscriptionManager*: SubscriptionManager - -proc new*( - T: type DeliveryService, useP2PReliability: bool, w: WakuNode -): Result[T, string] = - ## storeClient is needed to give store visitility to DeliveryService - ## wakuRelay and wakuLightpushClient are needed to give a mechanism to SendService to re-publish - let subscriptionManager = SubscriptionManager.new(w) - let sendService = ?SendService.new(useP2PReliability, w, subscriptionManager) - let recvService = RecvService.new(w, subscriptionManager) - - return ok( - DeliveryService( - sendService: sendService, - recvService: recvService, - subscriptionManager: subscriptionManager, - ) - ) - -proc startDeliveryService*(self: DeliveryService): Result[void, string] = - ?self.subscriptionManager.startSubscriptionManager() - self.recvService.startRecvService() - self.sendService.startSendService() - return ok() - -proc stopDeliveryService*(self: DeliveryService) {.async.} = - await self.sendService.stopSendService() - await self.recvService.stopRecvService() - await self.subscriptionManager.stopSubscriptionManager() diff --git a/waku/node/delivery_service/recv_service/recv_service.nim b/waku/node/delivery_service/recv_service/recv_service.nim index 899f80f71..500926cc7 100644 --- a/waku/node/delivery_service/recv_service/recv_service.nim +++ b/waku/node/delivery_service/recv_service/recv_service.nim @@ -4,17 +4,17 @@ import std/[tables, sequtils, options, sets] import chronos, chronicles, libp2p/utility -import ../[subscription_manager] import brokers/broker_context import waku/[ waku_core, + waku_core/topics, waku_store/client, waku_store/common, waku_filter_v2/client, - waku_core/topics, events/message_events, waku_node, + node/subscription_manager, ] const StoreCheckPeriod = chronos.minutes(5) ## How often to perform store queries @@ -38,7 +38,6 @@ type RecvService* = ref object of RootObj brokerCtx: BrokerContext node: WakuNode seenMsgListener: MessageSeenEventListener - subscriptionManager: SubscriptionManager recentReceivedMsgs: seq[RecvMessage] @@ -77,7 +76,9 @@ proc processIncomingMessage( ## or if the message is a duplicate (recently-seen). Otherwise, save it as ## recently-seen, emit a MessageReceivedEvent, and return true. - if not self.subscriptionManager.isSubscribed(pubsubTopic, message.contentTopic): + if not self.node.subscriptionManager.isContentSubscribed( + pubsubTopic, message.contentTopic + ): trace "skipping message as I am not subscribed", shard = pubsubTopic, contentTopic = message.contentTopic return false @@ -101,7 +102,7 @@ proc checkStore*(self: RecvService) {.async.} = self.endTimeToCheck = getNowInNanosecondTime() ## query store and deliver new recovered messages per subscribed topic - for pubsubTopic, contentTopics in self.subscriptionManager.subscribedTopics: + for pubsubTopic, contentTopics in self.node.subscriptionManager.subscribedContentTopics: let storeResp: StoreQueryResponse = ( await self.node.wakuStoreClient.queryToAny( StoreQueryRequest( @@ -146,7 +147,7 @@ proc msgChecker(self: RecvService) {.async.} = await sleepAsync(StoreCheckPeriod) await self.checkStore() -proc new*(T: typedesc[RecvService], node: WakuNode, s: SubscriptionManager): T = +proc new*(T: typedesc[RecvService], node: WakuNode): T = ## The storeClient will help to acquire any possible missed messages let now = getNowInNanosecondTime() @@ -154,7 +155,6 @@ proc new*(T: typedesc[RecvService], node: WakuNode, s: SubscriptionManager): T = node: node, startTimeToCheck: now, brokerCtx: node.brokerCtx, - subscriptionManager: s, recentReceivedMsgs: @[], ) diff --git a/waku/node/delivery_service/send_service/send_service.nim b/waku/node/delivery_service/send_service/send_service.nim index 88ec802cf..e60b26124 100644 --- a/waku/node/delivery_service/send_service/send_service.nim +++ b/waku/node/delivery_service/send_service/send_service.nim @@ -6,10 +6,10 @@ import chronos, chronicles, libp2p/utility import brokers/broker_context import ./[send_processor, relay_processor, lightpush_processor, delivery_task], - ../[subscription_manager], waku/[ waku_core, node/waku_node, + node/subscription_manager, node/peer_manager, waku_store/client, waku_store/common, @@ -58,7 +58,6 @@ type SendService* = ref object of RootObj node: WakuNode checkStoreForMessages: bool - subscriptionManager: SubscriptionManager proc setupSendProcessorChain( peerManager: PeerManager, @@ -96,10 +95,7 @@ proc setupSendProcessorChain( return ok(processors[0]) proc new*( - T: typedesc[SendService], - preferP2PReliability: bool, - w: WakuNode, - s: SubscriptionManager, + T: typedesc[SendService], preferP2PReliability: bool, w: WakuNode ): Result[T, string] = if w.wakuRelay.isNil() and w.wakuLightpushClient.isNil(): return err( @@ -120,7 +116,6 @@ proc new*( sendProcessor: sendProcessorChain, node: w, checkStoreForMessages: checkStoreForMessages, - subscriptionManager: s, ) return ok(sendService) @@ -263,7 +258,7 @@ proc send*(self: SendService, task: DeliveryTask) {.async.} = info "SendService.send: processing delivery task", requestId = task.requestId, msgHash = task.msgHash.to0xHex() - self.subscriptionManager.subscribe(task.msg.contentTopic).isOkOr: + self.node.subscriptionManager.subscribe(task.msg.contentTopic).isOkOr: error "SendService.send: failed to subscribe to content topic", contentTopic = task.msg.contentTopic, error = error diff --git a/waku/node/delivery_service/subscription_manager.nim b/waku/node/delivery_service/subscription_manager.nim deleted file mode 100644 index 393a61eae..000000000 --- a/waku/node/delivery_service/subscription_manager.nim +++ /dev/null @@ -1,596 +0,0 @@ -import std/[sequtils, sets, tables, options, strutils], chronos, chronicles, results -import libp2p/[peerid, peerinfo] -import brokers/broker_context - -import - waku/[ - waku_core, - waku_core/topics, - waku_core/topics/sharding, - waku_node, - waku_relay, - waku_filter_v2/common as filter_common, - waku_filter_v2/client as filter_client, - waku_filter_v2/protocol as filter_protocol, - events/health_events, - events/peer_events, - requests/health_requests, - node/peer_manager, - node/health_monitor/topic_health, - node/health_monitor/connection_status, - ] - -# --------------------------------------------------------------------------- -# Logos Messaging API SubscriptionManager -# -# Maps all topic subscription intent and centralizes all consistency -# maintenance of the pubsub and content topic subscription model across -# the various network drivers that handle topics (Edge/Filter and Core/Relay). -# --------------------------------------------------------------------------- - -type EdgeFilterSubState* = object - peers: seq[RemotePeerInfo] - ## Filter service peers with confirmed subscriptions on this shard. - pending: seq[Future[void]] ## In-flight dial futures for peers not yet confirmed. - pendingPeers: HashSet[PeerId] ## PeerIds of peers currently being dialed. - currentHealth: TopicHealth - ## Cached health derived from peers.len; updated on every peer set change. - -func toTopicHealth*(peersCount: int): TopicHealth = - if peersCount >= HealthyThreshold: - TopicHealth.SUFFICIENTLY_HEALTHY - elif peersCount > 0: - TopicHealth.MINIMALLY_HEALTHY - else: - TopicHealth.UNHEALTHY - -type SubscriptionManager* = ref object of RootObj - node: WakuNode - contentTopicSubs: Table[PubsubTopic, HashSet[ContentTopic]] - ## Map of Shard to ContentTopic needed because e.g. WakuRelay is PubsubTopic only. - ## A present key with an empty HashSet value means pubsubtopic already subscribed - ## (via subscribePubsubTopics()) but there's no specific content topic interest yet. - edgeFilterSubStates*: Table[PubsubTopic, EdgeFilterSubState] - ## Per-shard filter subscription state for edge mode. - edgeFilterWakeup: AsyncEvent - ## Signalled when the edge filter sub loop should re-reconcile. - edgeFilterSubLoopFut: Future[void] - edgeFilterHealthLoopFut: Future[void] - peerEventListener: WakuPeerEventListener - ## Listener for peer connect/disconnect events (edge filter wakeup). - -iterator subscribedTopics*( - self: SubscriptionManager -): (PubsubTopic, HashSet[ContentTopic]) = - ## Iterate over all subscribed content topics, batched per shard. - ## This is guaranteed to return a non-empty `topics` (content topics) list on iteration. - - for pubsub, topics in self.contentTopicSubs.pairs: - # We are iterating over subscribed content topics; if we are subscribed to - # a shard but have no subscription (interest) for any content topic in that - # shard, then avoid triggering an iteration that doesn't advance the intent - # to iterate over content topic subscriptions. - if topics.len == 0: - continue - yield (pubsub, topics) - -proc edgeFilterPeerCount*(sm: SubscriptionManager, shard: PubsubTopic): int = - sm.edgeFilterSubStates.withValue(shard, state): - return state.peers.len - return 0 - -proc new*(T: typedesc[SubscriptionManager], node: WakuNode): T = - SubscriptionManager( - node: node, contentTopicSubs: initTable[PubsubTopic, HashSet[ContentTopic]]() - ) - -proc addContentTopicInterest( - self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic -): Result[void, string] = - var changed = false - if not self.contentTopicSubs.hasKey(shard): - self.contentTopicSubs[shard] = initHashSet[ContentTopic]() - changed = true - - self.contentTopicSubs.withValue(shard, cTopics): - if not cTopics[].contains(topic): - cTopics[].incl(topic) - changed = true - - if changed and not isNil(self.edgeFilterWakeup): - self.edgeFilterWakeup.fire() - - return ok() - -proc removeContentTopicInterest( - self: SubscriptionManager, shard: PubsubTopic, topic: ContentTopic -): Result[void, string] = - var changed = false - self.contentTopicSubs.withValue(shard, cTopics): - if cTopics[].contains(topic): - cTopics[].excl(topic) - changed = true - - if cTopics[].len == 0 and isNil(self.node.wakuRelay): - self.contentTopicSubs.del(shard) # We're done with cTopics here - - if changed and not isNil(self.edgeFilterWakeup): - self.edgeFilterWakeup.fire() - - return ok() - -proc subscribePubsubTopics( - self: SubscriptionManager, shards: seq[PubsubTopic] -): Result[void, string] = - if isNil(self.node.wakuRelay): - return err("subscribePubsubTopics requires a Relay") - - var errors: seq[string] - - for shard in shards: - if not self.contentTopicSubs.hasKey(shard): - self.node.subscribe((kind: PubsubSub, topic: shard), nil).isOkOr: - errors.add("shard " & shard & ": " & error) - continue - - self.contentTopicSubs[shard] = initHashSet[ContentTopic]() - - if errors.len > 0: - return err("subscribeShard errors: " & errors.join("; ")) - - return ok() - -proc getShardForContentTopic( - self: SubscriptionManager, topic: ContentTopic -): Result[PubsubTopic, string] = - if self.node.wakuAutoSharding.isSome(): - let shardObj = ?self.node.wakuAutoSharding.get().getShard(topic) - return ok($shardObj) - - return err("SubscriptionManager requires AutoSharding") - -proc isSubscribed*( - self: SubscriptionManager, topic: ContentTopic -): Result[bool, string] = - let shard = ?self.getShardForContentTopic(topic) - return ok( - self.contentTopicSubs.hasKey(shard) and self.contentTopicSubs[shard].contains(topic) - ) - -proc isSubscribed*( - self: SubscriptionManager, shard: PubsubTopic, contentTopic: ContentTopic -): bool {.raises: [].} = - self.contentTopicSubs.withValue(shard, cTopics): - return cTopics[].contains(contentTopic) - return false - -proc subscribe*(self: SubscriptionManager, topic: ContentTopic): Result[void, string] = - if isNil(self.node.wakuRelay) and isNil(self.node.wakuFilterClient): - return err("SubscriptionManager requires either Relay or Filter Client.") - - let shard = ?self.getShardForContentTopic(topic) - - if not isNil(self.node.wakuRelay) and not self.contentTopicSubs.hasKey(shard): - ?self.subscribePubsubTopics(@[shard]) - - ?self.addContentTopicInterest(shard, topic) - - return ok() - -proc unsubscribe*( - self: SubscriptionManager, topic: ContentTopic -): Result[void, string] = - if isNil(self.node.wakuRelay) and isNil(self.node.wakuFilterClient): - return err("SubscriptionManager requires either Relay or Filter Client.") - - let shard = ?self.getShardForContentTopic(topic) - - if self.isSubscribed(shard, topic): - ?self.removeContentTopicInterest(shard, topic) - - return ok() - -# --------------------------------------------------------------------------- -# Edge Filter driver for the Logos Messaging API -# -# The SubscriptionManager absorbs natively the responsibility of using the -# Edge Filter protocol to effect subscriptions and message receipt for edge. -# --------------------------------------------------------------------------- - -const EdgeFilterSubscribeTimeout = chronos.seconds(15) - ## Timeout for a single filter subscribe/unsubscribe RPC to a service peer. -const EdgeFilterPingTimeout = chronos.seconds(5) - ## Timeout for a filter ping health check. -const EdgeFilterLoopInterval = chronos.seconds(30) - ## Interval for the edge filter health ping loop. -const EdgeFilterSubLoopDebounce = chronos.seconds(1) - ## Debounce delay to coalesce rapid-fire wakeups into a single reconciliation pass. - -type EdgeDialTask = object - peer: RemotePeerInfo - shard: PubsubTopic - topics: seq[ContentTopic] - -proc updateShardHealth( - self: SubscriptionManager, shard: PubsubTopic, state: var EdgeFilterSubState -) = - ## Recompute and emit health for a shard after its peer set changed. - let newHealth = toTopicHealth(state.peers.len) - if newHealth != state.currentHealth: - state.currentHealth = newHealth - EventShardTopicHealthChange.emit(self.node.brokerCtx, shard, newHealth) - -proc removePeer(self: SubscriptionManager, shard: PubsubTopic, peerId: PeerId) = - ## Remove a peer from edgeFilterSubStates for the given shard, - ## update health, and wake the sub loop to dial a replacement. - ## Best-effort unsubscribe so the service peer stops pushing to us. - self.edgeFilterSubStates.withValue(shard, state): - var peer: RemotePeerInfo - var found = false - for p in state.peers: - if p.peerId == peerId: - peer = p - found = true - break - if not found: - return - - state.peers.keepItIf(it.peerId != peerId) - self.updateShardHealth(shard, state[]) - self.edgeFilterWakeup.fire() - - if not self.node.wakuFilterClient.isNil(): - self.contentTopicSubs.withValue(shard, topics): - let ct = toSeq(topics[]) - if ct.len > 0: - proc doUnsubscribe() {.async.} = - discard await self.node.wakuFilterClient.unsubscribe(peer, shard, ct) - - asyncSpawn doUnsubscribe() - -type SendChunkedFilterRpcKind = enum - FilterSubscribe - FilterUnsubscribe - -proc sendChunkedFilterRpc( - self: SubscriptionManager, - peer: RemotePeerInfo, - shard: PubsubTopic, - topics: seq[ContentTopic], - kind: SendChunkedFilterRpcKind, -): Future[bool] {.async.} = - ## Send a chunked filter subscribe or unsubscribe RPC. Returns true on - ## success. On failure the peer is removed and false is returned. - try: - var i = 0 - while i < topics.len: - let chunk = - topics[i ..< min(i + filter_protocol.MaxContentTopicsPerRequest, topics.len)] - let fut = - case kind - of FilterSubscribe: - self.node.wakuFilterClient.subscribe(peer, shard, chunk) - of FilterUnsubscribe: - self.node.wakuFilterClient.unsubscribe(peer, shard, chunk) - if not (await fut.withTimeout(EdgeFilterSubscribeTimeout)) or fut.read().isErr(): - trace "sendChunkedFilterRpc: chunk failed", - op = kind, shard = shard, peer = peer.peerId - self.removePeer(shard, peer.peerId) - return false - i += filter_protocol.MaxContentTopicsPerRequest - except CatchableError as exc: - debug "sendChunkedFilterRpc: failed", - op = kind, shard = shard, peer = peer.peerId, err = exc.msg - self.removePeer(shard, peer.peerId) - return false - return true - -proc syncFilterDeltas( - self: SubscriptionManager, - peer: RemotePeerInfo, - shard: PubsubTopic, - added: seq[ContentTopic], - removed: seq[ContentTopic], -) {.async.} = - ## Push content topic changes (adds/removes) to an already-tracked peer. - if added.len > 0: - if not await self.sendChunkedFilterRpc(peer, shard, added, FilterSubscribe): - return - - if removed.len > 0: - discard await self.sendChunkedFilterRpc(peer, shard, removed, FilterUnsubscribe) - -proc dialFilterPeer( - self: SubscriptionManager, - peer: RemotePeerInfo, - shard: PubsubTopic, - contentTopics: seq[ContentTopic], -) {.async.} = - ## Subscribe a new peer to all content topics on a shard and start tracking it. - self.edgeFilterSubStates.withValue(shard, state): - state.pendingPeers.incl(peer.peerId) - - try: - if not await self.sendChunkedFilterRpc(peer, shard, contentTopics, FilterSubscribe): - return - - self.edgeFilterSubStates.withValue(shard, state): - if state.peers.anyIt(it.peerId == peer.peerId): - trace "dialFilterPeer: peer already tracked, skipping duplicate", - shard = shard, peer = peer.peerId - return - - state.peers.add(peer) - self.updateShardHealth(shard, state[]) - trace "dialFilterPeer: successfully subscribed to all chunks", - shard = shard, peer = peer.peerId, totalPeers = state.peers.len - do: - trace "dialFilterPeer: shard removed while subscribing, discarding result", - shard = shard, peer = peer.peerId - finally: - self.edgeFilterSubStates.withValue(shard, state): - state.pendingPeers.excl(peer.peerId) - -proc edgeFilterHealthLoop*(self: SubscriptionManager) {.async.} = - ## Periodically pings all connected filter service peers to verify they are - ## still alive at the application layer. Peers that fail the ping are removed. - while true: - await sleepAsync(EdgeFilterLoopInterval) - - if self.node.wakuFilterClient.isNil(): - warn "filter client is nil within edge filter health loop" - continue - - var connected = initTable[PeerId, RemotePeerInfo]() - for state in self.edgeFilterSubStates.values: - for peer in state.peers: - if self.node.peerManager.switch.peerStore.isConnected(peer.peerId): - connected[peer.peerId] = peer - - var alive = initHashSet[PeerId]() - - if connected.len > 0: - var pingTasks: seq[(PeerId, Future[FilterSubscribeResult])] - for peer in connected.values: - pingTasks.add( - (peer.peerId, self.node.wakuFilterClient.ping(peer, EdgeFilterPingTimeout)) - ) - - # extract future tasks from (PeerId, Future) tuples and await them - await allFutures(pingTasks.mapIt(it[1])) - - for (peerId, task) in pingTasks: - if task.read().isOk(): - alive.incl(peerId) - - var changed = false - for shard, state in self.edgeFilterSubStates.mpairs: - let oldLen = state.peers.len - state.peers.keepItIf(it.peerId notin connected or alive.contains(it.peerId)) - - if state.peers.len < oldLen: - changed = true - self.updateShardHealth(shard, state) - trace "Edge Filter health degraded by Ping failure", - shard = shard, new = state.currentHealth - - if changed: - self.edgeFilterWakeup.fire() - -proc selectFilterCandidates( - self: SubscriptionManager, shard: PubsubTopic, exclude: HashSet[PeerId], needed: int -): seq[RemotePeerInfo] = - ## Select filter service peer candidates for a shard. - - # Start with every filter server peer that can serve the shard - var allCandidates = self.node.peerManager.selectPeers( - filter_common.WakuFilterSubscribeCodec, some(shard) - ) - - # Remove all already used in this shard or being dialed for it - allCandidates.keepItIf(it.peerId notin exclude) - - # Collect peer IDs already tracked on other shards - var trackedOnOther = initHashSet[PeerId]() - for otherShard, otherState in self.edgeFilterSubStates.pairs: - if otherShard != shard: - for peer in otherState.peers: - trackedOnOther.incl(peer.peerId) - - # Prefer peers we already have a connection to first, preserving shuffle - var candidates = - allCandidates.filterIt(it.peerId in trackedOnOther) & - allCandidates.filterIt(it.peerId notin trackedOnOther) - - # We need to return 'needed' peers only - if candidates.len > needed: - candidates.setLen(needed) - return candidates - -proc edgeFilterSubLoop*(self: SubscriptionManager) {.async.} = - ## Reconciles filter subscriptions with the desired state from SubscriptionManager. - var lastSynced = initTable[PubsubTopic, HashSet[ContentTopic]]() - - while true: - await self.edgeFilterWakeup.wait() - await sleepAsync(EdgeFilterSubLoopDebounce) - self.edgeFilterWakeup.clear() - trace "edgeFilterSubLoop: woke up" - - if isNil(self.node.wakuFilterClient): - trace "edgeFilterSubLoop: wakuFilterClient is nil, skipping" - continue - - let desired = self.contentTopicSubs - - trace "edgeFilterSubLoop: desired state", numShards = desired.len - - let allShards = toHashSet(toSeq(desired.keys)) + toHashSet(toSeq(lastSynced.keys)) - - # Step 1: read state across all shards at once and - # create a list of peer dial tasks and shard tracking to delete. - - var dialTasks: seq[EdgeDialTask] - var shardsToDelete: seq[PubsubTopic] - - for shard in allShards: - let currTopics = desired.getOrDefault(shard) - let prevTopics = lastSynced.getOrDefault(shard) - - if shard notin self.edgeFilterSubStates: - self.edgeFilterSubStates[shard] = - EdgeFilterSubState(currentHealth: TopicHealth.UNHEALTHY) - - let addedTopics = toSeq(currTopics - prevTopics) - let removedTopics = toSeq(prevTopics - currTopics) - - self.edgeFilterSubStates.withValue(shard, state): - state.peers.keepItIf( - self.node.peerManager.switch.peerStore.isConnected(it.peerId) - ) - state.pending.keepItIf(not it.finished) - - if addedTopics.len > 0 or removedTopics.len > 0: - for peer in state.peers: - asyncSpawn self.syncFilterDeltas(peer, shard, addedTopics, removedTopics) - - if currTopics.len == 0: - shardsToDelete.add(shard) - else: - self.updateShardHealth(shard, state[]) - - let needed = max(0, HealthyThreshold - state.peers.len - state.pending.len) - - if needed > 0: - let tracked = state.peers.mapIt(it.peerId).toHashSet() + state.pendingPeers - let candidates = self.selectFilterCandidates(shard, tracked, needed) - let toDial = min(needed, candidates.len) - - trace "edgeFilterSubLoop: shard reconciliation", - shard = shard, - num_peers = state.peers.len, - num_pending = state.pending.len, - num_needed = needed, - num_available = candidates.len, - toDial = toDial - - for i in 0 ..< toDial: - dialTasks.add( - EdgeDialTask( - peer: candidates[i], shard: shard, topics: toSeq(currTopics) - ) - ) - - # Step 2: execute deferred shard tracking deletion and dial tasks. - - for shard in shardsToDelete: - self.edgeFilterSubStates.withValue(shard, state): - for fut in state.pending: - if not fut.finished: - await fut.cancelAndWait() - self.edgeFilterSubStates.del(shard) - - for task in dialTasks: - let fut = self.dialFilterPeer(task.peer, task.shard, task.topics) - self.edgeFilterSubStates.withValue(task.shard, state): - state.pending.add(fut) - - lastSynced = desired - -proc startEdgeFilterLoops(self: SubscriptionManager): Result[void, string] = - ## Start the edge filter orchestration loops. - ## Caller must ensure this is only called in edge mode (relay nil, filter client present). - self.edgeFilterWakeup = newAsyncEvent() - - self.peerEventListener = WakuPeerEvent.listen( - self.node.brokerCtx, - proc(evt: WakuPeerEvent) {.async: (raises: []), gcsafe.} = - if evt.kind == WakuPeerEventKind.EventDisconnected or - evt.kind == WakuPeerEventKind.EventMetadataUpdated: - self.edgeFilterWakeup.fire() - , - ).valueOr: - return err("Failed to listen to peer events for edge filter: " & error) - - self.edgeFilterSubLoopFut = self.edgeFilterSubLoop() - self.edgeFilterHealthLoopFut = self.edgeFilterHealthLoop() - return ok() - -proc stopEdgeFilterLoops(self: SubscriptionManager) {.async: (raises: []).} = - ## Stop the edge filter orchestration loops and clean up pending futures. - if not isNil(self.edgeFilterSubLoopFut): - await self.edgeFilterSubLoopFut.cancelAndWait() - self.edgeFilterSubLoopFut = nil - - if not isNil(self.edgeFilterHealthLoopFut): - await self.edgeFilterHealthLoopFut.cancelAndWait() - self.edgeFilterHealthLoopFut = nil - - for shard, state in self.edgeFilterSubStates: - for fut in state.pending: - if not fut.finished: - await fut.cancelAndWait() - - await WakuPeerEvent.dropListener(self.node.brokerCtx, self.peerEventListener) - -# --------------------------------------------------------------------------- -# SubscriptionManager Lifecycle (calls Edge behavior above) -# -# startSubscriptionManager and stopSubscriptionManager orchestrate both the -# core (relay) and edge (filter) paths, and register/clear broker providers. -# --------------------------------------------------------------------------- - -proc startSubscriptionManager*(self: SubscriptionManager): Result[void, string] = - # Register edge filter broker providers. The shard/content health providers - # in WakuNode query these via the broker as a fallback when relay health is - # not available. If edge mode is not active, these providers simply return - # NOT_SUBSCRIBED / strength 0, which is harmless. - RequestEdgeShardHealth.setProvider( - self.node.brokerCtx, - proc(shard: PubsubTopic): Result[RequestEdgeShardHealth, string] = - self.edgeFilterSubStates.withValue(shard, state): - return ok(RequestEdgeShardHealth(health: state.currentHealth)) - return ok(RequestEdgeShardHealth(health: TopicHealth.NOT_SUBSCRIBED)), - ).isOkOr: - error "Can't set provider for RequestEdgeShardHealth", error = error - - RequestEdgeFilterPeerCount.setProvider( - self.node.brokerCtx, - proc(): Result[RequestEdgeFilterPeerCount, string] = - var minPeers = high(int) - for state in self.edgeFilterSubStates.values: - minPeers = min(minPeers, state.peers.len) - if minPeers == high(int): - minPeers = 0 - return ok(RequestEdgeFilterPeerCount(peerCount: minPeers)), - ).isOkOr: - error "Can't set provider for RequestEdgeFilterPeerCount", error = error - - if self.node.wakuRelay.isNil(): - return self.startEdgeFilterLoops() - - # Core mode: auto-subscribe relay to all shards in autosharding. - if self.node.wakuAutoSharding.isSome(): - let autoSharding = self.node.wakuAutoSharding.get() - let clusterId = autoSharding.clusterId - let numShards = autoSharding.shardCountGenZero - - if numShards > 0: - var clusterPubsubTopics = newSeqOfCap[PubsubTopic](numShards) - - for i in 0 ..< numShards: - let shardObj = RelayShard(clusterId: clusterId, shardId: uint16(i)) - clusterPubsubTopics.add(PubsubTopic($shardObj)) - - self.subscribePubsubTopics(clusterPubsubTopics).isOkOr: - error "Failed to auto-subscribe Relay to cluster shards: ", error = error - else: - info "SubscriptionManager has no AutoSharding configured; skipping auto-subscribe." - - return ok() - -proc stopSubscriptionManager*(self: SubscriptionManager) {.async: (raises: []).} = - if self.node.wakuRelay.isNil(): - await self.stopEdgeFilterLoops() - RequestEdgeShardHealth.clearProvider(self.node.brokerCtx) - RequestEdgeFilterPeerCount.clearProvider(self.node.brokerCtx) diff --git a/waku/node/health_monitor/node_health_monitor.nim b/waku/node/health_monitor/node_health_monitor.nim index c652f7cea..98c0f6c7a 100644 --- a/waku/node/health_monitor/node_health_monitor.nim +++ b/waku/node/health_monitor/node_health_monitor.nim @@ -14,6 +14,7 @@ import events/health_events, events/peer_events, node/waku_node, + node/node_telemetry, node/peer_manager, node/kernel_api, node/health_monitor/online_monitor, diff --git a/waku/node/kernel_api/filter.nim b/waku/node/kernel_api/filter.nim index 948035f14..0db4875b0 100644 --- a/waku/node/kernel_api/filter.nim +++ b/waku/node/kernel_api/filter.nim @@ -21,6 +21,7 @@ import import ../waku_node, + ../node_telemetry, ../../waku_core, ../../waku_core/topics/sharding, ../../waku_filter_v2, diff --git a/waku/node/kernel_api/peer_exchange.nim b/waku/node/kernel_api/peer_exchange.nim index a4bec727b..1cb6bd3bb 100644 --- a/waku/node/kernel_api/peer_exchange.nim +++ b/waku/node/kernel_api/peer_exchange.nim @@ -19,6 +19,7 @@ import import ../waku_node, + ../node_telemetry, ../../waku_peer_exchange, ../../waku_core, ../peer_manager, diff --git a/waku/node/kernel_api/relay.nim b/waku/node/kernel_api/relay.nim index f1b80cf19..30fc22ec3 100644 --- a/waku/node/kernel_api/relay.nim +++ b/waku/node/kernel_api/relay.nim @@ -29,90 +29,18 @@ import waku_store_sync, waku_rln_relay, node/waku_node, + node/subscription_manager, node/peer_manager, events/message_events, ] export waku_relay.WakuRelayHandler -declarePublicHistogram waku_histogram_message_size, - "message size histogram in kB", - buckets = [ - 0.0, 1.0, 3.0, 5.0, 15.0, 50.0, 75.0, 100.0, 125.0, 150.0, 500.0, 700.0, 1000.0, Inf - ] - logScope: topics = "waku node relay api" ## Waku relay -proc registerRelayHandler( - node: WakuNode, topic: PubsubTopic, appHandler: WakuRelayHandler = nil -): bool = - ## Registers the only handler for the given topic. - ## Notice that this handler internally calls other handlers, such as filter, - ## archive, etc, plus the handler provided by the application. - ## Returns `true` if a mesh subscription was created or `false` if the relay - ## was already subscribed to the topic. - - let alreadySubscribed = node.wakuRelay.isSubscribed(topic) - - if not appHandler.isNil(): - if not alreadySubscribed or not node.legacyAppHandlers.hasKey(topic): - node.legacyAppHandlers[topic] = appHandler - else: - debug "Legacy appHandler already exists for active PubsubTopic, ignoring new handler", - topic = topic - - if alreadySubscribed: - return false - - proc traceHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = - let msgSizeKB = msg.payload.len / 1000 - - waku_node_messages.inc(labelValues = ["relay"]) - waku_histogram_message_size.observe(msgSizeKB) - - proc filterHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = - if node.wakuFilter.isNil(): - return - - await node.wakuFilter.handleMessage(topic, msg) - - proc archiveHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = - if node.wakuArchive.isNil(): - return - - await node.wakuArchive.handleMessage(topic, msg) - - proc syncHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = - if node.wakuStoreReconciliation.isNil(): - return - - node.wakuStoreReconciliation.messageIngress(topic, msg) - - proc internalHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = - MessageSeenEvent.emit(node.brokerCtx, topic, msg) - - let uniqueTopicHandler = proc( - topic: PubsubTopic, msg: WakuMessage - ): Future[void] {.async, gcsafe.} = - await traceHandler(topic, msg) - await filterHandler(topic, msg) - await archiveHandler(topic, msg) - await syncHandler(topic, msg) - await internalHandler(topic, msg) - - # Call the legacy (kernel API) app handler if it exists. - # Normally, hasKey is false and the MessageSeenEvent bus (new API) is used instead. - # But we need to support legacy behavior (kernel API use), hence this. - # NOTE: We can delete `legacyAppHandlers` if instead we refactor WakuRelay to support multiple - # PubsubTopic handlers, since that's actually supported by libp2p PubSub (bigger refactor...) - if node.legacyAppHandlers.hasKey(topic) and not node.legacyAppHandlers[topic].isNil(): - await node.legacyAppHandlers[topic](topic, msg) - - node.wakuRelay.subscribe(topic, uniqueTopicHandler) - proc getTopicOfSubscriptionEvent( node: WakuNode, subscription: SubscriptionEvent ): Result[(PubsubTopic, Option[ContentTopic]), string] = @@ -143,21 +71,15 @@ proc subscribe*( error "Invalid API call to `subscribe`. WakuRelay not mounted." return err("Invalid API call to `subscribe`. WakuRelay not mounted.") - let (pubsubTopic, contentTopicOp) = getTopicOfSubscriptionEvent(node, subscription).valueOr: + let (pubsubTopic, _) = getTopicOfSubscriptionEvent(node, subscription).valueOr: error "Failed to decode subscription event", error = error return err("Failed to decode subscription event: " & error) - if node.registerRelayHandler(pubsubTopic, handler): - info "subscribe", pubsubTopic, contentTopicOp - node.topicSubscriptionQueue.emit((kind: PubsubSub, topic: pubsubTopic)) - else: - if isNil(handler): - warn "No-effect API call to subscribe. Already subscribed to topic", pubsubTopic - else: - info "subscribe (was already subscribed in the mesh; appHandler set)", - pubsubTopic = pubsubTopic - - return ok() + # strict version + #if contentTopicOp.isSome(): + # return + # node.subscriptionManager.subscribe(pubsubTopic, contentTopicOp.get(), handler) + return node.subscriptionManager.subscribeShard(pubsubTopic, handler) proc unsubscribe*( node: WakuNode, subscription: SubscriptionEvent @@ -170,26 +92,14 @@ proc unsubscribe*( error "Invalid API call to `unsubscribe`. WakuRelay not mounted." return err("Invalid API call to `unsubscribe`. WakuRelay not mounted.") - let (pubsubTopic, contentTopicOp) = getTopicOfSubscriptionEvent(node, subscription).valueOr: + let (pubsubTopic, _) = getTopicOfSubscriptionEvent(node, subscription).valueOr: error "Failed to decode unsubscribe event", error = error return err("Failed to decode unsubscribe event: " & error) - let hadHandler = node.legacyAppHandlers.hasKey(pubsubTopic) - if hadHandler: - node.legacyAppHandlers.del(pubsubTopic) - - if node.wakuRelay.isSubscribed(pubsubTopic): - info "unsubscribe", pubsubTopic, contentTopicOp - node.wakuRelay.unsubscribe(pubsubTopic) - node.topicSubscriptionQueue.emit((kind: PubsubUnsub, topic: pubsubTopic)) - else: - if not hadHandler: - warn "No-effect API call to `unsubscribe`. Was not subscribed", pubsubTopic - else: - info "unsubscribe (was not subscribed in the mesh; appHandler removed)", - pubsubTopic = pubsubTopic - - return ok() + # strict version + #if contentTopicOp.isSome(): + # return node.subscriptionManager.unsubscribe(pubsubTopic, contentTopicOp.get()) + return node.subscriptionManager.unsubscribeAll(pubsubTopic) proc isSubscribed*( node: WakuNode, subscription: SubscriptionEvent diff --git a/waku/node/node_telemetry.nim b/waku/node/node_telemetry.nim new file mode 100644 index 000000000..cd214969c --- /dev/null +++ b/waku/node/node_telemetry.nim @@ -0,0 +1,27 @@ +{.push raises: [].} + +import metrics + +declarePublicGauge waku_version, + "Waku version info (in git describe format)", ["version"] + +declarePublicCounter waku_node_errors, "number of wakunode errors", ["type"] + +declarePublicGauge waku_lightpush_peers, "number of lightpush peers" + +declarePublicGauge waku_filter_peers, "number of filter peers" + +declarePublicGauge waku_store_peers, "number of store peers" + +declarePublicGauge waku_px_peers, + "number of peers (in the node's peerManager) supporting the peer exchange protocol" + +declarePublicCounter waku_node_messages, "number of messages received", ["type"] + +declarePublicHistogram waku_histogram_message_size, + "message size histogram in kB", + buckets = [ + 0.0, 1.0, 3.0, 5.0, 15.0, 50.0, 75.0, 100.0, 125.0, 150.0, 500.0, 700.0, 1000.0, Inf + ] + +{.pop.} diff --git a/waku/node/node_types.nim b/waku/node/node_types.nim new file mode 100644 index 000000000..f5c2a56b6 --- /dev/null +++ b/waku/node/node_types.nim @@ -0,0 +1,116 @@ +{.push raises: [].} + +import + std/[options, tables, sets], + chronos, + results, + eth/keys, + bearssl/rand, + eth/p2p/discoveryv5/enr, + libp2p/crypto/crypto, + libp2p/[multiaddress, multicodec], + libp2p/protocols/ping, + libp2p/protocols/mix/mix_protocol, + brokers/broker_context + +import + waku/[ + waku_core, + waku_relay, + waku_archive, + waku_store/protocol as store, + waku_store/client as store_client, + waku_store/resume, + waku_store_sync, + waku_filter_v2, + waku_filter_v2/client as filter_client, + waku_metadata, + waku_rendezvous/protocol, + waku_rendezvous/client as rendezvous_client, + waku_lightpush_legacy/client as legacy_lightpush_client, + waku_lightpush_legacy as legacy_lightpush_protocol, + waku_lightpush/client as lightpush_client, + waku_lightpush as lightpush_protocol, + waku_peer_exchange, + waku_rln_relay, + waku_mix, + common/rate_limit/setting, + discovery/waku_kademlia, + net/bound_ports, + events/peer_events, + ], + ./peer_manager, + ./health_monitor/topic_health + +# key and crypto modules different +type + # TODO: Move to application instance (e.g., `WakuNode2`) + WakuInfo* = object # NOTE One for simplicity, can extend later as needed + listenAddresses*: seq[string] + enrUri*: string #multiaddrStrings*: seq[string] + mixPubKey*: Option[string] + + # NOTE based on Eth2Node in NBC eth2_network.nim + WakuNode* = ref object + peerManager*: PeerManager + switch*: Switch + wakuRelay*: WakuRelay + wakuArchive*: waku_archive.WakuArchive + wakuStore*: store.WakuStore + wakuStoreClient*: store_client.WakuStoreClient + wakuStoreResume*: StoreResume + wakuStoreReconciliation*: SyncReconciliation + wakuStoreTransfer*: SyncTransfer + wakuFilter*: waku_filter_v2.WakuFilter + wakuFilterClient*: filter_client.WakuFilterClient + wakuRlnRelay*: WakuRLNRelay + wakuLegacyLightPush*: WakuLegacyLightPush + wakuLegacyLightpushClient*: WakuLegacyLightPushClient + wakuLightPush*: WakuLightPush + wakuLightpushClient*: WakuLightPushClient + wakuPeerExchange*: WakuPeerExchange + wakuPeerExchangeClient*: WakuPeerExchangeClient + wakuMetadata*: WakuMetadata + wakuAutoSharding*: Option[Sharding] + enr*: enr.Record + libp2pPing*: Ping + rng*: ref rand.HmacDrbgContext + brokerCtx*: BrokerContext + wakuRendezvous*: WakuRendezVous + wakuRendezvousClient*: rendezvous_client.WakuRendezVousClient + announcedAddresses*: seq[MultiAddress] + extMultiAddrsOnly*: bool # When true, skip automatic IP address replacement + started*: bool # Indicates that node has started listening + topicSubscriptionQueue*: AsyncEventQueue[SubscriptionEvent] + rateLimitSettings*: ProtocolRateLimitSettings + legacyAppHandlers*: Table[PubsubTopic, WakuRelayHandler] + ## Kernel API Relay appHandlers (if any) + subscriptionManager*: SubscriptionManager + wakuMix*: WakuMix + kademliaDiscoveryLoop*: Future[void] + wakuKademlia*: WakuKademlia + ports*: BoundPorts + + ShardSubscription* = object + contentTopics*: HashSet[ContentTopic] + directShardSub*: bool + ## shard subscribed directly (PubsubSub), independent of content-topic interest + + EdgeFilterSubState* = object + peers*: seq[RemotePeerInfo] + pending*: seq[Future[void]] + pendingPeers*: HashSet[PeerId] + currentHealth*: TopicHealth + + SubscriptionManager* = ref object of RootObj + node*: WakuNode + shards*: Table[PubsubTopic, ShardSubscription] + edgeFilterSubStates*: Table[PubsubTopic, EdgeFilterSubState] + edgeFilterWakeup*: AsyncEvent + edgeFilterSubLoopFut*: Future[void] + edgeFilterConnectionLoopFut*: Future[void] + peerEventListener*: WakuPeerEventListener + ownsEdgeShardHealthProvider*: bool + ownsEdgeFilterPeerCountProvider*: bool + +{.pop.} diff --git a/waku/node/subscription_manager.nim b/waku/node/subscription_manager.nim new file mode 100644 index 000000000..0cac87073 --- /dev/null +++ b/waku/node/subscription_manager.nim @@ -0,0 +1,708 @@ +import std/[sequtils, sets, tables, options], chronos, chronicles, metrics, results +import libp2p/[peerid, peerinfo] +import brokers/broker_context + +import + waku/[ + waku_core, + waku_core/topics/sharding, + node/node_types, + node/node_telemetry, + waku_relay, + waku_archive, + waku_store_sync, + waku_filter_v2/common as filter_common, + waku_filter_v2/client as filter_client, + waku_filter_v2/protocol as filter_protocol, + events/health_events, + events/message_events, + events/peer_events, + requests/health_requests, + node/peer_manager, + node/health_monitor/topic_health, + node/health_monitor/connection_status, + ] + +{.push raises: [].} + +proc registerRelayHandler( + node: WakuNode, shard: PubsubTopic, appHandler: WakuRelayHandler = nil +): bool = + ## Returns true iff we did a new (and only) subscription for this shard in GossipSub. + let alreadySubscribed = node.wakuRelay.isSubscribed(shard) + + if not appHandler.isNil(): + if not alreadySubscribed or not node.legacyAppHandlers.hasKey(shard): + node.legacyAppHandlers[shard] = appHandler + else: + debug "Legacy appHandler already exists for active PubsubTopic, ignoring new handler", + topic = shard + + if alreadySubscribed: + return false + + proc traceHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = + let msgSizeKB = msg.payload.len / 1000 + + waku_node_messages.inc(labelValues = ["relay"]) + waku_histogram_message_size.observe(msgSizeKB) + + proc filterHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = + if node.wakuFilter.isNil(): + return + + await node.wakuFilter.handleMessage(topic, msg) + + proc archiveHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = + if node.wakuArchive.isNil(): + return + + await node.wakuArchive.handleMessage(topic, msg) + + proc syncHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = + if node.wakuStoreReconciliation.isNil(): + return + + node.wakuStoreReconciliation.messageIngress(topic, msg) + + proc internalHandler(topic: PubsubTopic, msg: WakuMessage) {.async, gcsafe.} = + MessageSeenEvent.emit(node.brokerCtx, topic, msg) + + let uniqueTopicHandler = proc( + topic: PubsubTopic, msg: WakuMessage + ): Future[void] {.async, gcsafe.} = + await traceHandler(topic, msg) + await filterHandler(topic, msg) + await archiveHandler(topic, msg) + await syncHandler(topic, msg) + await internalHandler(topic, msg) + + if node.legacyAppHandlers.hasKey(topic) and not node.legacyAppHandlers[topic].isNil(): + await node.legacyAppHandlers[topic](topic, msg) + + node.wakuRelay.subscribe(shard, uniqueTopicHandler) + return true + +proc unregisterRelayHandler(node: WakuNode, shard: PubsubTopic): bool = + ## Returns true iff we had a subscription for this shard in GossipSub and it was removed. + if node.legacyAppHandlers.hasKey(shard): + node.legacyAppHandlers.del(shard) + + if node.wakuRelay.isSubscribed(shard): + node.wakuRelay.unsubscribe(shard) + return true + return false + +proc doRelaySubscribe( + node: WakuNode, shard: PubsubTopic, appHandler: WakuRelayHandler = nil +): bool = + ## Subscribes the node to a shard. + ## Returns true if we actually subscribed (transitioned from unsubscribed to subscribed). + ## Emit the shard subscription event if we actually subscribed. + let installed = node.registerRelayHandler(shard, appHandler) + if installed: + node.topicSubscriptionQueue.emit((kind: PubsubSub, topic: shard)) + return installed + +proc doRelayUnsubscribe(node: WakuNode, shard: PubsubTopic): bool = + ## Unsubscribes the node from a shard. + ## Returns true if we actually unsubscribed (transitioned from subscribed to unsubscribed). + ## Emit the shard unsubscription event if we actually unsubscribed. + let unsubscribed = node.unregisterRelayHandler(shard) + if unsubscribed: + node.topicSubscriptionQueue.emit((kind: PubsubUnsub, topic: shard)) + return unsubscribed + +proc new*(T: type SubscriptionManager, node: WakuNode): T = + T( + node: node, + shards: initTable[PubsubTopic, ShardSubscription](), + edgeFilterSubStates: initTable[PubsubTopic, EdgeFilterSubState](), + edgeFilterWakeup: newAsyncEvent(), + ) + +func wanted(entry: ShardSubscription): bool = + ## True if the shard has content-topic interest or a direct subscription. + return entry.contentTopics.len > 0 or entry.directShardSub + +proc isContentSubscribed*( + self: SubscriptionManager, shard: PubsubTopic, contentTopic: ContentTopic +): bool = + self.shards.withValue(shard, sub): + return contentTopic in sub.contentTopics + return false + +iterator subscribedContentTopics*( + self: SubscriptionManager +): (PubsubTopic, HashSet[ContentTopic]) = + ## Yields each shard with its non-empty content-topic set. + for shard, sub in self.shards.pairs: + if sub.contentTopics.len > 0: + yield (shard, sub.contentTopics) + +func toTopicHealth*(peersCount: int): TopicHealth = + if peersCount >= HealthyThreshold: + return TopicHealth.SUFFICIENTLY_HEALTHY + elif peersCount > 0: + return TopicHealth.MINIMALLY_HEALTHY + else: + return TopicHealth.UNHEALTHY + +proc edgeFilterPeerCount*(self: SubscriptionManager, shard: PubsubTopic): int = + self.edgeFilterSubStates.withValue(shard, state): + return state.peers.len + return 0 + +proc getShardForContentTopic( + self: SubscriptionManager, topic: ContentTopic +): Result[PubsubTopic, string] = + if self.node.wakuAutoSharding.isSome(): + let shardObj = ?self.node.wakuAutoSharding.get().getShard(topic) + return ok($shardObj) + + return err("autosharding is not configured; pass an explicit shard") + +proc subscribeShard*( + self: SubscriptionManager, shard: PubsubTopic, handler: WakuRelayHandler = nil +): Result[void, string] = + ## Subscribes to the shard directly and joins the relay mesh. + var added = false + self.shards.withValue(shard, entry): + if not entry.directShardSub: + entry.directShardSub = true + added = true + do: + self.shards[shard] = ShardSubscription( + contentTopics: initHashSet[ContentTopic](), directShardSub: true + ) + added = true + if added: + self.edgeFilterWakeup.fire() + if not isNil(self.node.wakuRelay): + discard self.node.doRelaySubscribe(shard, handler) + return ok() + +proc unsubscribeShard*( + self: SubscriptionManager, shard: PubsubTopic +): Result[void, string] = + ## Drops the direct shard subscription; unsubscribes the mesh if no content topic wants it. + var removed = false + var shardEmpty = false + self.shards.withValue(shard, entry): + if entry.directShardSub: + entry.directShardSub = false + removed = true + shardEmpty = not entry[].wanted() + if removed: + self.edgeFilterWakeup.fire() + if shardEmpty: + self.shards.del(shard) + if not isNil(self.node.wakuRelay): + discard self.node.doRelayUnsubscribe(shard) + return ok() + +proc subscribe*( + self: SubscriptionManager, + shard: PubsubTopic, + contentTopic: ContentTopic, + handler: WakuRelayHandler = nil, +): Result[void, string] = + ## Adds content-topic interest on the shard and joins the relay mesh. + var added = false + self.shards.withValue(shard, entry): + if contentTopic notin entry.contentTopics: + entry.contentTopics.incl(contentTopic) + added = true + do: + var entry = ShardSubscription(contentTopics: initHashSet[ContentTopic]()) + entry.contentTopics.incl(contentTopic) + self.shards[shard] = entry + added = true + if added: + self.edgeFilterWakeup.fire() + if not isNil(self.node.wakuRelay): + discard self.node.doRelaySubscribe(shard, handler) + return ok() + +proc unsubscribe*( + self: SubscriptionManager, shard: PubsubTopic, contentTopic: ContentTopic +): Result[void, string] = + ## Drops content-topic interest on the shard; unsubscribes the mesh if nothing else wants it. + var removed = false + var shardEmpty = false + self.shards.withValue(shard, entry): + if contentTopic in entry.contentTopics: + entry.contentTopics.excl(contentTopic) + removed = true + shardEmpty = not entry[].wanted() + if removed: + self.edgeFilterWakeup.fire() + if shardEmpty: + self.shards.del(shard) + if not isNil(self.node.wakuRelay): + discard self.node.doRelayUnsubscribe(shard) + return ok() + +proc subscribe*(self: SubscriptionManager, topic: ContentTopic): Result[void, string] = + ## Subscribes to a content topic, resolving its shard via autosharding. + let shard = ?self.getShardForContentTopic(topic) + return self.subscribe(shard, topic) + +proc unsubscribe*( + self: SubscriptionManager, topic: ContentTopic +): Result[void, string] = + ## Unsubscribes from a content topic, resolving its shard via autosharding. + let shard = ?self.getShardForContentTopic(topic) + return self.unsubscribe(shard, topic) + +proc unsubscribeAll*( + self: SubscriptionManager, shard: PubsubTopic +): Result[void, string] = + ## Drops every content topic on the shard, then the direct subscription. + var snapshot: seq[ContentTopic] + self.shards.withValue(shard, sub): + snapshot = toSeq(sub.contentTopics) + for contentTopic in snapshot: + ?self.unsubscribe(shard, contentTopic) + return self.unsubscribeShard(shard) + +proc isSubscribed*( + self: SubscriptionManager, topic: ContentTopic +): Result[bool, string] = + let shard = ?self.getShardForContentTopic(topic) + return ok(self.isContentSubscribed(shard, topic)) + +proc subscribeAllAutoshards*(self: SubscriptionManager): Result[void, string] = + ## Subscribes the relay to every shard in the configured autosharding cluster. + if self.node.wakuRelay.isNil() or self.node.wakuAutoSharding.isNone(): + return ok() + + let autoSharding = self.node.wakuAutoSharding.get() + let numShards = autoSharding.shardCountGenZero + if numShards == 0: + return ok() + + for i in 0'u32 ..< numShards: + let shardObj = RelayShard(clusterId: autoSharding.clusterId, shardId: uint16(i)) + self.subscribeShard(PubsubTopic($shardObj)).isOkOr: + error "failed to auto-subscribe relay to cluster shard", + shard = $shardObj, error = error + + ok() + +{.pop.} + +const EdgeFilterSubscribeTimeout = chronos.seconds(15) + ## Timeout for a single filter subscribe/unsubscribe RPC to a service peer. +const EdgeFilterPingTimeout = chronos.seconds(5) + ## Timeout for a filter ping health check. +const EdgeFilterLoopInterval = chronos.seconds(30) + ## Interval for the edge filter health ping loop. +const EdgeFilterSubLoopDebounce = chronos.seconds(1) + ## Debounce delay to coalesce rapid-fire wakeups into a single reconciliation pass. + +type EdgeDialTask = object + peer: RemotePeerInfo + shard: PubsubTopic + topics: seq[ContentTopic] + +proc updateShardHealth( + self: SubscriptionManager, shard: PubsubTopic, state: var EdgeFilterSubState +) = + ## Recompute and emit health for a shard after its peer set changed. + let newHealth = toTopicHealth(state.peers.len) + if newHealth != state.currentHealth: + state.currentHealth = newHealth + EventShardTopicHealthChange.emit(self.node.brokerCtx, shard, newHealth) + +proc removePeer(self: SubscriptionManager, shard: PubsubTopic, peerId: PeerId) = + ## Remove a peer from edgeFilterSubStates for the given shard, + ## update health, and wake the sub loop to dial a replacement. + ## Best-effort unsubscribe so the service peer stops pushing to us. + self.edgeFilterSubStates.withValue(shard, state): + var idx = -1 + for i, p in state.peers: + if p.peerId == peerId: + idx = i + break + if idx < 0: + return + + let peer = state.peers[idx] + state.peers.del(idx) + self.updateShardHealth(shard, state[]) + self.edgeFilterWakeup.fire() + + if not self.node.wakuFilterClient.isNil(): + self.shards.withValue(shard, sub): + let ct = toSeq(sub.contentTopics) + if ct.len > 0: + proc doUnsubscribe() {.async.} = + discard await self.node.wakuFilterClient.unsubscribe(peer, shard, ct) + + asyncSpawn doUnsubscribe() + +type SendChunkedFilterRpcKind = enum + FilterSubscribe + FilterUnsubscribe + +proc sendChunkedFilterRpc( + self: SubscriptionManager, + peer: RemotePeerInfo, + shard: PubsubTopic, + topics: seq[ContentTopic], + kind: SendChunkedFilterRpcKind, +): Future[bool] {.async.} = + ## Send a chunked filter subscribe or unsubscribe RPC. Returns true on + ## success. On failure the peer is removed and false is returned. + try: + var i = 0 + while i < topics.len: + let chunk = + topics[i ..< min(i + filter_protocol.MaxContentTopicsPerRequest, topics.len)] + let fut = + case kind + of FilterSubscribe: + self.node.wakuFilterClient.subscribe(peer, shard, chunk) + of FilterUnsubscribe: + self.node.wakuFilterClient.unsubscribe(peer, shard, chunk) + if not (await fut.withTimeout(EdgeFilterSubscribeTimeout)) or fut.read().isErr(): + trace "sendChunkedFilterRpc: chunk failed", + op = kind, shard = shard, peer = peer.peerId + self.removePeer(shard, peer.peerId) + return false + i += filter_protocol.MaxContentTopicsPerRequest + except CatchableError as exc: + debug "sendChunkedFilterRpc: failed", + op = kind, shard = shard, peer = peer.peerId, err = exc.msg + self.removePeer(shard, peer.peerId) + return false + return true + +proc syncFilterDeltas( + self: SubscriptionManager, + peer: RemotePeerInfo, + shard: PubsubTopic, + added: seq[ContentTopic], + removed: seq[ContentTopic], +) {.async.} = + ## Push content topic changes (adds/removes) to an already-tracked peer. + if added.len > 0: + if not await self.sendChunkedFilterRpc(peer, shard, added, FilterSubscribe): + return + + if removed.len > 0: + discard await self.sendChunkedFilterRpc(peer, shard, removed, FilterUnsubscribe) + +proc dialFilterPeer( + self: SubscriptionManager, + peer: RemotePeerInfo, + shard: PubsubTopic, + contentTopics: seq[ContentTopic], +) {.async.} = + ## Subscribe a new peer to all content topics on a shard and start tracking it. + self.edgeFilterSubStates.withValue(shard, state): + state.pendingPeers.incl(peer.peerId) + + try: + if not await self.sendChunkedFilterRpc(peer, shard, contentTopics, FilterSubscribe): + return + + self.edgeFilterSubStates.withValue(shard, state): + if state.peers.anyIt(it.peerId == peer.peerId): + trace "dialFilterPeer: peer already tracked, skipping duplicate", + shard = shard, peer = peer.peerId + return + + state.peers.add(peer) + self.updateShardHealth(shard, state[]) + trace "dialFilterPeer: successfully subscribed to all chunks", + shard = shard, peer = peer.peerId, totalPeers = state.peers.len + do: + trace "dialFilterPeer: shard removed while subscribing, discarding result", + shard = shard, peer = peer.peerId + finally: + self.edgeFilterSubStates.withValue(shard, state): + state.pendingPeers.excl(peer.peerId) + +proc edgeFilterConnectionLoop(self: SubscriptionManager) {.async.} = + ## Periodically pings all tracked filter service peers to verify they are + ## still alive at the application layer. Peers that fail the ping are removed. + while true: + await sleepAsync(EdgeFilterLoopInterval) + + if self.node.wakuFilterClient.isNil(): + warn "filter client is nil within edge filter connection loop" + continue + + var connected = initTable[PeerId, RemotePeerInfo]() + for state in self.edgeFilterSubStates.values: + for peer in state.peers: + if self.node.peerManager.switch.peerStore.isConnected(peer.peerId): + connected[peer.peerId] = peer + + var alive = initHashSet[PeerId]() + + if connected.len > 0: + var pingTasks: seq[(PeerId, Future[FilterSubscribeResult])] + for peer in connected.values: + pingTasks.add( + (peer.peerId, self.node.wakuFilterClient.ping(peer, EdgeFilterPingTimeout)) + ) + + await allFutures(pingTasks.mapIt(it[1])) + + for (peerId, task) in pingTasks: + if task.read().isOk(): + alive.incl(peerId) + + var changed = false + for shard, state in self.edgeFilterSubStates.mpairs: + let oldLen = state.peers.len + state.peers.keepItIf(it.peerId notin connected or alive.contains(it.peerId)) + + if state.peers.len < oldLen: + changed = true + self.updateShardHealth(shard, state) + trace "Edge Filter health degraded by Ping failure", + shard = shard, new = state.currentHealth + + if changed: + self.edgeFilterWakeup.fire() + +proc selectFilterCandidates( + self: SubscriptionManager, shard: PubsubTopic, exclude: HashSet[PeerId], needed: int +): seq[RemotePeerInfo] = + ## Select filter service peer candidates for a shard. + + # Start with every filter server peer that can serve the shard + var allCandidates = self.node.peerManager.selectPeers( + filter_common.WakuFilterSubscribeCodec, some(shard) + ) + + # Remove all already used in this shard or being dialed for it + allCandidates.keepItIf(it.peerId notin exclude) + + # Collect peer IDs already tracked on other shards + var trackedOnOther = initHashSet[PeerId]() + for otherShard, otherState in self.edgeFilterSubStates.pairs: + if otherShard != shard: + for peer in otherState.peers: + trackedOnOther.incl(peer.peerId) + + # Prefer peers we already have a connection to first, preserving shuffle + var candidates = + allCandidates.filterIt(it.peerId in trackedOnOther) & + allCandidates.filterIt(it.peerId notin trackedOnOther) + + # We need to return 'needed' peers only + if candidates.len > needed: + candidates.setLen(needed) + return candidates + +proc edgeFilterSubLoop(self: SubscriptionManager) {.async.} = + ## Reconciles filter subscriptions with the desired state from SubscriptionManager. + var lastSynced = initTable[PubsubTopic, HashSet[ContentTopic]]() + + while true: + await self.edgeFilterWakeup.wait() + await sleepAsync(EdgeFilterSubLoopDebounce) + self.edgeFilterWakeup.clear() + trace "edgeFilterSubLoop: woke up" + + if isNil(self.node.wakuFilterClient): + trace "edgeFilterSubLoop: wakuFilterClient is nil, skipping" + continue + + var newSynced = initTable[PubsubTopic, HashSet[ContentTopic]]() + var allShards: HashSet[PubsubTopic] + for shard, sub in self.shards.pairs: + if sub.contentTopics.len > 0: + newSynced[shard] = sub.contentTopics + allShards.incl(shard) + for shard in lastSynced.keys: + allShards.incl(shard) + + trace "edgeFilterSubLoop: desired state", numShards = newSynced.len + + # Step 1: read state across all shards at once and + # create a list of peer dial tasks and shard tracking to delete. + + var dialTasks: seq[EdgeDialTask] + var shardsToDelete: seq[PubsubTopic] + + for shard in allShards: + # Compute added/removed deltas via direct iteration; no HashSet copies. + var addedTopics: seq[ContentTopic] + var removedTopics: seq[ContentTopic] + newSynced.withValue(shard, curr): + lastSynced.withValue(shard, prev): + for t in curr[]: + if t notin prev[]: + addedTopics.add(t) + for t in prev[]: + if t notin curr[]: + removedTopics.add(t) + do: + for t in curr[]: + addedTopics.add(t) + do: + lastSynced.withValue(shard, prev): + for t in prev[]: + removedTopics.add(t) + + discard self.edgeFilterSubStates.mgetOrPut( + shard, EdgeFilterSubState(currentHealth: TopicHealth.UNHEALTHY) + ) + + self.edgeFilterSubStates.withValue(shard, state): + state.peers.keepItIf( + self.node.peerManager.switch.peerStore.isConnected(it.peerId) + ) + state.pending.keepItIf(not it.finished) + + if addedTopics.len > 0 or removedTopics.len > 0: + for peer in state.peers: + asyncSpawn self.syncFilterDeltas(peer, shard, addedTopics, removedTopics) + + if shard notin newSynced: + shardsToDelete.add(shard) + else: + self.updateShardHealth(shard, state[]) + + let needed = max(0, HealthyThreshold - state.peers.len - state.pending.len) + + if needed > 0: + var tracked: HashSet[PeerId] + for p in state.peers: + tracked.incl(p.peerId) + for p in state.pendingPeers: + tracked.incl(p) + let candidates = self.selectFilterCandidates(shard, tracked, needed) + let toDial = min(needed, candidates.len) + + trace "edgeFilterSubLoop: shard reconciliation", + shard = shard, + num_peers = state.peers.len, + num_pending = state.pending.len, + num_needed = needed, + num_available = candidates.len, + toDial = toDial + + var dialTopics: seq[ContentTopic] + newSynced.withValue(shard, curr): + dialTopics = toSeq(curr[]) + + for i in 0 ..< toDial: + dialTasks.add( + EdgeDialTask(peer: candidates[i], shard: shard, topics: dialTopics) + ) + + # Step 2: execute deferred shard tracking deletion and dial tasks. + + for shard in shardsToDelete: + self.edgeFilterSubStates.withValue(shard, state): + for fut in state.pending: + if not fut.finished: + await fut.cancelAndWait() + self.edgeFilterSubStates.del(shard) + + for task in dialTasks: + let fut = self.dialFilterPeer(task.peer, task.shard, task.topics) + self.edgeFilterSubStates.withValue(task.shard, state): + state.pending.add(fut) + + lastSynced = newSynced + +proc startEdgeFilterLoops(self: SubscriptionManager): Result[void, string] = + ## Start the edge filter orchestration loops. + ## Caller must ensure this is only called in edge mode (relay nil, filter client present). + self.peerEventListener = WakuPeerEvent.listen( + self.node.brokerCtx, + proc(evt: WakuPeerEvent) {.async: (raises: []), gcsafe.} = + if evt.kind == WakuPeerEventKind.EventDisconnected: + # We know a peer is gone, so if it was a service filter peer for this + # edge node, remove it from the list of service filter peers for each + # shard it served and re-evaluate shard health for the affected shards. + for shard, state in self.edgeFilterSubStates.mpairs: + let oldLen = state.peers.len + state.peers.keepItIf(it.peerId != evt.peerId) + if state.peers.len < oldLen: + self.updateShardHealth(shard, state) + self.edgeFilterWakeup.fire() + elif evt.kind == WakuPeerEventKind.EventMetadataUpdated: + self.edgeFilterWakeup.fire(), + ).valueOr: + return err("Failed to listen to peer events for edge filter: " & error) + + self.edgeFilterSubLoopFut = self.edgeFilterSubLoop() + self.edgeFilterConnectionLoopFut = self.edgeFilterConnectionLoop() + return ok() + +proc stopEdgeFilterLoops(self: SubscriptionManager) {.async: (raises: []).} = + ## Stop the edge filter orchestration loops and clean up pending futures. + if not isNil(self.edgeFilterSubLoopFut): + await self.edgeFilterSubLoopFut.cancelAndWait() + self.edgeFilterSubLoopFut = nil + + if not isNil(self.edgeFilterConnectionLoopFut): + await self.edgeFilterConnectionLoopFut.cancelAndWait() + self.edgeFilterConnectionLoopFut = nil + + for shard, state in self.edgeFilterSubStates: + for fut in state.pending: + if not fut.finished: + await fut.cancelAndWait() + + await WakuPeerEvent.dropListener(self.node.brokerCtx, self.peerEventListener) + +proc start*(self: SubscriptionManager): Result[void, string] = + let edgeShardHealthRes = RequestEdgeShardHealth.setProvider( + self.node.brokerCtx, + proc(shard: PubsubTopic): Result[RequestEdgeShardHealth, string] = + self.edgeFilterSubStates.withValue(shard, state): + return ok(RequestEdgeShardHealth(health: state.currentHealth)) + return ok(RequestEdgeShardHealth(health: TopicHealth.NOT_SUBSCRIBED)), + ) + self.ownsEdgeShardHealthProvider = edgeShardHealthRes.isOk() + if edgeShardHealthRes.isErr(): + error "Can't set provider for RequestEdgeShardHealth", + error = edgeShardHealthRes.error + + let edgeFilterPeerCountRes = RequestEdgeFilterPeerCount.setProvider( + self.node.brokerCtx, + proc(): Result[RequestEdgeFilterPeerCount, string] = + var minPeers = high(int) + for state in self.edgeFilterSubStates.values: + minPeers = min(minPeers, state.peers.len) + if minPeers == high(int): + minPeers = 0 + return ok(RequestEdgeFilterPeerCount(peerCount: minPeers)), + ) + self.ownsEdgeFilterPeerCountProvider = edgeFilterPeerCountRes.isOk() + if edgeFilterPeerCountRes.isErr(): + error "Can't set provider for RequestEdgeFilterPeerCount", + error = edgeFilterPeerCountRes.error + + # Start Edge workers only when we are in Edge mode (relay not mounted) + # AND the filter client is mounted (otherwise the loops have nothing + # to talk to and just spam "filter client is nil" warnings). + if self.node.wakuRelay.isNil() and not self.node.wakuFilterClient.isNil(): + return self.startEdgeFilterLoops() + + return ok() + +proc stop*(self: SubscriptionManager) {.async: (raises: []).} = + # Stop Edge workers if we started them in `start` (Edge mode + filter client). + if self.node.wakuRelay.isNil() and not self.node.wakuFilterClient.isNil(): + await self.stopEdgeFilterLoops() + + # Only clear providers we actually registered: another SubscriptionManager + # sharing this brokerCtx may have won the race, and clearing its provider + # would leave the broker silently provider-less. + if self.ownsEdgeShardHealthProvider: + RequestEdgeShardHealth.clearProvider(self.node.brokerCtx) + self.ownsEdgeShardHealthProvider = false + if self.ownsEdgeFilterPeerCountProvider: + RequestEdgeFilterPeerCount.clearProvider(self.node.brokerCtx) + self.ownsEdgeFilterPeerCountProvider = false diff --git a/waku/node/waku_metrics.nim b/waku/node/waku_metrics.nim index af74b1532..bb4c10fff 100644 --- a/waku/node/waku_metrics.nim +++ b/waku/node/waku_metrics.nim @@ -4,6 +4,7 @@ import chronicles, chronos, metrics, metrics/chronos_httpserver import waku/[net/auto_port, waku_rln_relay/protocol_metrics as rln_metrics, utils/collector], ./peer_manager, + ./node_telemetry, ./waku_node const LogInterval = 10.minutes diff --git a/waku/node/waku_node.nim b/waku/node/waku_node.nim index 26a2b5a57..9ac3c5d00 100644 --- a/waku/node/waku_node.nim +++ b/waku/node/waku_node.nim @@ -60,23 +60,14 @@ import requests/health_requests, events/health_events, events/message_events, + events/peer_events, ], waku/discovery/waku_kademlia, waku/net/[bound_ports, net_config], ./peer_manager, ./health_monitor/health_status, - ./health_monitor/topic_health - -declarePublicCounter waku_node_messages, "number of messages received", ["type"] - -declarePublicGauge waku_version, - "Waku version info (in git describe format)", ["version"] -declarePublicCounter waku_node_errors, "number of wakunode errors", ["type"] -declarePublicGauge waku_lightpush_peers, "number of lightpush peers" -declarePublicGauge waku_filter_peers, "number of filter peers" -declarePublicGauge waku_store_peers, "number of store peers" -declarePublicGauge waku_px_peers, - "number of peers (in the node's peerManager) supporting the peer exchange protocol" + ./health_monitor/topic_health, + ./node_telemetry logScope: topics = "waku node" @@ -94,53 +85,10 @@ const clientId* = "Nimbus Waku v2 node" const WakuNodeVersionString* = "version / git commit hash: " & git_version -# key and crypto modules different -type - # TODO: Move to application instance (e.g., `WakuNode2`) - WakuInfo* = object # NOTE One for simplicity, can extend later as needed - listenAddresses*: seq[string] - enrUri*: string #multiaddrStrings*: seq[string] - mixPubKey*: Option[string] +import ./node_types +export node_types - # NOTE based on Eth2Node in NBC eth2_network.nim - WakuNode* = ref object - peerManager*: PeerManager - switch*: Switch - wakuRelay*: WakuRelay - wakuArchive*: waku_archive.WakuArchive - wakuStore*: store.WakuStore - wakuStoreClient*: store_client.WakuStoreClient - wakuStoreResume*: StoreResume - wakuStoreReconciliation*: SyncReconciliation - wakuStoreTransfer*: SyncTransfer - wakuFilter*: waku_filter_v2.WakuFilter - wakuFilterClient*: filter_client.WakuFilterClient - wakuRlnRelay*: WakuRLNRelay - wakuLegacyLightPush*: WakuLegacyLightPush - wakuLegacyLightpushClient*: WakuLegacyLightPushClient - wakuLightPush*: WakuLightPush - wakuLightpushClient*: WakuLightPushClient - wakuPeerExchange*: WakuPeerExchange - wakuPeerExchangeClient*: WakuPeerExchangeClient - wakuMetadata*: WakuMetadata - wakuAutoSharding*: Option[Sharding] - enr*: enr.Record - libp2pPing*: Ping - rng*: ref rand.HmacDrbgContext - brokerCtx*: BrokerContext - wakuRendezvous*: WakuRendezVous - wakuRendezvousClient*: rendezvous_client.WakuRendezVousClient - announcedAddresses*: seq[MultiAddress] - extMultiAddrsOnly*: bool # When true, skip automatic IP address replacement - started*: bool # Indicates that node has started listening - topicSubscriptionQueue*: AsyncEventQueue[SubscriptionEvent] - rateLimitSettings*: ProtocolRateLimitSettings - legacyAppHandlers*: Table[PubsubTopic, WakuRelayHandler] - ## Kernel API Relay appHandlers (if any) - wakuMix*: WakuMix - kademliaDiscoveryLoop*: Future[void] - wakuKademlia*: WakuKademlia - ports*: BoundPorts +import ./subscription_manager proc deduceRelayShard( node: WakuNode, @@ -230,6 +178,8 @@ proc new*( peerManager.setShardGetter(node.getShardsGetter(@[])) + node.subscriptionManager = SubscriptionManager.new(node) + return node proc peerInfo*(node: WakuNode): PeerInfo = @@ -600,6 +550,9 @@ proc start*(node: WakuNode) {.async.} = node.startProvidersAndListeners() + node.subscriptionManager.start().isOkOr: + error "failed to start subscription manager", error = error + if not zeroPortPresent: updateAnnouncedAddrWithPrimaryIpAddr(node).isOkOr: error "failed update announced addr", error = $error @@ -611,6 +564,8 @@ proc start*(node: WakuNode) {.async.} = proc stop*(node: WakuNode) {.async.} = ## By stopping the switch we are stopping all the underlying mounted protocols + await node.subscriptionManager.stop() + node.stopProvidersAndListeners() ## NOTE: This will dispatch gossipsub stop to the WakuRelay.stop method override diff --git a/waku/requests/health_requests.nim b/waku/requests/health_requests.nim index d48b3278f..ccf08f83d 100644 --- a/waku/requests/health_requests.nim +++ b/waku/requests/health_requests.nim @@ -38,14 +38,14 @@ RequestBroker: proc signature(protocol: WakuProtocol): Future[Result[RequestProtocolHealth, string]] -# Get edge filter health for a single shard (set by DeliveryService when edge mode is active) +# Get edge filter health for a single shard (set when edge mode is active) RequestBroker(sync): type RequestEdgeShardHealth* = object health*: TopicHealth proc signature(shard: PubsubTopic): Result[RequestEdgeShardHealth, string] -# Get edge filter confirmed peer count (set by DeliveryService when edge mode is active) +# Get edge filter confirmed peer count (set when edge mode is active) RequestBroker(sync): type RequestEdgeFilterPeerCount* = object peerCount*: int