logos-messaging-interop-tests/tests/wrappers_tests/test_send_errors_and_concurrency.py

import base64
from concurrent.futures import ThreadPoolExecutor

import pytest
from src.env_vars import NODE_2
from src.steps.common import StepsCommon
from src.steps.store import StepsStore
from src.libs.common import delay, to_base64
from src.libs.custom_logger import get_custom_logger
from src.node.waku_node import WakuNode
from src.node.wrappers_manager import WrapperManager
from src.node.wrapper_helpers import (
    EventCollector,
    assert_event_invariants,
    create_message_bindings,
    get_node_multiaddr,
    wait_for_connected,
    wait_for_propagated,
    wait_for_sent,
    wait_for_error,
)

logger = get_custom_logger(__name__)

# Upper bound for a message_propagated event when the mesh is stable.
PROPAGATED_TIMEOUT_S = 30.0
# Longer bound used for sends issued right after peers were restarted and
# re-attached, while the mesh is still re-stabilizing.
RECOVERY_TIMEOUT_S = 45.0

# MaxTimeInCache from send_service.nim.
MAX_TIME_IN_CACHE_S = 60.0
# Extra slack to cover the background retry loop tick after the window expires.
CACHE_EXPIRY_SLACK_S = 10.0
ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S = MAX_TIME_IN_CACHE_S + CACHE_EXPIRY_SLACK_S
RETRY_WINDOW_EXPIRED_MSG = "Unable to send within retry time window"

# Payload above DefaultMaxWakuMessageSize (150KiB), so the relay publish
# rejects it instead of failing with NO_PEERS_TO_RELAY.
OVERSIZED_PAYLOAD_BYTES = 200 * 1024
# Upper bound for a message_error event that is expected promptly (no need to
# wait for the retry window to expire).
ERROR_TIMEOUT_S = 30.0
MESSAGE_SIZE_EXCEEDED_MSG = "Message size exceeded"

# S30: concurrent sends on the same content topic during initial auto-subscribe.
S30_CONCURRENT_SENDS = 5
S30_CONTENT_TOPIC = "/test/1/s30-concurrent/proto"

# S31: concurrent sends across mixed topics during peer churn.
S31_CONTENT_TOPICS = [
    "/test/1/s31-topic-a/proto",
    "/test/1/s31-topic-b/proto",
    "/test/1/s31-topic-c/proto",
    "/test/1/s31-topic-d/proto",
    "/test/1/s31-topic-e/proto",
    "/test/1/s31-topic-f/proto",
    "/test/1/s31-topic-g/proto",
    "/test/1/s31-topic-h/proto",
]
# Each burst fires one send per content topic, so the burst size is derived
# from the topic list instead of being a separate number that could drift.
S31_BURST_SIZE = len(S31_CONTENT_TOPICS)


class TestS12IsolatedSenderNoPeers(StepsCommon):
    """
    S12 — Isolated sender, no peers.
    Sender has relay enabled but zero relay peers and zero lightpush peers.
    Expected: send() returns Ok(RequestId), but eventually a message_error
    event arrives (no route to propagate).
    """

    def test_s12_send_with_no_peers_produces_error(self, node_config):
        """Send from a node with no peers and expect a terminal message_error.

        Checks that send() still returns a non-empty RequestId, that a
        message_error event carrying that RequestId arrives within
        ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S, that no message_propagated event
        was recorded for it, and that the per-request event invariants hold.
        """
        sender_collector = EventCollector()

        # Relay only, with discovery off and no static peers configured.
        node_config.update(
            {
                "relay": True,
                "store": False,
                "lightpush": False,
                "filter": False,
                "discv5Discovery": False,
                "numShardsInNetwork": 1,
            }
        )

        sender_result = WrapperManager.create_and_start(
            config=node_config,
            event_cb=sender_collector.event_callback,
        )
        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"

        with sender_result.ok_value as sender:
            message = create_message_bindings(
                payload=to_base64("S12 isolated sender payload"),
                contentTopic="/test/1/s12-isolated/proto",
            )

            send_result = sender.send_message(message=message)
            assert send_result.is_ok(), f"send() must return Ok(RequestId) even with no peers, got: {send_result.err()}"

            request_id = send_result.ok_value
            assert request_id, "send() returned an empty RequestId"

            # The timeout covers MaxTimeInCache plus slack, so the error is
            # still caught if it is only emitted once the retry window expires.
            error = wait_for_error(
                collector=sender_collector,
                request_id=request_id,
                timeout_s=ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S,
            )
            assert error is not None, (
                f"No message_error event within {ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S}s "
                f"(MaxTimeInCache={MAX_TIME_IN_CACHE_S}s + slack) for isolated sender. "
                f"Collected events: {sender_collector.events}"
            )
            assert error["requestId"] == request_id

            # timeout_s=0: only inspect what has already been collected.
            propagated = wait_for_propagated(
                collector=sender_collector,
                request_id=request_id,
                timeout_s=0,
            )
            assert propagated is None, f"Unexpected message_propagated event for isolated sender: {propagated}"

            # Same per-request invariant check the other error-path tests in
            # this file (S21, S13) run after their terminal event.
            assert_event_invariants(sender_collector, request_id)


class TestS21ErrorWhenRetryWindowExpires(StepsCommon):
    """
    S21: delivery retry window expires before any valid path recovers.
    No peer is attached to the sender in this test, so the request is expected
    to end with the retry-window-expired message_error.
    """

    def test_s21_error_when_retry_window_expires(self, node_config):
        """Send with no peer attached and expect RETRY_WINDOW_EXPIRED_MSG."""
        collector = EventCollector()

        isolated_overrides = {
            "relay": True,
            "store": False,
            "lightpush": False,
            "filter": False,
            "discv5Discovery": False,
            "numShardsInNetwork": 1,
        }
        node_config.update(isolated_overrides)

        started = WrapperManager.create_and_start(
            config=node_config,
            event_cb=collector.event_callback,
        )
        assert started.is_ok(), f"Failed to start sender: {started.err()}"

        with started.ok_value as node:
            outcome = node.send_message(message=create_message_bindings())
            assert outcome.is_ok(), f"send() must return Ok(RequestId) even with no peers, got: {outcome.err()}"

            req_id = outcome.ok_value
            assert req_id, "send() returned an empty RequestId"

            # No peer is ever added, so wait out MaxTimeInCache plus slack for
            # the terminal error.
            failure = wait_for_error(
                collector=collector,
                request_id=req_id,
                timeout_s=ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S,
            )
            assert failure is not None, (
                f"No MessageErrorEvent received within {ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S}s "
                f"(MaxTimeInCache={MAX_TIME_IN_CACHE_S}s + slack). "
                f"Collected events: {collector.events}"
            )
            logger.info(f"S21 received error event: {failure}")

            reported = failure.get("error")
            assert reported == RETRY_WINDOW_EXPIRED_MSG, (
                f"Unexpected error message in message_error event.\n"
                f"Expected: {RETRY_WINDOW_EXPIRED_MSG!r}\n"
                f"Got:      {reported!r}\n"
                f"Full event: {failure}"
            )

            assert_event_invariants(collector, req_id)


class TestS13RelayHardFailureWithoutFallback(StepsCommon):
    """
    S13: the relay path is reachable (a relay peer is connected, so the
    publish gets past NO_PEERS_TO_RELAY), yet the relay publish itself fails.
    The failure is provoked with an oversized payload, and no lightpush
    fallback is configured.
      - Expected: Ok(RequestId), then a message_error event.
    """

    def test_s13_relay_hard_failure_without_fallback(self, node_config):
        """Publish an oversized message through a connected relay peer and expect a size error."""
        collector = EventCollector()

        relay_only = {
            "relay": True,
            "numShardsInNetwork": 1,
        }
        node_config.update(relay_only)

        sender_start = WrapperManager.create_and_start(
            config=node_config,
            event_cb=collector.event_callback,
        )
        assert sender_start.is_ok(), f"Failed to start sender: {sender_start.err()}"

        with sender_start.ok_value as sender:
            # Second node: same config, pointed at the sender, on shifted ports.
            peer_config = dict(node_config)
            peer_config["staticnodes"] = [get_node_multiaddr(sender)]
            peer_config["portsShift"] = 1

            peer_start = WrapperManager.create_and_start(config=peer_config)
            assert peer_start.is_ok(), f"Failed to start relay peer: {peer_start.err()}"

            with peer_start.ok_value:
                # A connected relay peer means the publish gets past
                # NO_PEERS_TO_RELAY and actually reaches the relay processor.
                connected = wait_for_connected(collector)
                assert connected is not None, f"Sender did not reach Connected/PartiallyConnected. Collected events: {collector.events}"

                raw_payload = b"x" * OVERSIZED_PAYLOAD_BYTES
                too_big = create_message_bindings(
                    payload=base64.b64encode(raw_payload).decode(),
                    contentTopic="/test/1/s13-relay-hard-failure/proto",
                )

                outcome = sender.send_message(message=too_big)
                assert outcome.is_ok(), f"send() must return Ok(RequestId), got: {outcome.err()}"

                req_id = outcome.ok_value
                assert req_id, "send() returned an empty RequestId"

                failure = wait_for_error(
                    collector=collector,
                    request_id=req_id,
                    timeout_s=ERROR_TIMEOUT_S,
                )
                assert failure is not None, f"No message_error event within {ERROR_TIMEOUT_S}s from the relay processor. Collected events: {collector.events}"
                assert failure["requestId"] == req_id

                # A missing/None error field is treated as empty text for the
                # containment check; the failure message still shows the raw value.
                error_text = failure.get("error") or ""
                assert MESSAGE_SIZE_EXCEEDED_MSG in error_text, (
                    f"Expected error to contain {MESSAGE_SIZE_EXCEEDED_MSG!r}.\n"
                    f"Got: {failure.get('error')!r}\n"
                    f"Full event: {failure}"
                )

                # timeout_s=0: only inspect events collected so far.
                propagated = wait_for_propagated(
                    collector=collector,
                    request_id=req_id,
                    timeout_s=0,
                )
                assert propagated is None, f"Unexpected message_propagated event for a failed relay publish: {propagated}"

                assert_event_invariants(collector, req_id)


class TestS30ConcurrentSendsDuringAutoSubscribe(StepsCommon):
    """
    S30: concurrent sends on the same content topic during initial auto-subscribe.
      - The sender has not subscribed to the target topic beforehand.
      - S30_CONCURRENT_SENDS send() calls are issued from a thread pool.
      - Every call must return Ok(RequestId), and the ids must be unique.
      - Every request id must receive its own propagated event and no error,
        and no event may carry a request id that was not issued here.
    """

    def test_s30_concurrent_sends_during_auto_subscribe(self, node_config):
        """Fire a concurrent burst on one content topic and verify per-request events."""
        collector = EventCollector()

        sender_overrides = {
            "relay": True,
            "store": False,
            "discv5Discovery": False,
            "numShardsInNetwork": 1,
        }
        node_config.update(sender_overrides)

        sender_start = WrapperManager.create_and_start(
            config=node_config,
            event_cb=collector.event_callback,
        )
        assert sender_start.is_ok(), f"Failed to start sender: {sender_start.err()}"

        with sender_start.ok_value as sender:
            # Relay peer so the sender has a propagation path.
            peer_config = dict(node_config)
            peer_config["staticnodes"] = [get_node_multiaddr(sender)]
            peer_config["portsShift"] = 1

            peer_start = WrapperManager.create_and_start(config=peer_config)
            assert peer_start.is_ok(), f"Failed to start relay peer: {peer_start.err()}"

            with peer_start.ok_value:
                # One message per send, each with its own payload, so any
                # mix-up between request ids and events would be detectable.
                batch = []
                for index in range(S30_CONCURRENT_SENDS):
                    encoded = to_base64(f"s30-concurrent-{index}")
                    batch.append(
                        create_message_bindings(
                            contentTopic=S30_CONTENT_TOPIC,
                            payload=encoded,
                        )
                    )

                # All sends go out together. The sender has not subscribed to
                # S30_CONTENT_TOPIC yet, so the auto-subscribe path is hit
                # under contention.
                with ThreadPoolExecutor(max_workers=S30_CONCURRENT_SENDS) as pool:
                    outcomes = [*pool.map(sender.send_message, batch)]

                # Every send must return Ok(RequestId).
                issued_ids = []
                for position, outcome in enumerate(outcomes):
                    assert outcome.is_ok(), f"Concurrent send #{position} failed: {outcome.err()}"
                    rid = outcome.ok_value
                    assert rid, f"Concurrent send #{position} returned an empty RequestId"
                    issued_ids.append(rid)

                # Request ids must be unique across concurrent sends.
                issued = set(issued_ids)
                assert len(issued) == len(issued_ids), f"Duplicate RequestIds returned by concurrent sends: {issued_ids}"

                # Each request id must get its own propagated event and no error.
                for rid in issued_ids:
                    propagated = wait_for_propagated(
                        collector=collector,
                        request_id=rid,
                        timeout_s=PROPAGATED_TIMEOUT_S,
                    )
                    assert propagated is not None, (
                        f"No MessagePropagatedEvent for request_id={rid} "
                        f"within {PROPAGATED_TIMEOUT_S}s. "
                        f"Collected events: {collector.events}"
                    )

                    # timeout_s=0: only inspect events collected so far.
                    failure = wait_for_error(
                        collector=collector,
                        request_id=rid,
                        timeout_s=0,
                    )
                    assert failure is None, f"Unexpected message_error for request_id={rid}: {failure}"

                # Cross-association guard: any event that carries a requestId
                # must reference one of the ids issued above.
                for event in collector.snapshot():
                    seen_id = event.get("requestId")
                    if seen_id is not None:
                        assert seen_id in issued, f"Event carries an unknown requestId={seen_id!r}, not in issued set {issued}. Event: {event}"

                # Per-request invariants apply to every concurrent send
                # (correct requestId, no duplicate terminal events,
                # Sent never before Propagated).
                for rid in issued_ids:
                    assert_event_invariants(collector, rid)


class TestS31ConcurrentSendsMixedTopicsDuringChurn(StepsStore):
    """
    S31: concurrent sends across mixed content topics during peer churn.

    Three docker peers (relay, lightpush, store) are meshed together and
    attached to a wrapper sender. The sender fires three concurrent bursts:
      - phase 1: before any churn,
      - phase 2: right after all peers were restarted,
      - phase 3: after the peers are ready again and re-attached.
    Phases 1 and 3 must each get a propagated event and no error; phase 2 must
    not have produced an error; no event may carry an unknown request id;
    phase-3 messages must be present in the store peer; and the per-request
    event invariants must hold for every request.
    """

    @pytest.mark.docker_required
    def test_s31_concurrent_sends_mixed_topics_during_churn(self, node_config):
        """Run the three-phase burst scenario described in the class docstring."""
        sender_collector = EventCollector()

        relay_peer = WakuNode(NODE_2, f"s31_relay_peer_{self.test_id}")
        relay_peer.start(relay="true", discv5_discovery="false")
        relay_peer.set_relay_subscriptions([self.test_pubsub_topic])

        lightpush_peer = WakuNode(NODE_2, f"s31_lightpush_peer_{self.test_id}")
        lightpush_peer.start(relay="true", lightpush="true", discv5_discovery="false")
        lightpush_peer.set_relay_subscriptions([self.test_pubsub_topic])

        store_peer = WakuNode(NODE_2, f"s31_store_peer_{self.test_id}")
        store_peer.start(relay="true", store="true", discv5_discovery="false")
        store_peer.set_relay_subscriptions([self.test_pubsub_topic])

        churn_peers = [relay_peer, lightpush_peer, store_peer]

        # Mesh docker peers so a lightpushed message can fan out to the store peer.
        self._s31_mesh_peers(churn_peers)

        node_config.update(
            {
                "mode": "Edge",
                "relay": True,
                "lightpush": True,
                "store": False,
                "discv5Discovery": False,
                "numShardsInNetwork": 1,
                "lightpushnode": lightpush_peer.get_multiaddr_with_id(),
            }
        )

        sender_result = WrapperManager.create_and_start(
            config=node_config,
            event_cb=sender_collector.event_callback,
        )
        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"

        with sender_result.ok_value as sender_node:
            sender_multiaddr = get_node_multiaddr(sender_node)
            for peer in churn_peers:
                peer.add_peers([sender_multiaddr])
            delay(3)  # let docker peers connect to the sender

            all_request_ids: list[str] = []
            phase1_ids = self._s31_fire_burst(sender_node, phase_label="phase1")
            all_request_ids.extend(phase1_ids)

            for peer in churn_peers:
                peer.restart()
            delay(1)  # small window so the restart is actually in-flight
            phase2_ids = self._s31_fire_burst(sender_node, phase_label="phase2")
            all_request_ids.extend(phase2_ids)

            # Wait for all peers to be ready again and re-attach the sender.
            for peer in churn_peers:
                peer.ensure_ready(timeout_duration=20)
                peer.add_peers([sender_multiaddr])

            # Re-mesh using multiaddrs read again after the restart.
            self._s31_mesh_peers(churn_peers)
            delay(3)

            phase3_ids = self._s31_fire_burst(sender_node, phase_label="phase3")
            all_request_ids.extend(phase3_ids)

            assert len(set(all_request_ids)) == len(all_request_ids), f"Duplicate RequestIds across bursts: {all_request_ids}"

            # Phase 1 ran before any churn, so the mesh was stable — standard timeout.
            # Phase 3 ran right after restart + re-attach, so the mesh needed to
            # re-stabilize — use the recovery timeout to avoid CI flakiness.
            phase_timeouts = [
                (phase1_ids, PROPAGATED_TIMEOUT_S),
                (phase3_ids, RECOVERY_TIMEOUT_S),
            ]
            for request_ids, timeout_s in phase_timeouts:
                for request_id in request_ids:
                    propagated_event = wait_for_propagated(
                        collector=sender_collector,
                        request_id=request_id,
                        timeout_s=timeout_s,
                    )
                    assert propagated_event is not None, (
                        f"No MessagePropagatedEvent for stable-phase "
                        f"request_id={request_id} within {timeout_s}s. "
                        f"Collected events: {sender_collector.events}"
                    )

                    # timeout_s=0: only inspect events collected so far.
                    error_event = wait_for_error(
                        collector=sender_collector,
                        request_id=request_id,
                        timeout_s=0,
                    )
                    assert error_event is None, f"Unexpected message_error event for stable-phase request_id={request_id}: {error_event}"

            # Phase 2 is not required to have propagated, only to have no error
            # recorded by this point.
            for request_id in phase2_ids:
                error_event = wait_for_error(
                    collector=sender_collector,
                    request_id=request_id,
                    timeout_s=0,
                )
                assert error_event is None, f"Unexpected terminal message_error for phase-2 request_id={request_id} after recovery: {error_event}"

            # Cross-association guard: any event that carries a requestId must
            # reference one of the ids issued by the three bursts.
            issued = set(all_request_ids)
            for event in sender_collector.snapshot():
                event_request_id = event.get("requestId")
                if event_request_id is None:
                    continue
                assert event_request_id in issued, f"Event carries an unknown requestId={event_request_id!r}, not in issued set {issued}. Event: {event}"

            # Use the hash the wrapper emitted on message_sent so the store
            # lookup matches the exact bytes that were actually published.
            phase3_hashes = []
            for request_id in phase3_ids:
                sent_event = wait_for_sent(
                    collector=sender_collector,
                    request_id=request_id,
                    timeout_s=RECOVERY_TIMEOUT_S,
                )
                assert sent_event is not None, (
                    f"No message_sent event for phase-3 request_id={request_id} "
                    f"within {RECOVERY_TIMEOUT_S}s. Collected events: {sender_collector.events}"
                )
                msg_hash = sent_event.get("messageHash")
                assert msg_hash, f"message_sent event missing messageHash: {sent_event}"
                phase3_hashes.append(msg_hash)

            # 3 phases × S31_BURST_SIZE messages, so the page must fit them all,
            # otherwise phase-3 hashes (which sort last in ascending order) get cut off.
            self.check_sent_message_is_stored(
                expected_hashes=phase3_hashes,
                store_node=store_peer,
                pubsub_topic=self.test_pubsub_topic,
                page_size=S31_BURST_SIZE * 3,
                ascending="true",
            )

            # Per-request invariants apply across all phases, including the
            # retry-path bursts (phase 2). If retries ever emit duplicate
            # Propagated events or reorder Sent before Propagated, this catches it.
            for request_id in all_request_ids:
                assert_event_invariants(sender_collector, request_id)

    @staticmethod
    def _s31_mesh_peers(peers) -> None:
        """Add every other peer's multiaddr to each peer in ``peers``.

        Each peer's multiaddr is read once per call, so calling this again
        after a restart uses the values reported at that time.
        """
        multiaddrs = [peer.get_multiaddr_with_id() for peer in peers]
        for peer, own_multiaddr in zip(peers, multiaddrs):
            peer.add_peers([addr for addr in multiaddrs if addr != own_multiaddr])

    def _s31_fire_burst(self, sender_node, *, phase_label: str) -> list[str]:
        """Fire S31_BURST_SIZE concurrent sends spread over S31_CONTENT_TOPICS.

        Topics are assigned round-robin, so with the current constants each
        send gets its own topic, and a burst size larger than the topic list
        reuses topics instead of raising IndexError.

        Returns the list of RequestIds in send order. Asserts every send
        returned Ok with a non-empty RequestId.
        """
        topic_count = len(S31_CONTENT_TOPICS)
        messages = [
            self.create_message(
                contentTopic=S31_CONTENT_TOPICS[i % topic_count],
                payload=to_base64(f"s31-{phase_label}-{i}"),
            )
            for i in range(S31_BURST_SIZE)
        ]

        with ThreadPoolExecutor(max_workers=S31_BURST_SIZE) as pool:
            send_results = list(pool.map(sender_node.send_message, messages))

        request_ids = []
        for i, send_result in enumerate(send_results):
            assert send_result.is_ok(), f"{phase_label}: concurrent send #{i} failed: {send_result.err()}"
            request_id = send_result.ok_value
            assert request_id, f"{phase_label}: concurrent send #{i} returned an empty RequestId"
            request_ids.append(request_id)

        return request_ids
-												E2e s13 s16 (#183)

* Add S13

* Adding S16

* Add error message check for S13

* mark s16 as xfail
											
										
										
											2026-05-18 17:04:16 +03:00
+								import base64
-												e2e part3 (#181)

* add test s17

* Add temp changes

* Add s17 positive / negative scenarios

* add S19

* Add S06 relay-only test and fix wrapper helpers (#173)

* - Add S06 relay-only test case for testing message propagation without a store.
- Update `wrapper_helpers` for clearer event type handling and type annotations (`Optional[...]` usage).
- Simplify `get_node_multiaddr` to retrieve addresses via `get_node_info_raw`.
- Refactor `wrappers_manager` to adjust bindings path to `vendor` directory and add `get_node_info_raw` method.
- Update `.gitignore` to exclude `store.sqlite3*`.

* Refactor S06 relay-only test: replace try-finally blocks with context managers for clarity and conciseness.

* Migrate S06 relay-only test to `test_send_e2e.py` and refactor with `StepsCommon` for reusability.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Modify S19 test

* Adding S21

* Fix review comments

* Adding S22/S23

* Adding S24

* Add S26

* Add S30

* Add S31

* Improve `wait_for_event` loop logic and add `assert_event_invariants` helper (#178)

- Refactored the `wait_for_event` function for clarity and to ensure proper deadline handling within the loop.
- Introduced `assert_event_invariants` to validate per-request event properties, enforcing invariants like correct `requestId`, no duplicate terminal events, and proper timing between `Propagated` and `Sent`.
- Added tests for `assert_event_invariants` enforcement in `S14` and `S15` lightpush scenarios.

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add S07 and S10 send API tests with event invariants helper  (#176)

* Add `assert_event_invariants` to enforce per-request event constraints and integrate into relevant tests

* Integrate `assert_event_invariants` into edge and store tests

* Remove redundant comments from `test_send_e2e.py`

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Fix some tests

* Add S02/S12 send API tests and PR CI pipeline (#174)

* Add tests for auto-subscribe on first send and isolated sender with no peers

* Add PR CI workflow with tiered test strategy

- pr_tests.yml: build job with cache, wrapper-tests, smoke-tests,
  and label-triggered full-suite
- test_common.yml: add deploy_allure/send_discord inputs so PR runs
  skip reporting side effects
- Add docker_required marker to S19 (needs Docker, excluded from
  wrapper-only CI job)
- Register docker_required marker in pytest.ini

* Document PR CI test workflows in README

* Refine PR CI test strategy:
- Exclude `docker_required` tests from smoke set in `pr_tests.yml`.
- Add `wait_for_connected` helper for connection state checks.
- Update S19 test to dynamically create and clean up the store node setup.
- General simplifications and improved test stability.

* Add `wait_for_connected` assertion to ensure sender connection state before propagation test

* Refine tests and CI workflows:
- Replace `ERROR_TIMEOUT_S` with `ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S` in `test_send_e2e.py`.
- Adjust timeout assertion for better clarity and accuracy.
- Update `pr_tests.yml` to add retries (`--reruns`) and ignore wrapper tests in smoke tests.
- Change `test_common.yml` default Discord reporting to `false`.

* Normalize `portsshift` to `portsShift` in `test_send_e2e.py` configuration definitions.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add relay-to-lightpush fallback integration tests (S08/S09) (#180)

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Ignore S19

* fix s26

* Ignore s20 / s31 for errors

* Change image name

* fix xfail syntax error

* rename test file

* Fix flaky tests

* comment the skipped tests

* Fix review comments

* revert tag in yml in latest

* commenting lightpush

* Modify the PR

* Fix the ports conflict

* Modify S20

* fix portsshift option

* remove the /true from yml to allow errors to exist

* Modify the yml to continue on error

* First set of review comments

* adding xfail mark for failed tests

* address review comments about xfail

* cleanup unused lines

* event collector fix

* Address review comment about delay constant

* fix the timeout review comment

* Add assert_event_invariants

* enhance comment on S26 test

* mark the waku tests as docker_required

* Add S01

* add S01 second scenario

* Add S03

* Add S04

* Adding S11

* modify s11 scenario to pass

* Adding test S05

* Adding the new tests in part3 file

* Fix the yml file error

* Add the new test file to the PR job

* bump logos-delivery-python-bindings to include destroy_keep_ctx

* modify the S01 test

* mark S01 with xfail

* mark the second S01 test as xfail too

* use skip instead of xfail

* comment the skip line to try S01 again

* restore the xfail mark again

* remove the wrapped text code from test file

* Changing  the test files names

* skip S01 again

* removed extra comments

* Update logos-delivery-python-bindings submodule

---------

Co-authored-by: Egor Rachkovskii <32649334+at0m1x19@users.noreply.github.com>
Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>
											
										
										
											2026-05-14 15:48:14 +03:00
+								from concurrent.futures import ThreadPoolExecutor
 								import pytest
 								from src.env_vars import NODE_2
 								from src.steps.common import StepsCommon
 								from src.steps.store import StepsStore
 								from src.libs.common import delay, to_base64
 								from src.libs.custom_logger import get_custom_logger
 								from src.node.waku_node import WakuNode
 								from src.node.wrappers_manager import WrapperManager
 								from src.node.wrapper_helpers import (
 								    EventCollector,
 								    assert_event_invariants,
 								    create_message_bindings,
 								    get_node_multiaddr,
-												E2e s13 s16 (#183)

* Add S13

* Adding S16

* Add error message check for S13

* mark s16 as xfail
											
										
										
											2026-05-18 17:04:16 +03:00
+								    wait_for_connected,
-												e2e part3 (#181)

* add test s17

* Add temp changes

* Add s17 positive / negative scenarios

* add S19

* Add S06 relay-only test and fix wrapper helpers (#173)

* - Add S06 relay-only test case for testing message propagation without a store.
- Update `wrapper_helpers` for clearer event type handling and type annotations (`Optional[...]` usage).
- Simplify `get_node_multiaddr` to retrieve addresses via `get_node_info_raw`.
- Refactor `wrappers_manager` to adjust bindings path to `vendor` directory and add `get_node_info_raw` method.
- Update `.gitignore` to exclude `store.sqlite3*`.

* Refactor S06 relay-only test: replace try-finally blocks with context managers for clarity and conciseness.

* Migrate S06 relay-only test to `test_send_e2e.py` and refactor with `StepsCommon` for reusability.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Modify S19 test

* Adding S21

* Fix review comments

* Adding S22/S23

* Adding S24

* Add S26

* Add S30

* Add S31

* Improve `wait_for_event` loop logic and add `assert_event_invariants` helper (#178)

- Refactored the `wait_for_event` function for clarity and to ensure proper deadline handling within the loop.
- Introduced `assert_event_invariants` to validate per-request event properties, enforcing invariants like correct `requestId`, no duplicate terminal events, and proper timing between `Propagated` and `Sent`.
- Added tests for `assert_event_invariants` enforcement in `S14` and `S15` lightpush scenarios.

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add S07 and S10 send API tests with event invariants helper  (#176)

* Add `assert_event_invariants` to enforce per-request event constraints and integrate into relevant tests

* Integrate `assert_event_invariants` into edge and store tests

* Remove redundant comments from `test_send_e2e.py`

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Fix some tests

* Add S02/S12 send API tests and PR CI pipeline (#174)

* Add tests for auto-subscribe on first send and isolated sender with no peers

* Add PR CI workflow with tiered test strategy

- pr_tests.yml: build job with cache, wrapper-tests, smoke-tests,
  and label-triggered full-suite
- test_common.yml: add deploy_allure/send_discord inputs so PR runs
  skip reporting side effects
- Add docker_required marker to S19 (needs Docker, excluded from
  wrapper-only CI job)
- Register docker_required marker in pytest.ini

* Document PR CI test workflows in README

* Refine PR CI test strategy:
- Exclude `docker_required` tests from smoke set in `pr_tests.yml`.
- Add `wait_for_connected` helper for connection state checks.
- Update S19 test to dynamically create and clean up the store node setup.
- General simplifications and improved test stability.

* Add `wait_for_connected` assertion to ensure sender connection state before propagation test

* Refine tests and CI workflows:
- Replace `ERROR_TIMEOUT_S` with `ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S` in `test_send_e2e.py`.
- Adjust timeout assertion for better clarity and accuracy.
- Update `pr_tests.yml` to add retries (`--reruns`) and ignore wrapper tests in smoke tests.
- Change `test_common.yml` default Discord reporting to `false`.

* Normalize `portsshift` to `portsShift` in `test_send_e2e.py` configuration definitions.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add relay-to-lightpush fallback integration tests (S08/S09) (#180)

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Ignore S19

* fix s26

* Ignore s20 / s31 for errors

* Change image name

* fix xfail syntax error

* rename test file

* Fix flaky tests

* comment the skipped tests

* Fix review comments

* revert tag in yml in latest

* commenting lightpush

* Modify the PR

* Fix the ports conflict

* Modify S20

* fix portsshift option

* remove the /true from yml to allow errors to exist

* Modify the yml to continue on error

* First set of review comments

* adding xfail mark for failed tests

* address review comments about xfail

* cleanup unused lines

* event collector fix

* Address review comment about delay constant

* fix the timeout review comment

* Add assert_event_invariants

* enhance comment on S26 test

* mark the waku tests as docker_required

* Add S01

* add S01 second scenario

* Add S03

* Add S04

* Adding S11

* modify s11 scenario to pass

* Adding test S05

* Adding the new tests in part3 file

* Fix the yml file error

* Add the new test file to the PR job

* bump logos-delivery-python-bindings to include destroy_keep_ctx

* modify the S01 test

* mark S01 with xfail

* mark the second S01 test as xfail too

* use skip instead of xfail

* comment the skip line to try S01 again

* restore the xfail mark again

* remove the wrapped text code from test file

* Changing the test file names

* skip S01 again

* removed extra comments

* Update logos-delivery-python-bindings submodule

---------

Co-authored-by: Egor Rachkovskii <32649334+at0m1x19@users.noreply.github.com>
Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>
											
										
										
											2026-05-14 15:48:14 +03:00
+								    wait_for_propagated,
 								    wait_for_sent,
 								    wait_for_error,
 								)
 								# Module-level logger for this test file.
 								logger = get_custom_logger(__name__)
 								# Timeout (seconds) given to wait_for_propagated for sends issued on a stable mesh.
 								PROPAGATED_TIMEOUT_S = 30.0
 								# Longer timeout (seconds) for sends issued right after peers were restarted (S31 phase 3).
 								RECOVERY_TIMEOUT_S = 45.0
 								# MaxTimeInCache from send_service.nim.
 								MAX_TIME_IN_CACHE_S = 60.0
 								# Extra slack to cover the background retry loop tick after the window expires.
 								CACHE_EXPIRY_SLACK_S = 10.0
 								# Total time (seconds) to wait for a message_error that is only emitted once the
 								# retry window has expired: cache lifetime plus the slack above.
 								ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S = MAX_TIME_IN_CACHE_S + CACHE_EXPIRY_SLACK_S
 								# Exact "error" text S21 expects in the message_error event.
 								RETRY_WINDOW_EXPIRED_MSG = "Unable to send within retry time window"
-												E2e s13 s16 (#183)

* Add S13

* Adding S16

* Add error message check for S13

* mark s16 as xfail
											
										
										
											2026-05-18 17:04:16 +03:00
+								# Payload above DefaultMaxWakuMessageSize (150KiB), so the relay publish
 								# rejects it instead of failing with NO_PEERS_TO_RELAY.
 								OVERSIZED_PAYLOAD_BYTES = 200 * 1024
 								# Timeout (seconds) given to wait_for_error in S13.
 								ERROR_TIMEOUT_S = 30.0
 								# Substring S13 expects inside the "error" field of the message_error event.
 								MESSAGE_SIZE_EXCEEDED_MSG = "Message size exceeded"
-												e2e part3 (#181)

* add test s17

* Add temp changes

* Add s17 positive / negative scenarios

* add S19

* Add S06 relay-only test and fix wrapper helpers (#173)

* - Add S06 relay-only test case for testing message propagation without a store.
- Update `wrapper_helpers` for clearer event type handling and type annotations (`Optional[...]` usage).
- Simplify `get_node_multiaddr` to retrieve addresses via `get_node_info_raw`.
- Refactor `wrappers_manager` to adjust bindings path to `vendor` directory and add `get_node_info_raw` method.
- Update `.gitignore` to exclude `store.sqlite3*`.

* Refactor S06 relay-only test: replace try-finally blocks with context managers for clarity and conciseness.

* Migrate S06 relay-only test to `test_send_e2e.py` and refactor with `StepsCommon` for reusability.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Modify S19 test

* Adding S21

* Fix review comments

* Adding S22/S23

* Adding S24

* Add S26

* Add S30

* Add S31

* Improve `wait_for_event` loop logic and add `assert_event_invariants` helper (#178)

- Refactored the `wait_for_event` function for clarity and to ensure proper deadline handling within the loop.
- Introduced `assert_event_invariants` to validate per-request event properties, enforcing invariants like correct `requestId`, no duplicate terminal events, and proper timing between `Propagated` and `Sent`.
- Added tests for `assert_event_invariants` enforcement in `S14` and `S15` lightpush scenarios.

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add S07 and S10 send API tests with event invariants helper  (#176)

* Add `assert_event_invariants` to enforce per-request event constraints and integrate into relevant tests

* Integrate `assert_event_invariants` into edge and store tests

* Remove redundant comments from `test_send_e2e.py`

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Fix some tests

* Add S02/S12 send API tests and PR CI pipeline (#174)

* Add tests for auto-subscribe on first send and isolated sender with no peers

* Add PR CI workflow with tiered test strategy

- pr_tests.yml: build job with cache, wrapper-tests, smoke-tests,
  and label-triggered full-suite
- test_common.yml: add deploy_allure/send_discord inputs so PR runs
  skip reporting side effects
- Add docker_required marker to S19 (needs Docker, excluded from
  wrapper-only CI job)
- Register docker_required marker in pytest.ini

* Document PR CI test workflows in README

* Refine PR CI test strategy:
- Exclude `docker_required` tests from smoke set in `pr_tests.yml`.
- Add `wait_for_connected` helper for connection state checks.
- Update S19 test to dynamically create and clean up the store node setup.
- General simplifications and improved test stability.

* Add `wait_for_connected` assertion to ensure sender connection state before propagation test

* Refine tests and CI workflows:
- Replace `ERROR_TIMEOUT_S` with `ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S` in `test_send_e2e.py`.
- Adjust timeout assertion for better clarity and accuracy.
- Update `pr_tests.yml` to add retries (`--reruns`) and ignore wrapper tests in smoke tests.
- Change `test_common.yml` default Discord reporting to `false`.

* Normalize `portsshift` to `portsShift` in `test_send_e2e.py` configuration definitions.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add relay-to-lightpush fallback integration tests (S08/S09) (#180)

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Ignore S19

* fix s26

* Ignore s20 / s31 for errors

* Change image name

* fix xfail syntax error

* rename test file

* Fix flaky tests

* comment the skipped tests

* Fix review comments

* revert tag in yml in latest

* commenting lightpush

* Modify the PR

* Fix the ports conflict

* Modify S20

* fix portsshift option

* remove the /true from yml to allow errors to exist

* Modify the yml to continue on error

* First set of review comments

* adding xfail mark for failed tests

* address review comments about xfail

* cleanup unused lines

* event collector fix

* Address review comment about delay constant

* fix the timeout review comment

* Add assert_event_invariants

* enhance comment on S26 test

* mark the waku tests as docker_required

* Add S01

* add S01 second scenario

* Add S03

* Add S04

* Adding S11

* modify s11 scenario to pass

* Adding test S05

* Adding the new tests in part3 file

* Fix the yml file error

* Add the new test file to the PR job

* bump logos-delivery-python-bindings to include destroy_keep_ctx

* modify the S01 test

* mark S01 with xfail

* mark the second S01 test as xfail too

* use skip instead of xfail

* comment the skip line to try S01 again

* restore the xfail mark again

* remove the wrapped text code from test file

* Changing the test file names

* skip S01 again

* removed extra comments

* Update logos-delivery-python-bindings submodule

---------

Co-authored-by: Egor Rachkovskii <32649334+at0m1x19@users.noreply.github.com>
Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>
											
										
										
											2026-05-14 15:48:14 +03:00
+								# S30: concurrent sends on the same content topic during initial auto-subscribe.
 								# Number of messages built for S30 and also the thread-pool size used to send them.
 								S30_CONCURRENT_SENDS = 5
 								# Single content topic shared by every S30 message.
 								S30_CONTENT_TOPIC = "/test/1/s30-concurrent/proto"
 								# S31: concurrent sends across mixed topics during peer churn.
 								# NOTE(review): presumably the number of sends per burst in _s31_fire_burst — confirm there.
 								S31_BURST_SIZE = 8
 								# Eight distinct content topics (a..h) available to the S31 bursts.
 								S31_CONTENT_TOPICS = [
 								    "/test/1/s31-topic-a/proto",
 								    "/test/1/s31-topic-b/proto",
 								    "/test/1/s31-topic-c/proto",
 								    "/test/1/s31-topic-d/proto",
 								    "/test/1/s31-topic-e/proto",
 								    "/test/1/s31-topic-f/proto",
 								    "/test/1/s31-topic-g/proto",
 								    "/test/1/s31-topic-h/proto",
 								]
 								class TestS12IsolatedSenderNoPeers(StepsCommon):
 								    """
 								    S12 — Isolated sender, no peers.
 								    Sender has relay enabled but zero relay peers and zero lightpush peers.
 								    Expected: send() returns Ok(RequestId), but eventually a message_error
 								    event arrives (no route to propagate).
 								    """
 								    def test_s12_send_with_no_peers_produces_error(self, node_config):
 								        # Receives every event the sender node reports through its callback.
 								        sender_collector = EventCollector()
 								        # Relay is the only protocol enabled and discovery is off; this test
 								        # never starts or attaches another node.
 								        node_config.update(
 								            {
 								                "relay": True,
 								                "store": False,
 								                "lightpush": False,
 								                "filter": False,
 								                "discv5Discovery": False,
 								                "numShardsInNetwork": 1,
 								            }
 								        )
 								        sender_result = WrapperManager.create_and_start(
 								            config=node_config,
 								            event_cb=sender_collector.event_callback,
 								        )
 								        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"
 								        # NOTE(review): the node is used as a context manager; presumably it is
 								        # stopped/cleaned up on exit — confirm in WrapperManager.
 								        with sender_result.ok_value as sender:
 								            message = create_message_bindings(
 								                payload=to_base64("S12 isolated sender payload"),
 								                contentTopic="/test/1/s12-isolated/proto",
 								            )
 								            # The send call itself must succeed; the failure is reported later as an event.
 								            send_result = sender.send_message(message=message)
 								            assert send_result.is_ok(), f"send() must return Ok(RequestId) even with no peers, got: {send_result.err()}"
 								            request_id = send_result.ok_value
 								            assert request_id, "send() returned an empty RequestId"
 								            # Wait for the whole cache lifetime plus slack before giving up on the error event.
 								            error = wait_for_error(
 								                collector=sender_collector,
 								                request_id=request_id,
 								                timeout_s=ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S,
 								            )
 								            assert error is not None, (
 								                f"No message_error event within {ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S}s "
 								                f"(MaxTimeInCache={MAX_TIME_IN_CACHE_S}s + slack) for isolated sender. "
 								                f"Collected events: {sender_collector.events}"
 								            )
 								            # The error event must be tied to the request issued above.
 								            assert error["requestId"] == request_id
 								            # Negative check with timeout_s=0.
 								            # NOTE(review): presumably this inspects already-collected events without
 								            # further waiting — confirm in wrapper_helpers.
 								            propagated = wait_for_propagated(sender_collector, request_id, timeout_s=0)
 								            assert propagated is None, f"Unexpected message_propagated event for isolated sender: {propagated}"
 								class TestS21ErrorWhenRetryWindowExpires(StepsCommon):
 								    """
 								    S21: delivery retry window expires before any valid path recovers.
 								    """
 								    def test_s21_error_when_retry_window_expires(self, node_config):
 								        # Receives every event the sender node reports through its callback.
 								        sender_collector = EventCollector()
 								        # Relay-only sender with discovery disabled; no peer is attached in this test.
 								        node_config.update(
 								            {
 								                "relay": True,
 								                "store": False,
 								                "lightpush": False,
 								                "filter": False,
 								                "discv5Discovery": False,
 								                "numShardsInNetwork": 1,
 								            }
 								        )
 								        sender_result = WrapperManager.create_and_start(
 								            config=node_config,
 								            event_cb=sender_collector.event_callback,
 								        )
 								        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"
 								        with sender_result.ok_value as sender_node:
 								            # Message built entirely from the helper's default arguments.
 								            message = create_message_bindings()
 								            send_result = sender_node.send_message(message=message)
 								            assert send_result.is_ok(), f"send() must return Ok(RequestId) even with no peers, got: {send_result.err()}"
 								            request_id = send_result.ok_value
 								            assert request_id, "send() returned an empty RequestId"
 								            # No peer is connected, so the only expected outcome is an error event
 								            # once the retry window (cache lifetime plus slack) has elapsed.
 								            error_event = wait_for_error(
 								                collector=sender_collector,
 								                request_id=request_id,
 								                timeout_s=ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S,
 								            )
 								            assert error_event is not None, (
 								                f"No MessageErrorEvent received within {ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S}s "
 								                f"(MaxTimeInCache={MAX_TIME_IN_CACHE_S}s + slack). "
 								                f"Collected events: {sender_collector.events}"
 								            )
 								            logger.info(f"S21 received error event: {error_event}")
 								            # The error text must match the retry-window message exactly, not just contain it.
 								            assert error_event.get("error") == RETRY_WINDOW_EXPIRED_MSG, (
 								                f"Unexpected error message in message_error event.\n"
 								                f"Expected: {RETRY_WINDOW_EXPIRED_MSG!r}\n"
 								                f"Got:      {error_event.get('error')!r}\n"
 								                f"Full event: {error_event}"
 								            )
 								            # Run the shared per-request event checks for this request id.
 								            assert_event_invariants(sender_collector, request_id)
-												E2e s13 s16 (#183)

* Add S13

* Adding S16

* Add error message check for S13

* mark s16 as xfail
											
										
										
											2026-05-18 17:04:16 +03:00
+								class TestS13RelayHardFailureWithoutFallback(StepsCommon):
 								    """
 								    S13: relay path is reachable (a relay peer is connected, so the publish
 								    gets past NO_PEERS_TO_RELAY), but the relay publish fails for another
 								    reason. An oversized payload is used so the relay processor rejects the
 								    message immediately. No lightpush fallback is configured.
 								      - Expected: Ok(RequestId), then a message_error event.
 								    """
 								    def test_s13_relay_hard_failure_without_fallback(self, node_config):
 								        # Receives every event the sender node reports through its callback.
 								        sender_collector = EventCollector()
 								        node_config.update(
 								            {
 								                "relay": True,
 								                "numShardsInNetwork": 1,
 								            }
 								        )
 								        sender_result = WrapperManager.create_and_start(
 								            config=node_config,
 								            event_cb=sender_collector.event_callback,
 								        )
 								        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"
 								        with sender_result.ok_value as sender_node:
 								            # Second node: same config as the sender, plus the sender as a static node.
 								            # NOTE(review): portsShift=1 presumably keeps its ports from clashing with
 								            # the sender's — confirm against the node configuration docs.
 								            relay_config = {
 								                **node_config,
 								                "staticnodes": [get_node_multiaddr(sender_node)],
 								                "portsShift": 1,
 								            }
 								            # The relay peer is started without an event callback; only sender events are checked.
 								            relay_result = WrapperManager.create_and_start(config=relay_config)
 								            assert relay_result.is_ok(), f"Failed to start relay peer: {relay_result.err()}"
 								            with relay_result.ok_value:
 								                # A connected relay peer means the publish gets past
 								                # NO_PEERS_TO_RELAY and actually reaches the relay processor.
 								                assert wait_for_connected(sender_collector) is not None, (
 								                    f"Sender did not reach Connected/PartiallyConnected. " f"Collected events: {sender_collector.events}"
 								                )
 								                # Base64 text of OVERSIZED_PAYLOAD_BYTES raw bytes; the encoded string is
 								                # longer than the raw byte count.
 								                oversized_payload = base64.b64encode(b"x" * OVERSIZED_PAYLOAD_BYTES).decode()
 								                message = create_message_bindings(
 								                    payload=oversized_payload,
 								                    contentTopic="/test/1/s13-relay-hard-failure/proto",
 								                )
 								                # The send call itself must succeed; the rejection is reported as an event.
 								                send_result = sender_node.send_message(message=message)
 								                assert send_result.is_ok(), f"send() must return Ok(RequestId), got: {send_result.err()}"
 								                request_id = send_result.ok_value
 								                assert request_id, "send() returned an empty RequestId"
 								                # Uses the shorter ERROR_TIMEOUT_S rather than the cache-expiry timeout.
 								                error_event = wait_for_error(
 								                    collector=sender_collector,
 								                    request_id=request_id,
 								                    timeout_s=ERROR_TIMEOUT_S,
 								                )
 								                assert error_event is not None, (
 								                    f"No message_error event within {ERROR_TIMEOUT_S}s from the " f"relay processor. Collected events: {sender_collector.events}"
 								                )
 								                assert error_event["requestId"] == request_id
 								                # Substring match; `or ""` guards against a missing or None "error" field.
 								                assert MESSAGE_SIZE_EXCEEDED_MSG in (error_event.get("error") or ""), (
 								                    f"Expected error to contain {MESSAGE_SIZE_EXCEEDED_MSG!r}.\n" f"Got: {error_event.get('error')!r}\n" f"Full event: {error_event}"
 								                )
 								                # Negative check with timeout_s=0.
 								                # NOTE(review): presumably this inspects already-collected events without
 								                # further waiting — confirm in wrapper_helpers.
 								                propagated = wait_for_propagated(sender_collector, request_id, timeout_s=0)
 								                assert propagated is None, f"Unexpected message_propagated event for a failed relay publish: {propagated}"
 								                # Run the shared per-request event checks for this request id.
 								                assert_event_invariants(sender_collector, request_id)
-												e2e part3 (#181)

* add test s17

* Add temp changes

* Add s17 positive / negative scenarios

* add S19

* Add S06 relay-only test and fix wrapper helpers (#173)

* - Add S06 relay-only test case for testing message propagation without a store.
- Update `wrapper_helpers` for clearer event type handling and type annotations (`Optional[...]` usage).
- Simplify `get_node_multiaddr` to retrieve addresses via `get_node_info_raw`.
- Refactor `wrappers_manager` to adjust bindings path to `vendor` directory and add `get_node_info_raw` method.
- Update `.gitignore` to exclude `store.sqlite3*`.

* Refactor S06 relay-only test: replace try-finally blocks with context managers for clarity and conciseness.

* Migrate S06 relay-only test to `test_send_e2e.py` and refactor with `StepsCommon` for reusability.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Modify S19 test

* Adding S21

* Fix review comments

* Adding S22/S23

* Adding S24

* Add S26

* Add S30

* Add S31

* Improve `wait_for_event` loop logic and add `assert_event_invariants` helper (#178)

- Refactored the `wait_for_event` function for clarity and to ensure proper deadline handling within the loop.
- Introduced `assert_event_invariants` to validate per-request event properties, enforcing invariants like correct `requestId`, no duplicate terminal events, and proper timing between `Propagated` and `Sent`.
- Added tests for `assert_event_invariants` enforcement in `S14` and `S15` lightpush scenarios.

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add S07 and S10 send API tests with event invariants helper  (#176)

* Add `assert_event_invariants` to enforce per-request event constraints and integrate into relevant tests

* Integrate `assert_event_invariants` into edge and store tests

* Remove redundant comments from `test_send_e2e.py`

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Fix some tests

* Add S02/S12 send API tests and PR CI pipeline (#174)

* Add tests for auto-subscribe on first send and isolated sender with no peers

* Add PR CI workflow with tiered test strategy

- pr_tests.yml: build job with cache, wrapper-tests, smoke-tests,
  and label-triggered full-suite
- test_common.yml: add deploy_allure/send_discord inputs so PR runs
  skip reporting side effects
- Add docker_required marker to S19 (needs Docker, excluded from
  wrapper-only CI job)
- Register docker_required marker in pytest.ini

* Document PR CI test workflows in README

* Refine PR CI test strategy:
- Exclude `docker_required` tests from smoke set in `pr_tests.yml`.
- Add `wait_for_connected` helper for connection state checks.
- Update S19 test to dynamically create and clean up the store node setup.
- General simplifications and improved test stability.

* Add `wait_for_connected` assertion to ensure sender connection state before propagation test

* Refine tests and CI workflows:
- Replace `ERROR_TIMEOUT_S` with `ERROR_AFTER_CACHE_EXPIRY_TIMEOUT_S` in `test_send_e2e.py`.
- Adjust timeout assertion for better clarity and accuracy.
- Update `pr_tests.yml` to add retries (`--reruns`) and ignore wrapper tests in smoke tests.
- Change `test_common.yml` default Discord reporting to `false`.

* Normalize `portsshift` to `portsShift` in `test_send_e2e.py` configuration definitions.

---------

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Add relay-to-lightpush fallback integration tests (S08/S09) (#180)

Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>

* Ignore S19

* fix s26

* Ignore s20 / s31 for errors

* Change image name

* fix xfail syntax error

* rename test file

* Fix flaky tests

* comment the skipped tests

* Fix review comments

* revert tag in yml in latest

* commenting lightpush

* Modify the PR

* Fix the ports conflict

* Modify S20

* fix portsshift option

* remove the /true from yml to allow errors to exist

* Modify the yml to continue on error

* First set of review comments

* adding xfail mark for failed tests

* address review comments about xfail

* cleanup unused lines

* event collector fix

* Address review comment about delay constant

* fix the timeout review comment

* Add assert_event_invariants

* enhance comment on S26 test

* mark the waku tests as docker_required

* Add S01

* add S01 second scenario

* Add S03

* Add S04

* Adding S11

* modify s11 scenario to pass

* Adding test S05

* Adding the new tests in part3 file

* Fix the yml file error

* Add the new test file to the PR job

* bump logos-delivery-python-bindings to include destroy_keep_ctx

* modify the S01 test

* mark S01 with xfail

* mark the second S01 test as xfail too

* use skip instead of xfail

* comment the skip line to try S01 again

* restore the xfail mark again

* remove the wrapped text code from test file

* Changing the test file names

* skip S01 again

* removed extra comments

* Update logos-delivery-python-bindings submodule

---------

Co-authored-by: Egor Rachkovskii <32649334+at0m1x19@users.noreply.github.com>
Co-authored-by: Egor Rachkovskii <egorrachkovskii@status.im>
											
										
										
											2026-05-14 15:48:14 +03:00
+								class TestS30ConcurrentSendsDuringAutoSubscribe(StepsCommon):
 								    """
 								    S30: concurrent sends on the same content topic during initial auto-subscribe.
 								      - Sender starts unsubscribed to the target topic.
 								      - Several send() calls are issued at nearly the same time.
 								      - Each call must return Ok(RequestId) with a unique id.
 								      - Each request id must get its own propagated event,
 								        with no dropped or cross-associated events.
 								    """
 								    def test_s30_concurrent_sends_during_auto_subscribe(self, node_config):
 								        # Receives every event the sender node reports through its callback.
 								        sender_collector = EventCollector()
 								        node_config.update(
 								            {
 								                "relay": True,
 								                "store": False,
 								                "discv5Discovery": False,
 								                "numShardsInNetwork": 1,
 								            }
 								        )
 								        sender_result = WrapperManager.create_and_start(
 								            config=node_config,
 								            event_cb=sender_collector.event_callback,
 								        )
 								        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"
 								        with sender_result.ok_value as sender_node:
 								            # Relay peer so the sender has a propagation path.
 								            relay_config = {
 								                **node_config,
 								                "staticnodes": [get_node_multiaddr(sender_node)],
 								                "portsShift": 1,
 								            }
 								            relay_result = WrapperManager.create_and_start(config=relay_config)
 								            assert relay_result.is_ok(), f"Failed to start relay peer: {relay_result.err()}"
 								            with relay_result.ok_value:
 								                # Build one message per send, with distinct payloads so we can
 								                # detect any cross-association between request ids and events.
 								                messages = [
 								                    create_message_bindings(
 								                        contentTopic=S30_CONTENT_TOPIC,
 								                        payload=to_base64(f"s30-concurrent-{i}"),
 								                    )
 								                    for i in range(S30_CONCURRENT_SENDS)
 								                ]
 								                # Fire all sends concurrently. The sender is not yet subscribed
 								                # to S30_CONTENT_TOPIC, so this exercises the auto-subscribe path
 								                # under contention.
 								                # One worker per message; pool.map yields results in input order, so
 								                # send_results[i] belongs to messages[i]. Leaving the `with` block
 								                # waits for all workers to finish.
 								                with ThreadPoolExecutor(max_workers=S30_CONCURRENT_SENDS) as pool:
 								                    send_results = list(pool.map(sender_node.send_message, messages))
 								                # Every send must return Ok(RequestId).
 								                request_ids = []
 								                for i, send_result in enumerate(send_results):
 								                    assert send_result.is_ok(), f"Concurrent send #{i} failed: {send_result.err()}"
 								                    request_id = send_result.ok_value
 								                    assert request_id, f"Concurrent send #{i} returned an empty RequestId"
 								                    request_ids.append(request_id)
 								                # Request ids must be unique across concurrent sends.
 								                assert len(set(request_ids)) == len(request_ids), f"Duplicate RequestIds returned by concurrent sends: {request_ids}"
 								                # Each request id must get its own propagated event and no error.
 								                # The waits run one after another, so each id gets up to
 								                # PROPAGATED_TIMEOUT_S of its own.
 								                for request_id in request_ids:
 								                    propagated_event = wait_for_propagated(
 								                        collector=sender_collector,
 								                        request_id=request_id,
 								                        timeout_s=PROPAGATED_TIMEOUT_S,
 								                    )
 								                    assert propagated_event is not None, (
 								                        f"No MessagePropagatedEvent for request_id={request_id} "
 								                        f"within {PROPAGATED_TIMEOUT_S}s. "
 								                        f"Collected events: {sender_collector.events}"
 								                    )
 								                    # Negative check with timeout_s=0.
 								                    # NOTE(review): presumably inspects already-collected events
 								                    # without further waiting — confirm in wrapper_helpers.
 								                    error_event = wait_for_error(
 								                        collector=sender_collector,
 								                        request_id=request_id,
 								                        timeout_s=0,
 								                    )
 								                    assert error_event is None, f"Unexpected message_error for request_id={request_id}: {error_event}"
 								                # Cross-association guard: every event with a requestId must
 								                # belong to exactly one of the request ids we issued.
 								                issued = set(request_ids)
 								                for event in sender_collector.snapshot():
 								                    event_request_id = event.get("requestId")
 								                    # Events without a requestId are not tied to a send; skip them.
 								                    if event_request_id is None:
 								                        continue
 								                    assert event_request_id in issued, (
 								                        f"Event carries an unknown requestId={event_request_id!r}, " f"not in issued set {issued}. Event: {event}"
 								                    )
 								                # Per-request invariants apply to every concurrent send
 								                # (correct requestId, no duplicate terminal events,
 								                # Sent never before Propagated).
 								                for request_id in request_ids:
 								                    assert_event_invariants(sender_collector, request_id)
 								class TestS31ConcurrentSendsMixedTopicsDuringChurn(StepsStore):
 								    """
 								    S31: concurrent sends across mixed content topics during peer churn.
 								    """
 								    @pytest.mark.docker_required
 								    def test_s31_concurrent_sends_mixed_topics_during_churn(self, node_config):
 								        sender_collector = EventCollector()
 								        relay_peer = WakuNode(NODE_2, f"s31_relay_peer_{self.test_id}")
 								        relay_peer.start(relay="true", discv5_discovery="false")
 								        relay_peer.set_relay_subscriptions([self.test_pubsub_topic])
 								        lightpush_peer = WakuNode(NODE_2, f"s31_lightpush_peer_{self.test_id}")
 								        lightpush_peer.start(relay="true", lightpush="true", discv5_discovery="false")
 								        lightpush_peer.set_relay_subscriptions([self.test_pubsub_topic])
 								        store_peer = WakuNode(NODE_2, f"s31_store_peer_{self.test_id}")
 								        store_peer.start(relay="true", store="true", discv5_discovery="false")
 								        store_peer.set_relay_subscriptions([self.test_pubsub_topic])
 								        churn_peers = [relay_peer, lightpush_peer, store_peer]
 								        # Mesh docker peers so a lightpushed message can fan out to the store peer.
 								        peer_multiaddrs = [p.get_multiaddr_with_id() for p in churn_peers]
 								        for peer in churn_peers:
 								            others = [a for a in peer_multiaddrs if a != peer.get_multiaddr_with_id()]
 								            peer.add_peers(others)
 								        node_config.update(
 								            {
 								                "mode": "Edge",
 								                "relay": True,
 								                "lightpush": True,
 								                "store": False,
 								                "discv5Discovery": False,
 								                "numShardsInNetwork": 1,
 								                "lightpushnode": lightpush_peer.get_multiaddr_with_id(),
 								            }
 								        )
 								        sender_result = WrapperManager.create_and_start(
 								            config=node_config,
 								            event_cb=sender_collector.event_callback,
 								        )
 								        assert sender_result.is_ok(), f"Failed to start sender: {sender_result.err()}"
 								        with sender_result.ok_value as sender_node:
 								            sender_multiaddr = get_node_multiaddr(sender_node)
 								            for peer in churn_peers:
 								                peer.add_peers([sender_multiaddr])
 								            delay(3)  # let docker peers connect to the sender
 								            all_request_ids: list[str] = []
 								            phase1_ids = self._s31_fire_burst(sender_node, phase_label="phase1")
 								            all_request_ids.extend(phase1_ids)
 								            for peer in churn_peers:
 								                peer.restart()
 								            delay(1)  # small window so the restart is actually in-flight
 								            phase2_ids = self._s31_fire_burst(sender_node, phase_label="phase2")
 								            all_request_ids.extend(phase2_ids)
 								            # Wait for all peers to be ready again and re-attach the sender.
 								            for peer in churn_peers:
 								                peer.ensure_ready(timeout_duration=20)
 								                peer.add_peers([sender_multiaddr])
 								            peer_multiaddrs = [p.get_multiaddr_with_id() for p in churn_peers]
 								            for peer in churn_peers:
 								                others = [a for a in peer_multiaddrs if a != peer.get_multiaddr_with_id()]
 								                peer.add_peers(others)
 								            delay(3)
 								            phase3_ids = self._s31_fire_burst(sender_node, phase_label="phase3")
 								            all_request_ids.extend(phase3_ids)
 								            assert len(set(all_request_ids)) == len(all_request_ids), f"Duplicate RequestIds across bursts: {all_request_ids}"
 								            # Phase 1 ran before any churn, so the mesh was stable — standard timeout.
 								            # Phase 3 ran right after restart + re-attach, so the mesh needed to
 								            # re-stabilize — use the recovery timeout to avoid CI flakiness.
 								            phase_timeouts = [
 								                (phase1_ids, PROPAGATED_TIMEOUT_S),
 								                (phase3_ids, RECOVERY_TIMEOUT_S),
 								            ]
 								            for request_ids, timeout_s in phase_timeouts:
 								                for request_id in request_ids:
 								                    propagated_event = wait_for_propagated(
 								                        collector=sender_collector,
 								                        request_id=request_id,
 								                        timeout_s=timeout_s,
 								                    )
 								                    assert propagated_event is not None, (
 								                        f"No MessagePropagatedEvent for stable-phase "
 								                        f"request_id={request_id} within {timeout_s}s. "
 								                        f"Collected events: {sender_collector.events}"
 								                    )
 								                    error_event = wait_for_error(
 								                        collector=sender_collector,
 								                        request_id=request_id,
 								                        timeout_s=0,
 								                    )
 								                    assert error_event is None, f"Unexpected message_error event for stable-phase " f"request_id={request_id}: {error_event}"
 								            for request_id in phase2_ids:
 								                error_event = wait_for_error(
 								                    collector=sender_collector,
 								                    request_id=request_id,
 								                    timeout_s=0,
 								                )
 								                assert error_event is None, f"Unexpected terminal message_error for phase-2 " f"request_id={request_id} after recovery: {error_event}"
 								            issued = set(all_request_ids)
 								            for event in sender_collector.snapshot():
 								                event_request_id = event.get("requestId")
 								                if event_request_id is None:
 								                    continue
 								                assert event_request_id in issued, (
 								                    f"Event carries an unknown requestId={event_request_id!r}, " f"not in issued set {issued}. Event: {event}"
 								                )
 								            # Use the hash the wrapper emitted on message_sent so the store
 								            # lookup matches the exact bytes that were actually published.
 								            phase3_hashes = []
 								            for request_id in phase3_ids:
 								                sent_event = wait_for_sent(
 								                    collector=sender_collector,
 								                    request_id=request_id,
 								                    timeout_s=RECOVERY_TIMEOUT_S,
 								                )
 								                assert sent_event is not None, (
 								                    f"No message_sent event for phase-3 request_id={request_id} "
 								                    f"within {RECOVERY_TIMEOUT_S}s. Collected events: {sender_collector.events}"
 								                )
 								                msg_hash = sent_event.get("messageHash")
 								                assert msg_hash, f"message_sent event missing messageHash: {sent_event}"
 								                phase3_hashes.append(msg_hash)
 								            # 3 phases × S31_BURST_SIZE messages, so the page must fit them all,
 								            # otherwise phase-3 hashes (which sort last in ascending order) get cut off.
 								            self.check_sent_message_is_stored(
 								                expected_hashes=phase3_hashes,
 								                store_node=store_peer,
 								                pubsub_topic=self.test_pubsub_topic,
 								                page_size=S31_BURST_SIZE * 3,
 								                ascending="true",
 								            )
 								            # Per-request invariants apply across all phases, including the
 								            # retry-path bursts (phase 2). If retries ever emit duplicate
 								            # Propagated events or reorder Sent before Propagated, this catches it.
 								            for request_id in all_request_ids:
 								                assert_event_invariants(sender_collector, request_id)
 								    def _s31_fire_burst(self, sender_node, *, phase_label: str) -> list[str]:
 								        """Send one burst of S31_BURST_SIZE messages concurrently.

 								        Message ``i`` is built with ``self.create_message`` using
 								        ``S31_CONTENT_TOPICS[i]`` as content topic and a base64 payload derived
 								        from ``phase_label`` and ``i``. The messages are then handed to
 								        ``sender_node.send_message`` through a thread pool that has one worker
 								        per message.

 								        Returns the RequestIds in the order the messages were built.
 								        Asserts that every send returned Ok and carried a non-empty RequestId.
 								        """
 								        burst = []
 								        for i in range(S31_BURST_SIZE):
 								            topic = S31_CONTENT_TOPICS[i]
 								            encoded_payload = to_base64(f"s31-{phase_label}-{i}")
 								            burst.append(self.create_message(contentTopic=topic, payload=encoded_payload))

 								        # One worker per message; the pool is shut down (and all sends have
 								        # returned) by the time the with-block exits.
 								        with ThreadPoolExecutor(max_workers=S31_BURST_SIZE) as executor:
 								            outcomes = [*executor.map(sender_node.send_message, burst)]

 								        # map() yields results in submission order, so index i below refers
 								        # to the same message index used when building the burst.
 								        issued_ids = []
 								        for i, send_result in enumerate(outcomes):
 								            assert send_result.is_ok(), f"{phase_label}: concurrent send #{i} failed: {send_result.err()}"
 								            rid = send_result.ok_value
 								            assert rid, f"{phase_label}: concurrent send #{i} returned an empty RequestId"
 								            issued_ids.append(rid)
 								        return issued_ids