Relay Publish: multiple nodes (#4)

* github actions report summary

* use env instead of inputs

* multiple nodes tests

* fix warm up

* fix warm up

* small fix after CI run

* small fix after CI run 2

* add new multi-node test

* self review
This commit is contained in:
Florin Barbu 2023-11-21 09:29:48 +02:00 committed by GitHub
parent 589368f434
commit a6a0440312
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 128 additions and 41 deletions

View File

@ -9,13 +9,20 @@ on:
workflow_dispatch:
inputs:
node1:
required: false
required: true
description: "Node that usually publishes messages. Used for all tests"
type: string
default: "wakuorg/nwaku:latest"
node2:
required: false
required: true
description: "Node that usually queries for published messages. Used for all tests"
type: string
default: "wakuorg/go-waku:latest"
additional_nodes:
required: false
description: "Additional optional nodes used in e2e tests, separated by ,"
type: string
default: "wakuorg/nwaku:latest,wakuorg/go-waku:latest"
protocol:
description: "Protocol used to communicate inside the network"
required: true
@ -29,6 +36,7 @@ env:
FORCE_COLOR: "1"
NODE_1: ${{ inputs.node1 }}
NODE_2: ${{ inputs.node2 }}
ADDITIONAL_NODES: ${{ inputs.additional_nodes }}
PROTOCOL: ${{ inputs.protocol || 'REST' }}
jobs:
@ -75,3 +83,14 @@ jobs:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_branch: gh-pages
publish_dir: allure-history
- name: Create job summary
if: always()
run: |
echo "## Run Information" >> $GITHUB_STEP_SUMMARY
echo "- **Node1**: ${{ env.NODE_1 }}" >> $GITHUB_STEP_SUMMARY
echo "- **Node2**: ${{ env.NODE_2}}" >> $GITHUB_STEP_SUMMARY
echo "- **Additional Nodes**: ${{ env.ADDITIONAL_NODES }}" >> $GITHUB_STEP_SUMMARY
echo "- **Protocol**: ${{ env.PROTOCOL }}" >> $GITHUB_STEP_SUMMARY
echo "## Test Results" >> $GITHUB_STEP_SUMMARY
echo "Allure report will be available at: https://waku-org.github.io/waku-interop-tests/${{ github.run_number }}" >> $GITHUB_STEP_SUMMARY

View File

@ -16,6 +16,8 @@ def get_env_var(var_name, default=None):
# Configuration constants. Need to be uppercase to appear in reports
NODE_1 = get_env_var("NODE_1", "wakuorg/go-waku:latest")
NODE_2 = get_env_var("NODE_2", "wakuorg/nwaku:latest")
ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", "wakuorg/nwaku:latest,wakuorg/go-waku:latest")
# more nodes need to follow the NODE_X pattern
DOCKER_LOG_DIR = get_env_var("DOCKER_LOG_DIR", "./log/docker")
NETWORK_NAME = get_env_var("NETWORK_NAME", "waku")
SUBNET = get_env_var("SUBNET", "172.18.0.0/16")

View File

@ -1,3 +1,4 @@
import inspect
from src.libs.custom_logger import get_custom_logger
import math
from time import time
@ -5,7 +6,7 @@ import pytest
import allure
from src.libs.common import to_base64, delay
from src.data_classes import message_rpc_response_schema
from src.env_vars import NODE_1, NODE_2, NODEKEY
from src.env_vars import NODE_1, NODE_2, ADDITIONAL_NODES, NODEKEY, RUNNING_IN_CI
from src.node.waku_node import WakuNode
from tenacity import retry, stop_after_delay, wait_fixed
@ -13,36 +14,81 @@ logger = get_custom_logger(__name__)
class StepsRelay:
@pytest.fixture(scope="function", autouse=True)
def setup_nodes(self, request):
self.node1 = WakuNode(NODE_1, "node1_" + request.cls.test_id)
self.node1.start(relay="true", discv5_discovery="true", peer_exchange="true", nodekey=NODEKEY)
enr_uri = self.node1.info()["enrUri"]
self.node2 = WakuNode(NODE_2, "node2_" + request.cls.test_id)
self.node2.start(relay="true", discv5_discovery="true", discv5_bootstrap_node=enr_uri, peer_exchange="true")
self.test_pubsub_topic = "/waku/2/rs/18/1"
self.test_content_topic = "/test/1/waku-relay/proto"
self.test_payload = "Relay works!!"
self.node1.set_subscriptions([self.test_pubsub_topic])
self.node2.set_subscriptions([self.test_pubsub_topic])
test_pubsub_topic = "/waku/2/rs/18/1"
test_content_topic = "/test/1/waku-relay/proto"
test_payload = "Relay works!!"
@pytest.fixture(scope="function", autouse=True)
def network_warm_up(self, setup_nodes):
@pytest.fixture(scope="function")
def setup_main_relay_nodes(self, request):
logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
self.node1 = WakuNode(NODE_1, f"node1_{request.cls.test_id}")
self.node1.start(relay="true", discv5_discovery="true", peer_exchange="true", nodekey=NODEKEY)
self.enr_uri = self.node1.info()["enrUri"]
self.node2 = WakuNode(NODE_2, f"node1_{request.cls.test_id}")
self.node2.start(relay="true", discv5_discovery="true", discv5_bootstrap_node=self.enr_uri, peer_exchange="true")
self.main_nodes = [self.node1, self.node2]
self.optional_nodes = []
@pytest.fixture(scope="function")
def setup_optional_relay_nodes(self, request):
logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
if ADDITIONAL_NODES:
nodes = [node.strip() for node in ADDITIONAL_NODES.split(",")]
else:
pytest.skip("ADDITIONAL_NODES is empty, cannot run test")
for index, node in enumerate(nodes):
node = WakuNode(node, f"node{index}_{request.cls.test_id}")
node.start(relay="true", discv5_discovery="true", discv5_bootstrap_node=self.enr_uri, peer_exchange="true")
self.optional_nodes.append(node)
@pytest.fixture(scope="function")
def subscribe_main_relay_nodes(self):
logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
self.ensure_subscriptions_on_nodes(self.main_nodes, [self.test_pubsub_topic])
@pytest.fixture(scope="function")
def subscribe_optional_relay_nodes(self):
logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
self.ensure_subscriptions_on_nodes(self.optional_nodes, [self.test_pubsub_topic])
@pytest.fixture(scope="function")
def relay_warm_up(self):
try:
self.wait_for_published_message_to_reach_peer(120)
logger.info("WARM UP successful !!")
self.wait_for_published_message_to_reach_peer()
logger.info("WARM UP successful!!")
except Exception as ex:
raise TimeoutError(f"WARM UP FAILED WITH: {ex}")
# this method should be used only for the tests that use the warm_up fixture
# otherwise use wait_for_published_message_to_reach_peer
@allure.step
def check_published_message_reaches_peer(self, message, pubsub_topic=None, message_propagation_delay=0.1):
self.node1.send_message(message, pubsub_topic or self.test_pubsub_topic)
def check_published_message_reaches_peer(self, message, pubsub_topic=None, message_propagation_delay=0.1, sender=None, peer_list=None):
if not sender:
sender = self.node1
if not peer_list:
peer_list = self.main_nodes + self.optional_nodes
sender.send_message(message, pubsub_topic or self.test_pubsub_topic)
delay(message_propagation_delay)
get_messages_response = self.node2.get_messages(pubsub_topic or self.test_pubsub_topic)
assert get_messages_response, "Peer node couldn't find any messages"
for index, peer in enumerate(peer_list):
logger.debug(f"Checking that peer NODE_{index + 1}:{peer.image} can find the published message")
get_messages_response = peer.get_messages(pubsub_topic or self.test_pubsub_topic)
assert get_messages_response, f"Peer NODE_{index}:{peer.image} couldn't find any messages"
received_message = message_rpc_response_schema.load(get_messages_response[0])
self.assert_received_message(message, received_message)
# we need much bigger timeout in CI because we run tests in parallel there and the machine itself is slower
@allure.step
def wait_for_published_message_to_reach_peer(
self, timeout_duration=120 if RUNNING_IN_CI else 20, time_between_retries=1, sender=None, peer_list=None
):
@retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(time_between_retries), reraise=True)
def check_peer_connection():
message = {"payload": to_base64(self.test_payload), "contentTopic": self.test_content_topic, "timestamp": int(time() * 1e9)}
self.check_published_message_reaches_peer(message, sender=sender, peer_list=peer_list)
check_peer_connection()
@allure.step
def assert_received_message(self, sent_message, received_message):
def assert_fail_message(field_name):
return f"Incorrect field: {field_name}. Published: {sent_message[field_name]} Received: {getattr(received_message, field_name)}"
@ -63,14 +109,7 @@ class StepsRelay:
if "rateLimitProof" in sent_message:
assert str(received_message.rateLimitProof) == str(sent_message["rateLimitProof"]), assert_fail_message("rateLimitProof")
def wait_for_published_message_to_reach_peer(self, timeout_duration, time_between_retries=1):
@retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(time_between_retries), reraise=True)
def check_peer_connection():
message = {"payload": to_base64(self.test_payload), "contentTopic": self.test_content_topic, "timestamp": int(time() * 1e9)}
self.check_published_message_reaches_peer(message)
check_peer_connection()
@allure.step
def ensure_subscriptions_on_nodes(self, node_list, pubsub_topic_list):
for node in node_list:
node.set_subscriptions(pubsub_topic_list)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import inspect
import glob
from src.libs.custom_logger import get_custom_logger
import os
@ -28,6 +29,7 @@ def pytest_runtest_makereport(item):
def set_allure_env_variables():
yield
if os.path.isdir("allure-results") and not os.path.isfile(os.path.join("allure-results", "environment.properties")):
logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
with open(os.path.join("allure-results", "environment.properties"), "w") as outfile:
for attribute_name in dir(env_vars):
if attribute_name.isupper():
@ -38,6 +40,7 @@ def set_allure_env_variables():
@pytest.fixture(scope="function", autouse=True)
def test_id(request):
# setting up an unique test id to be used where needed
logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
request.cls.test_id = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}__{str(uuid4())}"
@ -45,6 +48,7 @@ def test_id(request):
def test_setup(request, test_id):
logger.debug(f"Running test: {request.node.name} with id: {request.cls.test_id}")
yield
logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
for file in glob.glob(os.path.join(env_vars.DOCKER_LOG_DIR, "*")):
if os.path.getmtime(file) < time() - 3600:
logger.debug(f"Deleting old log file: {file}")
@ -58,6 +62,7 @@ def test_setup(request, test_id):
def attach_logs_on_fail(request):
yield
if env_vars.RUNNING_IN_CI and hasattr(request.node, "rep_call") and request.node.rep_call.failed:
logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
logger.debug("Test failed, attempting to attach logs to the allure reports")
for file in glob.glob(os.path.join(env_vars.DOCKER_LOG_DIR, "*" + request.cls.test_id + "*")):
attach_allure_file(file)
@ -67,6 +72,7 @@ def attach_logs_on_fail(request):
def close_open_nodes(attach_logs_on_fail):
DS.waku_nodes = []
yield
logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
crashed_containers = []
for node in DS.waku_nodes:
try:

View File

@ -0,0 +1,19 @@
import pytest
from src.steps.relay import StepsRelay
@pytest.mark.usefixtures("setup_main_relay_nodes", "setup_optional_relay_nodes", "subscribe_main_relay_nodes")
class TestMultipleNodes(StepsRelay):
def test_first_node_to_start_publishes(self, subscribe_optional_relay_nodes, relay_warm_up):
self.check_published_message_reaches_peer(self.create_message())
def test_last_node_to_start_publishes(self, subscribe_optional_relay_nodes, relay_warm_up):
self.check_published_message_reaches_peer(self.create_message(), sender=self.optional_nodes[-1])
def test_optional_nodes_not_subscribed_to_same_pubsub_topic(self):
self.wait_for_published_message_to_reach_peer(peer_list=self.main_nodes)
try:
self.check_published_message_reaches_peer(self.create_message(), peer_list=self.optional_nodes)
raise AssertionError("Non subscribed nodes received the message!!")
except Exception as ex:
assert "Not Found" in str(ex), "Expected 404 Not Found when the message is not found"

View File

@ -1,3 +1,4 @@
import pytest
from src.libs.custom_logger import get_custom_logger
from time import time
from src.libs.common import delay, to_base64
@ -8,6 +9,7 @@ from src.data_classes import message_rpc_response_schema
logger = get_custom_logger(__name__)
@pytest.mark.usefixtures("setup_main_relay_nodes", "subscribe_main_relay_nodes", "relay_warm_up")
class TestRelayPublish(StepsRelay):
def test_publish_with_valid_payloads(self):
failed_payloads = []
@ -55,7 +57,7 @@ class TestRelayPublish(StepsRelay):
self.check_published_message_reaches_peer(message, message_propagation_delay=2)
raise AssertionError("Duplicate message was retrieved twice")
except Exception as ex:
assert "Peer node couldn't find any messages" in str(ex)
assert "couldn't find any messages" in str(ex)
def test_publish_with_valid_content_topics(self):
failed_content_topics = []
@ -90,7 +92,7 @@ class TestRelayPublish(StepsRelay):
assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex)
def test_publish_on_multiple_pubsub_topics(self):
self.ensure_subscriptions_on_nodes([self.node1, self.node2], VALID_PUBSUB_TOPICS)
self.ensure_subscriptions_on_nodes(self.main_nodes, VALID_PUBSUB_TOPICS)
failed_pubsub_topics = []
for pubsub_topic in VALID_PUBSUB_TOPICS:
logger.debug(f"Running test with pubsub topic {pubsub_topic}")
@ -102,7 +104,7 @@ class TestRelayPublish(StepsRelay):
assert not failed_pubsub_topics, f"PubusubTopic failed: {failed_pubsub_topics}"
def test_message_published_on_different_pubsub_topic_is_not_retrieved(self):
self.ensure_subscriptions_on_nodes([self.node1, self.node2], VALID_PUBSUB_TOPICS)
self.ensure_subscriptions_on_nodes(self.main_nodes, VALID_PUBSUB_TOPICS)
self.node1.send_message(self.create_message(), VALID_PUBSUB_TOPICS[0])
delay(0.1)
messages = self.node2.get_messages(VALID_PUBSUB_TOPICS[1])
@ -194,7 +196,7 @@ class TestRelayPublish(StepsRelay):
self.check_published_message_reaches_peer(message)
raise AssertionError("Duplicate message was retrieved twice")
except Exception as ex:
assert "Peer node couldn't find any messages" in str(ex)
assert "couldn't find any messages" in str(ex)
def test_publish_while_peer_is_paused(self):
message = self.create_message()
@ -218,14 +220,14 @@ class TestRelayPublish(StepsRelay):
def test_publish_after_node1_restarts(self):
self.check_published_message_reaches_peer(self.create_message())
self.node1.restart()
self.ensure_subscriptions_on_nodes([self.node1, self.node2], [self.test_pubsub_topic])
self.wait_for_published_message_to_reach_peer(20)
self.ensure_subscriptions_on_nodes(self.main_nodes, [self.test_pubsub_topic])
self.wait_for_published_message_to_reach_peer()
def test_publish_after_node2_restarts(self):
self.check_published_message_reaches_peer(self.create_message())
self.node2.restart()
self.ensure_subscriptions_on_nodes([self.node1, self.node2], [self.test_pubsub_topic])
self.wait_for_published_message_to_reach_peer(20)
self.ensure_subscriptions_on_nodes(self.main_nodes, [self.test_pubsub_topic])
self.wait_for_published_message_to_reach_peer()
def test_publish_and_retrieve_100_messages(self):
num_messages = 100 # if increase this number make sure to also increase rest-relay-cache-capacity flag