From 917e1cfd9be78105ba86537fd7dcd603d8d61057 Mon Sep 17 00:00:00 2001
From: Roman
Date: Wed, 5 Mar 2025 00:22:40 +0000
Subject: [PATCH 01/32] test: sustained_high_rate_upload

- sustained_high_rate_download
- sustained_high_rate_mixed
---
 tests/dos_robustness/test_high_load_dos.py | 112 +++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 tests/dos_robustness/test_high_load_dos.py

diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py
new file mode 100644
index 0000000..ab178ac
--- /dev/null
+++ b/tests/dos_robustness/test_high_load_dos.py
@@ -0,0 +1,112 @@
+import time
+
+import pytest
+
+from src.libs.common import to_app_id, to_index, delay
+from src.steps.da import StepsDataAvailability, logger
+from src.test_data import DATA_TO_DISPERSE
+
+
+class TestHighLoadDos(StepsDataAvailability):
+    main_nodes = []
+
+    @pytest.mark.usefixtures("setup_2_node_cluster")
+    def test_sustained_high_rate_upload(self):
+        timeout = 60
+        start_time = time.time()
+        successful_dispersals = 0
+        unsuccessful_dispersals = 0
+
+        while True:
+            if time.time() - start_time > timeout:
+                break
+
+            delay(0.01)
+            try:
+                response = self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), to_index(0), timeout_duration=0)
+                if response.status_code == 200:
+                    successful_dispersals += 1
+                else:
+                    unsuccessful_dispersals += 1
+            except Exception:
+                unsuccessful_dispersals += 1
+
+        assert successful_dispersals > 0, "No successful dispersals"
+
+        failure_ratio = unsuccessful_dispersals / successful_dispersals
+        logger.info(f"Unsuccessful dispersals ratio: {failure_ratio}")
+
+        assert failure_ratio < 0.20, f"Dispersal failure ratio {failure_ratio} too high"
+
+    @pytest.mark.usefixtures("setup_2_node_cluster")
+    def test_sustained_high_rate_download(self):
+        timeout = 60
+        successful_downloads = 0
+        unsuccessful_downloads = 0
+
+        try:
+            self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), to_index(0))
+        except Exception as ex:
+            raise Exception(f"Initial dispersal was not successful with error {ex}")
+
+        delay(5)
+        start_time = time.time()
+
+        while True:
+            if time.time() - start_time > timeout:
+                break
+
+            delay(0.01)
+            try:
+                self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0)
+                successful_downloads += 1
+            except Exception:
+                unsuccessful_downloads += 1
+
+        assert successful_downloads > 0, "No successful data downloads"
+
+        failure_ratio = unsuccessful_downloads / successful_downloads
+        logger.info(f"Unsuccessful download ratio: {failure_ratio}")
+
+        assert failure_ratio < 0.20, f"Data download failure ratio {failure_ratio} too high"
+
+    @pytest.mark.usefixtures("setup_2_node_cluster")
+    def test_sustained_high_rate_mixed(self):
+        timeout = 60
+        start_time = time.time()
+        successful_dispersals = 0
+        unsuccessful_dispersals = 0
+        successful_downloads = 0
+        unsuccessful_downloads = 0
+
+        while True:
+            if time.time() - start_time > timeout:
+                break
+
+            delay(0.01)
+            try:
+                response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), timeout_duration=0)
+                if response.status_code == 200:
+                    successful_dispersals += 1
+                else:
+                    unsuccessful_dispersals += 1
+            except Exception:
+                unsuccessful_dispersals += 1
+
+            try:
+                self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0)
+                successful_downloads += 1
+            except Exception:
+                unsuccessful_downloads += 1
+
+        assert successful_dispersals > 0, "No successful dispersals"
+        assert successful_downloads > 0, "No successful downloads"
+
+        failure_ratio_w = unsuccessful_dispersals / successful_dispersals
+        failure_ratio_r = unsuccessful_downloads / successful_downloads
+
+        logger.info(f"Unsuccessful dispersals ratio: {failure_ratio_w}")
+        logger.info(f"Unsuccessful download ratio: {failure_ratio_r}")
+
+        assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high"
+        assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high"

From 7e11b4939c6bdc8eaca9f10c116d605142f0f5b4 Mon Sep 17 00:00:00 2001
From: Roman
Date: Wed, 5 Mar 2025 03:37:11 +0000
Subject: [PATCH 02/32] test: client nodes initialization

---
 src/api_clients/rest.py                    | 16 +++++----
 src/cli/cli_vars.py                        |  7 ++++
 src/cli/nomos_cli.py                       | 16 +++++----
 src/data_storage.py                        |  1 +
 src/env_vars.py                            |  3 +-
 src/libs/common.py                         | 41 +++++++++++++++++++++
 src/steps/common.py                        | 17 +++++++++
 src/steps/da.py                            | 42 +---------------------
 tests/dos_robustness/test_high_load_dos.py | 42 ++++++++++++++++++++++
 9 files changed, 129 insertions(+), 56 deletions(-)

diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py
index 4e381bc..52bc741 100644
--- a/src/api_clients/rest.py
+++ b/src/api_clients/rest.py
@@ -9,13 +9,17 @@ class REST(BaseClient):
     def __init__(self, rest_port):
         self._rest_port = rest_port
 
-    def rest_call(self, method, endpoint, payload=None):
-        url = f"http://127.0.0.1:{self._rest_port}/{endpoint}"
+    def rest_call(self, method, endpoint, payload=None, host="127.0.0.1", port=None):
+        if port is None:
+            port = self._rest_port
+        url = f"http://{host}:{port}/{endpoint}"
         headers = {"Content-Type": "application/json", "Connection": "close"}
         return self.make_request(method, url, headers=headers, data=payload)
 
-    def rest_call_text(self, method, endpoint, payload=None):
-        url = f"http://127.0.0.1:{self._rest_port}/{endpoint}"
+    def rest_call_text(self, method, endpoint, payload=None, host="127.0.0.1", port=None):
+        if port is None:
+            port = self._rest_port
+        url = f"http://{host}:{port}/{endpoint}"
         headers = {"accept": "text/plain", "Connection": "close"}
         return self.make_request(method, url, headers=headers, data=payload)
 
@@ -23,9 +23,9 @@ class REST(BaseClient):
         status_response = self.rest_call("get", "cryptarchia/info")
         return status_response.json()
 
-    def send_dispersal_request(self, data):
+    def send_dispersal_request(self, data, host=None, port=None):
         return self.rest_call("post", "disperse-data", json.dumps(data))
 
-    def send_get_range(self, query):
+    def send_get_range(self, query, host=None, port=None):
         response = self.rest_call("post", "da/get-range", json.dumps(query))
         return response.json()
diff --git a/src/cli/cli_vars.py b/src/cli/cli_vars.py
index 1c3efb8..97b0dc0 100644
--- a/src/cli/cli_vars.py
+++ b/src/cli/cli_vars.py
@@ -8,4 +8,11 @@ nomos_cli = {
         "ports": [],
         "entrypoint": "",
     },
+    "client_node": {
+        "image": NOMOS_IMAGE,
+        "flags": [],
+        "volumes": [],
+        "ports": [],
+        "entrypoint": "",
+    },
 }
diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py
index 87c1d25..75f4793 100644
--- a/src/cli/nomos_cli.py
+++ b/src/cli/nomos_cli.py
@@ -3,14 +3,13 @@ import os
 import re
 
 from src.data_storage import DS
-from src.libs.common import generate_log_prefix
+from src.libs.common import generate_log_prefix, delay, remove_padding
 from src.libs.custom_logger import get_custom_logger
 from tenacity import retry, stop_after_delay, wait_fixed
 
 from src.cli.cli_vars import nomos_cli
 from src.docker_manager import DockerManager, stop, kill
 from src.env_vars import DOCKER_LOG_DIR, NOMOS_CLI
-from src.steps.da import 
remove_padding logger = get_custom_logger(__name__) @@ -66,16 +65,19 @@ class NomosCli: command=cmd, ) - DS.nomos_nodes.append(self) + DS.client_nodes.append(self) match self._command: case "reconstruct": decode_only = kwargs.get("decode_only", False) - return self.reconstruct(input_values=input_values, decode_only=decode_only) + return self.reconstruct(decode_only=decode_only) + case "client_node": + delay(3600) + return None case _: - return + return None - def reconstruct(self, input_values=None, decode_only=False): + def reconstruct(self, decode_only=False): keywords = ["Reconstructed data"] log_stream = self._container.logs(stream=True) @@ -98,7 +100,7 @@ class NomosCli: result_bytes = remove_padding(result_bytes) result = bytes(result_bytes).decode("utf-8") - DS.nomos_nodes.remove(self) + DS.client_nodes.remove(self) return result diff --git a/src/data_storage.py b/src/data_storage.py index c6215e2..44c171e 100644 --- a/src/data_storage.py +++ b/src/data_storage.py @@ -1,3 +1,4 @@ # We use this class for global variables class DS: nomos_nodes = [] + client_nodes = [] diff --git a/src/env_vars.py b/src/env_vars.py index 01706e0..8dcef9b 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -20,13 +20,12 @@ NOMOS_EXECUTOR = "nomos_executor" CFGSYNC = "cfgsync" DEFAULT_IMAGE = "ghcr.io/logos-co/nomos-node:testnet" +NOMOS_IMAGE = get_env_var("NOMOS_IMAGE", DEFAULT_IMAGE) NODE_1 = get_env_var("NODE_1", NOMOS) NODE_2 = get_env_var("NODE_2", NOMOS_EXECUTOR) NODE_3 = get_env_var("NODE_3", CFGSYNC) -NOMOS_IMAGE = get_env_var("NOMOS_IMAGE", DEFAULT_IMAGE) - NOMOS_CLI = "/usr/bin/nomos-cli" ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", f"{NOMOS},{NOMOS}") diff --git a/src/libs/common.py b/src/libs/common.py index 4d6184d..89de382 100644 --- a/src/libs/common.py +++ b/src/libs/common.py @@ -52,3 +52,44 @@ def generate_random_bytes(n=31): if n < 0: raise ValueError("Input must be an unsigned integer (non-negative)") return os.urandom(n) + + +def add_padding(orig_bytes): + """ + Pads a list of bytes (integers in [0..255]) using a PKCS#7-like scheme: + - The value of each padded byte is the number of bytes padded. + - If the original data is already a multiple of the block size, + an additional full block of bytes (each the block size) is added. + """ + block_size = 31 + original_len = len(orig_bytes) + padding_needed = block_size - (original_len % block_size) + # If the data is already a multiple of block_size, add a full block of padding + if padding_needed == 0: + padding_needed = block_size + + # Each padded byte will be equal to padding_needed + padded_bytes = orig_bytes + [padding_needed] * padding_needed + return padded_bytes + + +def remove_padding(padded_bytes): + """ + Removes PKCS#7-like padding from a list of bytes. + Raises: + ValueError: If the padding is incorrect. + Returns: + The original list of bytes without padding. 
+ """ + if not padded_bytes: + raise ValueError("The input is empty, cannot remove padding.") + + padding_len = padded_bytes[-1] + + if padding_len < 1 or padding_len > 31: + raise ValueError("Invalid padding length.") + + if padded_bytes[-padding_len:] != [padding_len] * padding_len: + raise ValueError("Invalid padding bytes.") + + return padded_bytes[:-padding_len] diff --git a/src/steps/common.py b/src/steps/common.py index dcd593a..8842848 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -4,6 +4,7 @@ import shutil import pytest +from src.cli.nomos_cli import NomosCli from src.env_vars import CFGSYNC, NOMOS, NOMOS_EXECUTOR from src.libs.common import delay from src.libs.custom_logger import get_custom_logger @@ -43,6 +44,7 @@ class StepsCommon: def cluster_setup(self): logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}") self.main_nodes = [] + self.cli_nodes = [] @pytest.fixture(scope="function") def setup_2_node_cluster(self, request): @@ -87,3 +89,18 @@ class StepsCommon: raise delay(5) + + @pytest.fixture(scope="function") + def init_client_nodes(self, request): + logger.debug(f"Running fixture init: {inspect.currentframe().f_code.co_name}") + + if hasattr(request, "param"): + num_clients = request.param + else: + num_clients = 5 + + for i in range(num_clients): + cli_node = NomosCli(command="client_node") + self.cli_nodes.append(cli_node) + + delay(1) diff --git a/src/steps/da.py b/src/steps/da.py index 53a04ed..720f11e 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -2,53 +2,13 @@ import allure from tenacity import retry, stop_after_delay, wait_fixed from src.env_vars import NOMOS_EXECUTOR +from src.libs.common import add_padding from src.libs.custom_logger import get_custom_logger from src.steps.common import StepsCommon logger = get_custom_logger(__name__) -def add_padding(orig_bytes): - """ - Pads a list of bytes (integers in [0..255]) using a PKCS#7-like scheme: - - The value of each padded byte is the number of bytes padded. - - If the original data is already a multiple of the block size, - an additional full block of bytes (each the block size) is added. - """ - block_size = 31 - original_len = len(orig_bytes) - padding_needed = block_size - (original_len % block_size) - # If the data is already a multiple of block_size, add a full block of padding - if padding_needed == 0: - padding_needed = block_size - - # Each padded byte will be equal to padding_needed - padded_bytes = orig_bytes + [padding_needed] * padding_needed - return padded_bytes - - -def remove_padding(padded_bytes): - """ - Removes PKCS#7-like padding from a list of bytes. - Raises: - ValueError: If the padding is incorrect. - Returns: - The original list of bytes without padding. 
- """ - if not padded_bytes: - raise ValueError("The input is empty, cannot remove padding.") - - padding_len = padded_bytes[-1] - - if padding_len < 1 or padding_len > 31: - raise ValueError("Invalid padding length.") - - if padded_bytes[-padding_len:] != [padding_len] * padding_len: - raise ValueError("Invalid padding bytes.") - - return padded_bytes[:-padding_len] - - def prepare_dispersal_request(data, app_id, index, utf8=True, padding=True): if utf8: data_bytes = data.encode("utf-8") diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index ab178ac..225bb8b 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -9,6 +9,7 @@ from src.test_data import DATA_TO_DISPERSE class TestHighLoadDos(StepsDataAvailability): main_nodes = [] + client_nodes = [] @pytest.mark.usefixtures("setup_2_node_cluster") def test_sustained_high_rate_upload(self): @@ -110,3 +111,44 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" + + @pytest.mark.usefixtures("setup_2_node_cluster", "init_client_nodes") + def test_sustained_high_rate_multiple_clients(self): + timeout = 60 + start_time = time.time() + successful_dispersals = 0 + unsuccessful_dispersals = 0 + successful_downloads = 0 + unsuccessful_downloads = 0 + + while True: + if time.time() - start_time > timeout: + break + + delay(0.01) + try: + response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), timeout_duration=0) + if response.status_code == 200: + successful_dispersals += 1 + else: + unsuccessful_dispersals += 1 + except Exception: + unsuccessful_dispersals += 1 + + try: + self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0) + successful_downloads += 1 + except Exception: + unsuccessful_downloads += 1 + + assert successful_dispersals > 0, "No successful dispersals" + assert successful_downloads > 0, "No successful downloads" + + failure_ratio_w = unsuccessful_dispersals / successful_dispersals + failure_ratio_r = unsuccessful_downloads / successful_downloads + + logger.info(f"Unsuccessful dispersals ratio: {failure_ratio_w}") + logger.info(f"Unsuccessful download ratio: {failure_ratio_r}") + + assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" + assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" From 6ff8510d616b63e3d2bd7b00801522f99bc730df Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 04:03:27 +0000 Subject: [PATCH 03/32] test: client node run --- src/cli/nomos_cli.py | 16 +++++++++------- src/steps/common.py | 4 ++-- tests/dos_robustness/test_high_load_dos.py | 2 ++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index 75f4793..36286b0 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -44,12 +44,15 @@ class NomosCli: self._port_map = {} - cmd = [NOMOS_CLI, self._command] - for flag in nomos_cli[self._command]["flags"]: - for f, indexes in flag.items(): - cmd.append(f) - for j in indexes: - cmd.append(input_values[j]) + if self._command == "client_node": + cmd = ["tail", "-f", "/dev/null"] + else: + cmd = [NOMOS_CLI, self._command] + for flag in nomos_cli[self._command]["flags"]: + for f, indexes in flag.items(): + cmd.append(f) + for j in indexes: + 
cmd.append(input_values[j]) logger.debug(f"NomosCli command to run {cmd}") @@ -72,7 +75,6 @@ class NomosCli: decode_only = kwargs.get("decode_only", False) return self.reconstruct(decode_only=decode_only) case "client_node": - delay(3600) return None case _: return None diff --git a/src/steps/common.py b/src/steps/common.py index 8842848..2ca8f56 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -44,7 +44,7 @@ class StepsCommon: def cluster_setup(self): logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}") self.main_nodes = [] - self.cli_nodes = [] + self.client_nodes = [] @pytest.fixture(scope="function") def setup_2_node_cluster(self, request): @@ -101,6 +101,6 @@ class StepsCommon: for i in range(num_clients): cli_node = NomosCli(command="client_node") - self.cli_nodes.append(cli_node) + self.client_nodes.append(cli_node) delay(1) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 225bb8b..ab5f258 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -121,6 +121,8 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 + self.client_nodes[0].run() + while True: if time.time() - start_time > timeout: break From c4fad7ac8475a90327de69036e7275aa57f5dda2 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 05:20:51 +0000 Subject: [PATCH 04/32] test: stop client nodes after test --- src/cli/nomos_cli.py | 3 +++ src/steps/common.py | 7 +++---- tests/conftest.py | 10 +++++++++- tests/dos_robustness/test_high_load_dos.py | 4 +--- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index 36286b0..df9dab9 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -113,3 +113,6 @@ class NomosCli: @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def kill(self): self._container = kill(self._container) + + def name(self): + return self._container_name diff --git a/src/steps/common.py b/src/steps/common.py index 2ca8f56..1e3cb60 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -91,8 +91,8 @@ class StepsCommon: delay(5) @pytest.fixture(scope="function") - def init_client_nodes(self, request): - logger.debug(f"Running fixture init: {inspect.currentframe().f_code.co_name}") + def setup_client_nodes(self, request): + logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}") if hasattr(request, "param"): num_clients = request.param @@ -101,6 +101,5 @@ class StepsCommon: for i in range(num_clients): cli_node = NomosCli(command="client_node") + cli_node.run() self.client_nodes.append(cli_node) - - delay(1) diff --git a/tests/conftest.py b/tests/conftest.py index 66d83d4..3145fb2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -71,6 +71,7 @@ def attach_logs_on_fail(request): @pytest.fixture(scope="function", autouse=True) def close_open_nodes(attach_logs_on_fail): DS.nomos_nodes = [] + DS.client_nodes = [] yield logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") crashed_containers = [] @@ -80,7 +81,14 @@ def close_open_nodes(attach_logs_on_fail): except Exception as ex: if "No such container" in str(ex): crashed_containers.append(node.image) - logger.error(f"Failed to stop container because of error {ex}") + logger.error(f"Failed to stop node container because of error {ex}") + for node in DS.client_nodes: + try: + node.stop() + except Exception as ex: + if 
"No such container" in str(ex): + crashed_containers.append(node.name()) + logger.error(f"Failed to stop client node container because of error {ex}") assert not crashed_containers, f"Containers {crashed_containers} crashed during the test!!!" diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index ab5f258..5e73dd6 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -112,7 +112,7 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" - @pytest.mark.usefixtures("setup_2_node_cluster", "init_client_nodes") + @pytest.mark.usefixtures("setup_2_node_cluster", "setup_client_nodes") def test_sustained_high_rate_multiple_clients(self): timeout = 60 start_time = time.time() @@ -121,8 +121,6 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 - self.client_nodes[0].run() - while True: if time.time() - start_time > timeout: break From d9e207fb1dfe7b0e7d69b4c9309cc03fcd826b81 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 06:50:20 +0000 Subject: [PATCH 05/32] test: remote API call - parallel container stop --- src/api_clients/rest.py | 15 +++++------ src/cli/nomos_cli.py | 6 +++++ src/node/nomos_node.py | 3 +++ src/steps/da.py | 10 +++++-- tests/conftest.py | 31 ++++++++++++---------- tests/dos_robustness/test_high_load_dos.py | 14 +++++----- 6 files changed, 47 insertions(+), 32 deletions(-) diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py index 52bc741..2da68e2 100644 --- a/src/api_clients/rest.py +++ b/src/api_clients/rest.py @@ -6,20 +6,17 @@ logger = get_custom_logger(__name__) class REST(BaseClient): - def __init__(self, rest_port): + def __init__(self, rest_port, rest_host="127.0.0.1"): self._rest_port = rest_port + self._rest_host = rest_host - def rest_call(self, method, endpoint, payload=None, host="127.0.0.1", port=None): - if port is None: - port = self._rest_port - url = f"http://{host}:{port}/{endpoint}" + def rest_call(self, method, endpoint, payload=None): + url = f"http://{self._rest_host}:{self._rest_port}/{endpoint}" headers = {"Content-Type": "application/json", "Connection": "close"} return self.make_request(method, url, headers=headers, data=payload) - def rest_call_text(self, method, endpoint, payload=None, host="127.0.0.1", port=None): - if port is None: - port = self._rest_port - url = f"http://{host}:{port}/{endpoint}" + def rest_call_text(self, method, endpoint, payload=None): + url = f"http://{self._rest_host}:{self._rest_port}/{endpoint}" headers = {"accept": "text/plain", "Connection": "close"} return self.make_request(method, url, headers=headers, data=payload) diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index df9dab9..d051ded 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -2,6 +2,7 @@ import json import os import re +from src.api_clients.rest import REST from src.data_storage import DS from src.libs.common import generate_log_prefix, delay, remove_padding from src.libs.custom_logger import get_custom_logger @@ -35,6 +36,7 @@ class NomosCli: self._docker_manager = DockerManager(self._image_name) self._container_name = container_name self._container = None + self._api = None cwd = os.getcwd() self._volumes = [cwd + "/" + volume for volume in self._volumes] @@ -106,6 +108,10 @@ class NomosCli: return 
result + def set_rest_api(self, host, port): + logger.debug(f"Setting rest API object to host {host} port {port}") + self._api = REST(port, host) + @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): self._container = stop(self._container) diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py index 86c5658..2c867ae 100644 --- a/src/node/nomos_node.py +++ b/src/node/nomos_node.py @@ -126,6 +126,9 @@ class NomosNode: def name(self): return self._container_name + def api_port(self): + return self._tcp_port + def check_nomos_log_errors(self, whitelist=None): keywords = LOG_ERROR_KEYWORDS diff --git a/src/steps/da.py b/src/steps/da.py index 720f11e..41575e6 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -1,4 +1,5 @@ import allure +from requests.packages import target from tenacity import retry, stop_after_delay, wait_fixed from src.env_vars import NOMOS_EXECUTOR @@ -45,14 +46,19 @@ class StepsDataAvailability(StepsCommon): return executor @allure.step - def disperse_data(self, data, app_id, index, timeout_duration=65, utf8=True, padding=True): + def disperse_data(self, data, app_id, index, client_node=None, timeout_duration=65, utf8=True, padding=True): @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(1), reraise=True) def disperse(my_self=self): response = [] request = prepare_dispersal_request(data, app_id, index, utf8=utf8, padding=padding) executor = my_self.find_executor_node() + try: - response = executor.send_dispersal_request(request) + if client_node is None: + response = executor.send_dispersal_request(request) + else: + response = client_node.set_rest_api(executor.name(), executor.api_port()) + response = client_node.send_dispersal_request(request) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) diff --git a/tests/conftest.py b/tests/conftest.py index 3145fb2..f70f72a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- import inspect import glob +from concurrent.futures import ThreadPoolExecutor, as_completed + from src.libs.custom_logger import get_custom_logger import os import pytest @@ -68,6 +70,14 @@ def attach_logs_on_fail(request): attach_allure_file(file) +def stop_node(node): + try: + node.stop() + except Exception as ex: + if "No such container" in str(ex): + logger.error(f"Failed to stop node container because of error {ex}") + + @pytest.fixture(scope="function", autouse=True) def close_open_nodes(attach_logs_on_fail): DS.nomos_nodes = [] @@ -75,20 +85,13 @@ def close_open_nodes(attach_logs_on_fail): yield logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") crashed_containers = [] - for node in DS.nomos_nodes: - try: - node.stop() - except Exception as ex: - if "No such container" in str(ex): - crashed_containers.append(node.image) - logger.error(f"Failed to stop node container because of error {ex}") - for node in DS.client_nodes: - try: - node.stop() - except Exception as ex: - if "No such container" in str(ex): - crashed_containers.append(node.name()) - logger.error(f"Failed to stop client node container because of error {ex}") + with ThreadPoolExecutor(max_workers=30) as executor: + node_cleanups = [executor.submit(stop_node, node) for node in DS.nomos_nodes] + [executor.submit(stop_node, node) for node in DS.client_nodes] + for cleanup in as_completed(node_cleanups): + result = cleanup.result() + if result is not None: + crashed_containers.append(result) + assert not crashed_containers, 
f"Containers {crashed_containers} crashed during the test!!!" diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 5e73dd6..858f847 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -114,7 +114,7 @@ class TestHighLoadDos(StepsDataAvailability): @pytest.mark.usefixtures("setup_2_node_cluster", "setup_client_nodes") def test_sustained_high_rate_multiple_clients(self): - timeout = 60 + timeout = 10 start_time = time.time() successful_dispersals = 0 unsuccessful_dispersals = 0 @@ -127,7 +127,7 @@ class TestHighLoadDos(StepsDataAvailability): delay(0.01) try: - response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), timeout_duration=0) + response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=self.client_nodes[0], timeout_duration=0) if response.status_code == 200: successful_dispersals += 1 else: @@ -135,11 +135,11 @@ class TestHighLoadDos(StepsDataAvailability): except Exception: unsuccessful_dispersals += 1 - try: - self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0) - successful_downloads += 1 - except Exception: - unsuccessful_downloads += 1 + # try: + # self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0) + # successful_downloads += 1 + # except Exception: + # unsuccessful_downloads += 1 assert successful_dispersals > 0, "No successful dispersals" assert successful_downloads > 0, "No successful downloads" From 45401bf4374d0a86817ed25ef165b049084dedea Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 06:55:21 +0000 Subject: [PATCH 06/32] fix: simplify parallel container stop --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index f70f72a..6b55d4b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -86,7 +86,7 @@ def close_open_nodes(attach_logs_on_fail): logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") crashed_containers = [] with ThreadPoolExecutor(max_workers=30) as executor: - node_cleanups = [executor.submit(stop_node, node) for node in DS.nomos_nodes] + [executor.submit(stop_node, node) for node in DS.client_nodes] + node_cleanups = [executor.submit(stop_node, node) for node in DS.nomos_nodes + DS.client_nodes] for cleanup in as_completed(node_cleanups): result = cleanup.result() if result is not None: From 7138a141fd5e3ae4072e278e926b4e41dd7029a5 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 11:07:44 +0000 Subject: [PATCH 07/32] fix: remove unused params --- src/api_clients/rest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py index 2da68e2..d62efa3 100644 --- a/src/api_clients/rest.py +++ b/src/api_clients/rest.py @@ -24,9 +24,9 @@ class REST(BaseClient): status_response = self.rest_call("get", "cryptarchia/info") return status_response.json() - def send_dispersal_request(self, data, host=None, port=None): + def send_dispersal_request(self, data): return self.rest_call("post", "disperse-data", json.dumps(data)) - def send_get_range(self, query, host=None, port=None): + def send_get_range(self, query): response = self.rest_call("post", "da/get-range", json.dumps(query)) return response.json() From e541f3fc1c390a9f1487e19d79254ca1800f7734 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 5 Mar 2025 23:05:13 +0000 Subject: [PATCH 08/32] 
fix: better handling of cleanup failure --- tests/conftest.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6b55d4b..dd418c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,7 +75,7 @@ def stop_node(node): node.stop() except Exception as ex: if "No such container" in str(ex): - logger.error(f"Failed to stop node container because of error {ex}") + logger.error(f"Failed to stop container {node.name()} because of error {ex}") @pytest.fixture(scope="function", autouse=True) @@ -84,15 +84,16 @@ def close_open_nodes(attach_logs_on_fail): DS.client_nodes = [] yield logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") - crashed_containers = [] + failed_cleanups = [] with ThreadPoolExecutor(max_workers=30) as executor: node_cleanups = [executor.submit(stop_node, node) for node in DS.nomos_nodes + DS.client_nodes] for cleanup in as_completed(node_cleanups): - result = cleanup.result() - if result is not None: - crashed_containers.append(result) + try: + cleanup.result() + except Exception as ex: + failed_cleanups.append(ex) - assert not crashed_containers, f"Containers {crashed_containers} crashed during the test!!!" + assert not failed_cleanups, f"Container cleanup failed with {failed_cleanups} !!!" @pytest.fixture(scope="function", autouse=True) From 7c4e0a64ec5346e3d105251a0468002a811b146c Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 6 Mar 2025 01:57:21 +0000 Subject: [PATCH 09/32] fix: dispersal to remote host --- src/cli/nomos_cli.py | 6 ++++++ src/node/nomos_node.py | 6 ++++++ src/steps/da.py | 6 +++--- tests/dos_robustness/test_high_load_dos.py | 5 ++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index d051ded..420fd6e 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -122,3 +122,9 @@ class NomosCli: def name(self): return self._container_name + + def send_dispersal_request(self, data): + return self._api.send_dispersal_request(data) + + def send_get_data_range_request(self, data): + return self._api.send_get_range(data) diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py index 2c867ae..4921a13 100644 --- a/src/node/nomos_node.py +++ b/src/node/nomos_node.py @@ -129,6 +129,12 @@ class NomosNode: def api_port(self): return self._tcp_port + def api_port_internal(self): + for internal_port, external_port in self._port_map.items(): + if str(external_port).replace("/tcp", "") == self._tcp_port: + return internal_port.replace("/tcp", "") + return None + def check_nomos_log_errors(self, whitelist=None): keywords = LOG_ERROR_KEYWORDS diff --git a/src/steps/da.py b/src/steps/da.py index 41575e6..72941e5 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -47,7 +47,7 @@ class StepsDataAvailability(StepsCommon): @allure.step def disperse_data(self, data, app_id, index, client_node=None, timeout_duration=65, utf8=True, padding=True): - @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(1), reraise=True) + @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def disperse(my_self=self): response = [] request = prepare_dispersal_request(data, app_id, index, utf8=utf8, padding=padding) @@ -57,7 +57,7 @@ class StepsDataAvailability(StepsCommon): if client_node is None: response = executor.send_dispersal_request(request) else: - response = client_node.set_rest_api(executor.name(), executor.api_port()) + client_node.set_rest_api(executor.name(), 
executor.api_port_internal()) response = client_node.send_dispersal_request(request) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) @@ -71,7 +71,7 @@ class StepsDataAvailability(StepsCommon): @allure.step def get_data_range(self, node, app_id, start, end, timeout_duration=45): - @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(1), reraise=True) + @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def get_range(): response = [] query = prepare_get_range_request(app_id, start, end) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 858f847..dc27648 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -128,13 +128,16 @@ class TestHighLoadDos(StepsDataAvailability): delay(0.01) try: response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=self.client_nodes[0], timeout_duration=0) + logger.debug(f"RESPONSE {response}") if response.status_code == 200: successful_dispersals += 1 else: unsuccessful_dispersals += 1 - except Exception: + except Exception as ex: + logger.debug(f"EXCEPTION {ex}") unsuccessful_dispersals += 1 + delay(3600) # try: # self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0) # successful_downloads += 1 From 04f2f1ad85b7b6361563ed85d19f7e2d144fda32 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 6 Mar 2025 02:14:41 +0000 Subject: [PATCH 10/32] fix: get data range from remote host --- src/steps/da.py | 8 ++++++-- tests/dos_robustness/test_high_load_dos.py | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/steps/da.py b/src/steps/da.py index 72941e5..67af39d 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -70,13 +70,17 @@ class StepsDataAvailability(StepsCommon): return disperse() @allure.step - def get_data_range(self, node, app_id, start, end, timeout_duration=45): + def get_data_range(self, node, app_id, start, end, client_node=None, timeout_duration=45): @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def get_range(): response = [] query = prepare_get_range_request(app_id, start, end) try: - response = node.send_get_data_range_request(query) + if client_node is None: + response = node.send_get_data_range_request(query) + else: + client_node.set_rest_api(node.name(), node.api_port_internal()) + response = client_node.send_get_data_range_request(query) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index dc27648..adc2076 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -1,3 +1,4 @@ +import random import time import pytest @@ -125,24 +126,23 @@ class TestHighLoadDos(StepsDataAvailability): if time.time() - start_time > timeout: break + dispersal_cl, download_cl = random.choice(self.client_nodes), random.choice(self.client_nodes) + delay(0.01) try: - response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=self.client_nodes[0], timeout_duration=0) - logger.debug(f"RESPONSE {response}") + response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=dispersal_cl, timeout_duration=0) if response.status_code == 200: successful_dispersals += 1 else: 
unsuccessful_dispersals += 1 - except Exception as ex: - logger.debug(f"EXCEPTION {ex}") + except Exception: unsuccessful_dispersals += 1 - delay(3600) - # try: - # self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), timeout_duration=0) - # successful_downloads += 1 - # except Exception: - # unsuccessful_downloads += 1 + try: + self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5), client_node=download_cl, timeout_duration=0) + successful_downloads += 1 + except Exception: + unsuccessful_downloads += 1 assert successful_dispersals > 0, "No successful dispersals" assert successful_downloads > 0, "No successful downloads" From 201f72097d6c5dfda994e3b89055f0cb83bffa06 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 6 Mar 2025 02:48:56 +0000 Subject: [PATCH 11/32] fix: refactor client node flags --- src/cli/cli_vars.py | 2 +- src/cli/nomos_cli.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cli/cli_vars.py b/src/cli/cli_vars.py index 97b0dc0..0dcb84c 100644 --- a/src/cli/cli_vars.py +++ b/src/cli/cli_vars.py @@ -10,7 +10,7 @@ nomos_cli = { }, "client_node": { "image": NOMOS_IMAGE, - "flags": [], + "flags": [{"tail": [], "-f": [], "/dev/null": []}], "volumes": [], "ports": [], "entrypoint": "", diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index 420fd6e..042fc65 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -47,14 +47,15 @@ class NomosCli: self._port_map = {} if self._command == "client_node": - cmd = ["tail", "-f", "/dev/null"] + cmd = [] else: cmd = [NOMOS_CLI, self._command] - for flag in nomos_cli[self._command]["flags"]: - for f, indexes in flag.items(): - cmd.append(f) - for j in indexes: - cmd.append(input_values[j]) + + for flag in nomos_cli[self._command]["flags"]: + for f, indexes in flag.items(): + cmd.append(f) + for j in indexes: + cmd.append(input_values[j]) logger.debug(f"NomosCli command to run {cmd}") @@ -109,7 +110,6 @@ class NomosCli: return result def set_rest_api(self, host, port): - logger.debug(f"Setting rest API object to host {host} port {port}") self._api = REST(port, host) @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) From c233edda986759b4e96b24454ef6dede74076c72 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 6 Mar 2025 02:58:13 +0000 Subject: [PATCH 12/32] fix: shorter variable names --- src/api_clients/rest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py index d62efa3..78539ec 100644 --- a/src/api_clients/rest.py +++ b/src/api_clients/rest.py @@ -6,9 +6,9 @@ logger = get_custom_logger(__name__) class REST(BaseClient): - def __init__(self, rest_port, rest_host="127.0.0.1"): - self._rest_port = rest_port - self._rest_host = rest_host + def __init__(self, port, host="127.0.0.1"): + self._rest_port = port + self._rest_host = host def rest_call(self, method, endpoint, payload=None): url = f"http://{self._rest_host}:{self._rest_port}/{endpoint}" From 73acf52329817c479891d48d4f2110032e746855 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 7 Mar 2025 02:13:16 +0000 Subject: [PATCH 13/32] test: invalid rest API --- src/api_clients/invalid_rest.py | 67 +++++++++++++++++++++++++++++++++ src/cli/nomos_cli.py | 4 ++ 2 files changed, 71 insertions(+) create mode 100644 src/api_clients/invalid_rest.py diff --git a/src/api_clients/invalid_rest.py b/src/api_clients/invalid_rest.py new file mode 100644 index 0000000..4d026fd --- /dev/null +++ b/src/api_clients/invalid_rest.py @@ 
-0,0 +1,67 @@ +import random +import string + +from src.api_clients.rest import REST +from src.libs.common import generate_random_bytes +from src.libs.custom_logger import get_custom_logger +import json + +logger = get_custom_logger(__name__) + + +def alter_dispersal_data(data): + + # Add random bytes to data and break padding + def alter_data_content(): + random_n = random.randint(1, 31) + data["data"].extend(list(generate_random_bytes(random_n))) + + # Change structure and content for metadata + def alter_metadata(): + random_n = random.randint(7, 32) + data["metadata"] = list(generate_random_bytes(random_n)) + + # Add random property to the data object with random list content + def add_random_property(): + random_k = random.randint(1, 16) + random_n = random.randint(7, 64) + random_str = "".join(random.choices(string.printable, k=random_k)) + data[random_str] = list(generate_random_bytes(random_n)) + + choice = random.choice([alter_data_content, alter_metadata, add_random_property]) + choice() + + return data + + +def alter_get_range_query(query): + + # Swap range high with range low + def swap_range(): + end = query["range"]["end"] + query["range"]["end"] = query["range"]["start"] + query["range"]["start"] = end + + # Change app id + def alter_app_id(): + random_n = random.randint(8, 33) + query["app_id"] = list(generate_random_bytes(random_n)) + + choice = random.choice([swap_range, alter_app_id]) + choice() + + return query + + +class INVALID_REST(REST): + def __init__(self, port, host="127.0.0.1"): + super().__init__(port, host) + + def send_dispersal_request(self, data): + data = alter_dispersal_data(data) + return self.rest_call("post", "disperse-data", json.dumps(data)) + + def send_get_range(self, query): + query = alter_get_range_query(query) + response = self.rest_call("post", "da/get-range", json.dumps(query)) + return response.json() diff --git a/src/cli/nomos_cli.py b/src/cli/nomos_cli.py index 042fc65..af9a053 100644 --- a/src/cli/nomos_cli.py +++ b/src/cli/nomos_cli.py @@ -2,6 +2,7 @@ import json import os import re +from src.api_clients.invalid_rest import INVALID_REST from src.api_clients.rest import REST from src.data_storage import DS from src.libs.common import generate_log_prefix, delay, remove_padding @@ -112,6 +113,9 @@ class NomosCli: def set_rest_api(self, host, port): self._api = REST(port, host) + def set_invalid_rest_api(self, host, port): + self._api = INVALID_REST(port, host) + @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): self._container = stop(self._container) From dd1134dcc4f381ab3c8b25a45e2ed9e74a963d3e Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 7 Mar 2025 03:33:34 +0000 Subject: [PATCH 14/32] test: with invalid requests --- src/steps/da.py | 15 ++++--- tests/dos_robustness/test_high_load_dos.py | 52 ++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/src/steps/da.py b/src/steps/da.py index 67af39d..32bda30 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -1,5 +1,4 @@ import allure -from requests.packages import target from tenacity import retry, stop_after_delay, wait_fixed from src.env_vars import NOMOS_EXECUTOR @@ -46,7 +45,7 @@ class StepsDataAvailability(StepsCommon): return executor @allure.step - def disperse_data(self, data, app_id, index, client_node=None, timeout_duration=65, utf8=True, padding=True): + def disperse_data(self, data, app_id, index, client_node=None, timeout_duration=65, utf8=True, padding=True, send_invalid=False): 
@retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def disperse(my_self=self): response = [] @@ -57,7 +56,10 @@ class StepsDataAvailability(StepsCommon): if client_node is None: response = executor.send_dispersal_request(request) else: - client_node.set_rest_api(executor.name(), executor.api_port_internal()) + if send_invalid: + client_node.set_invalid_rest_api(executor.name(), executor.api_port_internal()) + else: + client_node.set_rest_api(executor.name(), executor.api_port_internal()) response = client_node.send_dispersal_request(request) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) @@ -70,7 +72,7 @@ class StepsDataAvailability(StepsCommon): return disperse() @allure.step - def get_data_range(self, node, app_id, start, end, client_node=None, timeout_duration=45): + def get_data_range(self, node, app_id, start, end, client_node=None, timeout_duration=45, send_invalid=False): @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def get_range(): response = [] @@ -79,7 +81,10 @@ class StepsDataAvailability(StepsCommon): if client_node is None: response = node.send_get_data_range_request(query) else: - client_node.set_rest_api(node.name(), node.api_port_internal()) + if send_invalid: + client_node.set_invalid_rest_api(node.name(), node.api_port_internal()) + else: + client_node.set_rest_api(node.name(), node.api_port_internal()) response = client_node.send_get_data_range_request(query) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index adc2076..a30ea5e 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -155,3 +155,55 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" + + @pytest.mark.usefixtures("setup_2_node_cluster", "setup_client_nodes") + def test_sustained_high_rate_with_invalid_requests(self): + timeout = 10 + start_time = time.time() + successful_dispersals = 0 + unsuccessful_dispersals = 0 + successful_downloads = 0 + unsuccessful_downloads = 0 + + while True: + if time.time() - start_time > timeout: + break + + dispersal_cl, download_cl = random.choice(self.client_nodes), random.choice(self.client_nodes) + + delay(0.01) + invalid = random.choice([True, False]) + + try: + response = self.disperse_data( + DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=dispersal_cl, timeout_duration=0, send_invalid=invalid + ) + if response.status_code == 200: + successful_dispersals += 1 + elif not invalid: + unsuccessful_dispersals += 1 + except Exception: + if not invalid: + unsuccessful_dispersals += 1 + + try: + invalid = random.choice([True, False]) + self.get_data_range( + self.node2, to_app_id(1), to_index(0), to_index(5), client_node=download_cl, timeout_duration=0, send_invalid=invalid + ) + successful_downloads += 1 + except Exception: + if not invalid: + unsuccessful_downloads += 1 + + assert successful_dispersals > 0, "No successful dispersals" + assert successful_downloads > 0, "No successful downloads" + + failure_ratio_w = unsuccessful_dispersals / successful_dispersals + failure_ratio_r = unsuccessful_downloads / successful_downloads + + logger.info(f"Unsuccessful dispersals ratio: 
{failure_ratio_w}") + logger.info(f"Unsuccessful download ratio: {failure_ratio_r}") + + assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" + assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" From 5077adc28878721e0530f7066cb462fc6802dba7 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 7 Mar 2025 03:40:36 +0000 Subject: [PATCH 15/32] fix: remove redundant invalid selection --- tests/dos_robustness/test_high_load_dos.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index a30ea5e..cf71256 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -187,7 +187,6 @@ class TestHighLoadDos(StepsDataAvailability): unsuccessful_dispersals += 1 try: - invalid = random.choice([True, False]) self.get_data_range( self.node2, to_app_id(1), to_index(0), to_index(5), client_node=download_cl, timeout_duration=0, send_invalid=invalid ) From d85c19269ae718d54a23ba5286306f52ecd3a5e2 Mon Sep 17 00:00:00 2001 From: Roman Date: Mon, 10 Mar 2025 03:33:54 +0000 Subject: [PATCH 16/32] test: replace client node with proxy client --- README.md | 2 +- src/api_clients/base_client.py | 6 +- src/{cli => client}/__init__.py | 0 .../cli_vars.py => client/client_vars.py} | 19 ++-- src/{cli => client}/nomos_cli.py | 20 +--- src/client/proxy_client.py | 100 ++++++++++++++++++ src/env_vars.py | 4 +- src/steps/common.py | 2 +- 8 files changed, 124 insertions(+), 29 deletions(-) rename src/{cli => client}/__init__.py (100%) rename src/{cli/cli_vars.py => client/client_vars.py} (50%) rename src/{cli => client}/nomos_cli.py (86%) create mode 100644 src/client/proxy_client.py diff --git a/README.md b/README.md index 5e13480..410ba5a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ python -m venv .venv source .venv/bin/activate pip install -r requirements.txt pre-commit install -(optional) Overwrite default vars from src/env_vars.py via cli env vars or by adding a .env file +(optional) Overwrite default vars from src/env_vars.py via env vars or by adding a .env file pytest ``` diff --git a/src/api_clients/base_client.py b/src/api_clients/base_client.py index b731e0f..e8c9ddf 100644 --- a/src/api_clients/base_client.py +++ b/src/api_clients/base_client.py @@ -9,7 +9,11 @@ logger = get_custom_logger(__name__) class BaseClient: def make_request(self, method, url, headers=None, data=None): self.log_request_as_curl(method, url, headers, data) - response = requests.request(method.upper(), url, headers=headers, data=data, timeout=API_REQUEST_TIMEOUT) + try: + response = requests.request(method.upper(), url, headers=headers, data=data, timeout=API_REQUEST_TIMEOUT) + except Exception as ex: + logger.error(f"HERE An error occurred: {ex}. 
") + try: response.raise_for_status() except requests.HTTPError as http_err: diff --git a/src/cli/__init__.py b/src/client/__init__.py similarity index 100% rename from src/cli/__init__.py rename to src/client/__init__.py diff --git a/src/cli/cli_vars.py b/src/client/client_vars.py similarity index 50% rename from src/cli/cli_vars.py rename to src/client/client_vars.py index 0dcb84c..2e31061 100644 --- a/src/cli/cli_vars.py +++ b/src/client/client_vars.py @@ -1,4 +1,4 @@ -from src.env_vars import NOMOS_IMAGE +from src.env_vars import NOMOS_IMAGE, HTTP_PROXY_IMAGE nomos_cli = { "reconstruct": { @@ -8,11 +8,14 @@ nomos_cli = { "ports": [], "entrypoint": "", }, - "client_node": { - "image": NOMOS_IMAGE, - "flags": [{"tail": [], "-f": [], "/dev/null": []}], - "volumes": [], - "ports": [], - "entrypoint": "", - }, +} + +http_proxy = { + "configurable-http-proxy": { + "image": HTTP_PROXY_IMAGE, + "flags": [{"--default-target": [0]}], # Value [] is a list of indexes into list of values required for the flag + "volumes": [], + "ports": ["8000/tcp"], + "entrypoint": "", + } } diff --git a/src/cli/nomos_cli.py b/src/client/nomos_cli.py similarity index 86% rename from src/cli/nomos_cli.py rename to src/client/nomos_cli.py index af9a053..b80db47 100644 --- a/src/cli/nomos_cli.py +++ b/src/client/nomos_cli.py @@ -9,7 +9,7 @@ from src.libs.common import generate_log_prefix, delay, remove_padding from src.libs.custom_logger import get_custom_logger from tenacity import retry, stop_after_delay, wait_fixed -from src.cli.cli_vars import nomos_cli +from src.client.client_vars import nomos_cli from src.docker_manager import DockerManager, stop, kill from src.env_vars import DOCKER_LOG_DIR, NOMOS_CLI @@ -25,7 +25,7 @@ class NomosCli: if command not in nomos_cli: raise ValueError("Unknown command provided") - logger.debug(f"Cli is going to be initialized with this config {nomos_cli[command]}") + logger.debug(f"NomosCli is going to be initialized with this config {nomos_cli[command]}") self._command = command self._image_name = nomos_cli[command]["image"] self._internal_ports = nomos_cli[command]["ports"] @@ -47,10 +47,7 @@ class NomosCli: self._port_map = {} - if self._command == "client_node": - cmd = [] - else: - cmd = [NOMOS_CLI, self._command] + cmd = [NOMOS_CLI, self._command] for flag in nomos_cli[self._command]["flags"]: for f, indexes in flag.items(): @@ -78,8 +75,6 @@ class NomosCli: case "reconstruct": decode_only = kwargs.get("decode_only", False) return self.reconstruct(decode_only=decode_only) - case "client_node": - return None case _: return None @@ -113,9 +108,6 @@ class NomosCli: def set_rest_api(self, host, port): self._api = REST(port, host) - def set_invalid_rest_api(self, host, port): - self._api = INVALID_REST(port, host) - @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): self._container = stop(self._container) @@ -126,9 +118,3 @@ class NomosCli: def name(self): return self._container_name - - def send_dispersal_request(self, data): - return self._api.send_dispersal_request(data) - - def send_get_data_range_request(self, data): - return self._api.send_get_range(data) diff --git a/src/client/proxy_client.py b/src/client/proxy_client.py new file mode 100644 index 0000000..cca35e0 --- /dev/null +++ b/src/client/proxy_client.py @@ -0,0 +1,100 @@ +import json +import os +import re + +from src.api_clients.invalid_rest import INVALID_REST +from src.api_clients.rest import REST +from src.data_storage import DS +from src.libs.common import generate_log_prefix, 
delay, remove_padding +from src.libs.custom_logger import get_custom_logger +from tenacity import retry, stop_after_delay, wait_fixed + +from src.client.client_vars import http_proxy +from src.docker_manager import DockerManager, stop, kill +from src.env_vars import DOCKER_LOG_DIR, NOMOS_CLI + +logger = get_custom_logger(__name__) + + +class ProxyClient: + def __init__(self): + command = "configurable-http-proxy" + + logger.debug(f"ProxyClient is going to be initialized with this config {http_proxy[command]}") + self._command = command + self._image_name = http_proxy[command]["image"] + self._internal_ports = http_proxy[command]["ports"] + self._volumes = http_proxy[command]["volumes"] + self._entrypoint = http_proxy[command]["entrypoint"] + + container_name = "proxy-client-" + generate_log_prefix() + self._log_path = os.path.join(DOCKER_LOG_DIR, f"{container_name}__{self._image_name.replace('/', '_')}.log") + self._docker_manager = DockerManager(self._image_name) + self._container_name = container_name + self._container = None + self._api = None + + cwd = os.getcwd() + self._volumes = [cwd + "/" + volume for volume in self._volumes] + + def run(self, input_values=None, **kwargs): + logger.debug(f"ProxyClient starting with log path {self._log_path}") + + self._port_map = {} + self._external_ports = self._docker_manager.generate_ports(count=1) + self._tcp_port = self._external_ports[0] + self._api = REST(self._tcp_port) + + logger.debug(f"Internal ports {self._internal_ports}") + + for i, port in enumerate(self._internal_ports): + self._port_map[port] = int(self._external_ports[i]) + + logger.debug(f"Port map {self._port_map}") + + cmd = [self._command] + + for flag in http_proxy[self._command]["flags"]: + for f, indexes in flag.items(): + cmd.append(f) + for j in indexes: + cmd.append(input_values[j]) + + logger.debug(f"ProxyCLient command to run {cmd}") + + self._container = self._docker_manager.start_container( + self._docker_manager.image, + port_bindings=self._port_map, + args=None, + log_path=self._log_path, + volumes=self._volumes, + entrypoint=self._entrypoint, + remove_container=True, + name=self._container_name, + command=cmd, + ) + + DS.client_nodes.append(self) + + def set_rest_api(self, host, port): + self._api = REST(port, host) + + def set_invalid_rest_api(self, host, port): + self._api = INVALID_REST(port, host) + + @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) + def stop(self): + self._container = stop(self._container) + + @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) + def kill(self): + self._container = kill(self._container) + + def name(self): + return self._container_name + + def send_dispersal_request(self, data): + return self._api.send_dispersal_request(data) + + def send_get_data_range_request(self, data): + return self._api.send_get_range(data) diff --git a/src/env_vars.py b/src/env_vars.py index 8dcef9b..46c149e 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -22,11 +22,13 @@ CFGSYNC = "cfgsync" DEFAULT_IMAGE = "ghcr.io/logos-co/nomos-node:testnet" NOMOS_IMAGE = get_env_var("NOMOS_IMAGE", DEFAULT_IMAGE) +HTTP_PROXY_IMAGE = get_env_var("HTTP_PROXY_IMAGE", "bitnami/configurable-http-proxy:latest") + NODE_1 = get_env_var("NODE_1", NOMOS) NODE_2 = get_env_var("NODE_2", NOMOS_EXECUTOR) NODE_3 = get_env_var("NODE_3", CFGSYNC) -NOMOS_CLI = "/usr/bin/nomos-cli" +NOMOS_CLI = "/usr/bin/nomos-client" ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", f"{NOMOS},{NOMOS}") # more nodes need to follow the NODE_X pattern diff 
--git a/src/steps/common.py b/src/steps/common.py index 1e3cb60..e777b68 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -4,7 +4,7 @@ import shutil import pytest -from src.cli.nomos_cli import NomosCli +from src.client.nomos_cli import NomosCli from src.env_vars import CFGSYNC, NOMOS, NOMOS_EXECUTOR from src.libs.common import delay from src.libs.custom_logger import get_custom_logger From 5305bc91021065bc61dfb23d68be464b23c6c260 Mon Sep 17 00:00:00 2001 From: Roman Date: Mon, 10 Mar 2025 05:57:22 +0000 Subject: [PATCH 17/32] fix: update related test and variables --- src/api_clients/base_client.py | 6 +----- src/api_clients/invalid_rest.py | 4 ++-- src/api_clients/rest.py | 9 ++++----- src/client/nomos_cli.py | 7 +------ src/client/proxy_client.py | 8 ++++---- src/env_vars.py | 13 +++++++------ src/steps/common.py | 19 ++++++++++++------- src/steps/da.py | 8 ++++---- tests/data_integrity/test_data_integrity.py | 2 +- tests/dos_robustness/test_high_load_dos.py | 15 ++++++++------- 10 files changed, 44 insertions(+), 47 deletions(-) diff --git a/src/api_clients/base_client.py b/src/api_clients/base_client.py index e8c9ddf..b731e0f 100644 --- a/src/api_clients/base_client.py +++ b/src/api_clients/base_client.py @@ -9,11 +9,7 @@ logger = get_custom_logger(__name__) class BaseClient: def make_request(self, method, url, headers=None, data=None): self.log_request_as_curl(method, url, headers, data) - try: - response = requests.request(method.upper(), url, headers=headers, data=data, timeout=API_REQUEST_TIMEOUT) - except Exception as ex: - logger.error(f"HERE An error occurred: {ex}. ") - + response = requests.request(method.upper(), url, headers=headers, data=data, timeout=API_REQUEST_TIMEOUT) try: response.raise_for_status() except requests.HTTPError as http_err: diff --git a/src/api_clients/invalid_rest.py b/src/api_clients/invalid_rest.py index 4d026fd..907219b 100644 --- a/src/api_clients/invalid_rest.py +++ b/src/api_clients/invalid_rest.py @@ -54,8 +54,8 @@ def alter_get_range_query(query): class INVALID_REST(REST): - def __init__(self, port, host="127.0.0.1"): - super().__init__(port, host) + def __init__(self, rest_port): + super().__init__(rest_port) def send_dispersal_request(self, data): data = alter_dispersal_data(data) diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py index 78539ec..8612ff1 100644 --- a/src/api_clients/rest.py +++ b/src/api_clients/rest.py @@ -6,17 +6,16 @@ logger = get_custom_logger(__name__) class REST(BaseClient): - def __init__(self, port, host="127.0.0.1"): - self._rest_port = port - self._rest_host = host + def __init__(self, rest_port): + self._rest_port = rest_port def rest_call(self, method, endpoint, payload=None): - url = f"http://{self._rest_host}:{self._rest_port}/{endpoint}" + url = f"http://localhost:{self._rest_port}/{endpoint}" headers = {"Content-Type": "application/json", "Connection": "close"} return self.make_request(method, url, headers=headers, data=payload) def rest_call_text(self, method, endpoint, payload=None): - url = f"http://{self._rest_host}:{self._rest_port}/{endpoint}" + url = f"http://localhost:{self._rest_port}/{endpoint}" headers = {"accept": "text/plain", "Connection": "close"} return self.make_request(method, url, headers=headers, data=payload) diff --git a/src/client/nomos_cli.py b/src/client/nomos_cli.py index b80db47..15817a8 100644 --- a/src/client/nomos_cli.py +++ b/src/client/nomos_cli.py @@ -37,7 +37,6 @@ class NomosCli: self._docker_manager = DockerManager(self._image_name) self._container_name 
= container_name self._container = None - self._api = None cwd = os.getcwd() self._volumes = [cwd + "/" + volume for volume in self._volumes] @@ -48,7 +47,6 @@ class NomosCli: self._port_map = {} cmd = [NOMOS_CLI, self._command] - for flag in nomos_cli[self._command]["flags"]: for f, indexes in flag.items(): cmd.append(f) @@ -76,7 +74,7 @@ class NomosCli: decode_only = kwargs.get("decode_only", False) return self.reconstruct(decode_only=decode_only) case _: - return None + return def reconstruct(self, decode_only=False): keywords = ["Reconstructed data"] @@ -105,9 +103,6 @@ class NomosCli: return result - def set_rest_api(self, host, port): - self._api = REST(port, host) - @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): self._container = stop(self._container) diff --git a/src/client/proxy_client.py b/src/client/proxy_client.py index cca35e0..082350a 100644 --- a/src/client/proxy_client.py +++ b/src/client/proxy_client.py @@ -76,11 +76,11 @@ class ProxyClient: DS.client_nodes.append(self) - def set_rest_api(self, host, port): - self._api = REST(port, host) + def set_rest_api(self): + self._api = REST(self._tcp_port) - def set_invalid_rest_api(self, host, port): - self._api = INVALID_REST(port, host) + def set_invalid_rest_api(self): + self._api = INVALID_REST(self._tcp_port) @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): diff --git a/src/env_vars.py b/src/env_vars.py index 46c149e..75b705e 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -15,20 +15,21 @@ def get_env_var(var_name, default=None): # Configuration constants. Need to be upercase to appear in reports +DEFAULT_NOMOS_IMAGE = "ghcr.io/logos-co/nomos-node:testnet" +NOMOS_IMAGE = get_env_var("NOMOS_IMAGE", DEFAULT_NOMOS_IMAGE) + +DEFAULT_PROXY_IMAGE = "bitnami/configurable-http-proxy:latest" +HTTP_PROXY_IMAGE = get_env_var("HTTP_PROXY_IMAGE", DEFAULT_PROXY_IMAGE) + NOMOS = "nomos" NOMOS_EXECUTOR = "nomos_executor" CFGSYNC = "cfgsync" -DEFAULT_IMAGE = "ghcr.io/logos-co/nomos-node:testnet" -NOMOS_IMAGE = get_env_var("NOMOS_IMAGE", DEFAULT_IMAGE) - -HTTP_PROXY_IMAGE = get_env_var("HTTP_PROXY_IMAGE", "bitnami/configurable-http-proxy:latest") - NODE_1 = get_env_var("NODE_1", NOMOS) NODE_2 = get_env_var("NODE_2", NOMOS_EXECUTOR) NODE_3 = get_env_var("NODE_3", CFGSYNC) -NOMOS_CLI = "/usr/bin/nomos-client" +NOMOS_CLI = "/usr/bin/nomos-cli" ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", f"{NOMOS},{NOMOS}") # more nodes need to follow the NODE_X pattern diff --git a/src/steps/common.py b/src/steps/common.py index e777b68..b08e045 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -1,10 +1,9 @@ import inspect import os -import shutil import pytest -from src.client.nomos_cli import NomosCli +from src.client.proxy_client import ProxyClient from src.env_vars import CFGSYNC, NOMOS, NOMOS_EXECUTOR from src.libs.common import delay from src.libs.custom_logger import get_custom_logger @@ -91,15 +90,21 @@ class StepsCommon: delay(5) @pytest.fixture(scope="function") - def setup_client_nodes(self, request): + def setup_proxy_clients(self, request): logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}") + assert len(self.main_nodes) == 3, "There should be two Nomos nodes running already" + if hasattr(request, "param"): num_clients = request.param else: - num_clients = 5 + num_clients = 10 + assert num_clients % 2 == 0, "num_clients must be an even number" + + # Every even proxy client for get-range, every odd for dispersal for i in 
range(num_clients): - cli_node = NomosCli(command="client_node") - cli_node.run() - self.client_nodes.append(cli_node) + proxy_client = ProxyClient() + default_target = [f"http://{self.main_nodes[1 + i % 2].name()}:18080"] + proxy_client.run(input_values=default_target) + self.client_nodes.append(proxy_client) diff --git a/src/steps/da.py b/src/steps/da.py index 32bda30..e768c7c 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -57,9 +57,9 @@ class StepsDataAvailability(StepsCommon): response = executor.send_dispersal_request(request) else: if send_invalid: - client_node.set_invalid_rest_api(executor.name(), executor.api_port_internal()) + client_node.set_invalid_rest_api() else: - client_node.set_rest_api(executor.name(), executor.api_port_internal()) + client_node.set_rest_api() response = client_node.send_dispersal_request(request) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) @@ -82,9 +82,9 @@ class StepsDataAvailability(StepsCommon): response = node.send_get_data_range_request(query) else: if send_invalid: - client_node.set_invalid_rest_api(node.name(), node.api_port_internal()) + client_node.set_invalid_rest_api() else: - client_node.set_rest_api(node.name(), node.api_port_internal()) + client_node.set_rest_api() response = client_node.send_get_data_range_request(query) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) diff --git a/tests/data_integrity/test_data_integrity.py b/tests/data_integrity/test_data_integrity.py index 40eae4a..bce91f7 100644 --- a/tests/data_integrity/test_data_integrity.py +++ b/tests/data_integrity/test_data_integrity.py @@ -3,7 +3,7 @@ import random import pytest -from src.cli.nomos_cli import NomosCli +from src.client.nomos_cli import NomosCli from src.libs.common import delay, to_app_id, to_index from src.libs.custom_logger import get_custom_logger from src.steps.da import StepsDataAvailability diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index cf71256..5cc1fdf 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -113,7 +113,7 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" - @pytest.mark.usefixtures("setup_2_node_cluster", "setup_client_nodes") + @pytest.mark.usefixtures("setup_2_node_cluster", "setup_proxy_clients") def test_sustained_high_rate_multiple_clients(self): timeout = 10 start_time = time.time() @@ -126,7 +126,7 @@ class TestHighLoadDos(StepsDataAvailability): if time.time() - start_time > timeout: break - dispersal_cl, download_cl = random.choice(self.client_nodes), random.choice(self.client_nodes) + dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) delay(0.01) try: @@ -156,7 +156,8 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" - @pytest.mark.usefixtures("setup_2_node_cluster", "setup_client_nodes") + @pytest.mark.timeout(3600) + @pytest.mark.usefixtures("setup_2_node_cluster", "setup_proxy_clients") def test_sustained_high_rate_with_invalid_requests(self): timeout = 10 start_time = time.time() @@ -169,10 +170,8 @@ class 
TestHighLoadDos(StepsDataAvailability): if time.time() - start_time > timeout: break - dispersal_cl, download_cl = random.choice(self.client_nodes), random.choice(self.client_nodes) - - delay(0.01) - invalid = random.choice([True, False]) + dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) + invalid = random.choice([False]) try: response = self.disperse_data( @@ -195,6 +194,8 @@ class TestHighLoadDos(StepsDataAvailability): if not invalid: unsuccessful_downloads += 1 + delay(10) + assert successful_dispersals > 0, "No successful dispersals" assert successful_downloads > 0, "No successful downloads" From f0a084bb4481cd935e2d1bf2ca632a4e7accef4b Mon Sep 17 00:00:00 2001 From: Roman Date: Mon, 10 Mar 2025 10:01:03 +0000 Subject: [PATCH 18/32] fix: update cfgsync-template --- cluster_config/cfgsync-template.yaml | 14 ++++++------ cluster_config/cfgsync.yaml | 32 ---------------------------- 2 files changed, 6 insertions(+), 40 deletions(-) delete mode 100644 cluster_config/cfgsync.yaml diff --git a/cluster_config/cfgsync-template.yaml b/cluster_config/cfgsync-template.yaml index 59e93e6..c57b201 100644 --- a/cluster_config/cfgsync-template.yaml +++ b/cluster_config/cfgsync-template.yaml @@ -13,20 +13,18 @@ num_samples: 1 num_subnets: 2 old_blobs_check_interval_secs: 5 blobs_validity_duration_secs: 60 -balancer_interval_secs: 1 global_params_path: "/kzgrs_test_params" +min_dispersal_peers: 1 +min_replication_peers: 1 +monitor_failure_time_window_secs: 5 +balancer_interval_secs: 5 # Tracing tracing_settings: logger: Stdout - tracing: !Otlp - endpoint: http://tempo:4317/ - sample_ratio: 0.5 - service_name: node + tracing: None filter: !EnvFilter filters: nomos: debug - metrics: !Otlp - endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics - host_identifier: node + metrics: None level: INFO \ No newline at end of file diff --git a/cluster_config/cfgsync.yaml b/cluster_config/cfgsync.yaml deleted file mode 100644 index e1a0fad..0000000 --- a/cluster_config/cfgsync.yaml +++ /dev/null @@ -1,32 +0,0 @@ -port: 4400 -n_hosts: 2 -timeout: 30 - -# ConsensusConfig related parameters -security_param: 10 -active_slot_coeff: 0.9 - -# DaConfig related parameters -subnetwork_size: 2 -dispersal_factor: 2 -num_samples: 1 -num_subnets: 2 -old_blobs_check_interval_secs: 5 -blobs_validity_duration_secs: 60 -balancer_interval_secs: 1 -global_params_path: "/kzgrs_test_params" - -# Tracing -tracing_settings: - logger: Stdout - tracing: !Otlp - endpoint: http://tempo:4317/ - sample_ratio: 0.5 - service_name: node - filter: !EnvFilter - filters: - nomos: debug - metrics: !Otlp - endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics - host_identifier: node - level: INFO \ No newline at end of file From b1d1518b8be7c06039653ab02ffa43dd233db715 Mon Sep 17 00:00:00 2001 From: Roman Date: Mon, 10 Mar 2025 23:59:21 +0000 Subject: [PATCH 19/32] fix: timeout for sustained_high_rate_with_invalid_requests --- tests/dos_robustness/test_high_load_dos.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 5cc1fdf..57f2d86 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -156,10 +156,9 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio_w < 0.20, f"Dispersal failure ratio {failure_ratio_w} too high" assert failure_ratio_r < 0.20, f"Data download failure ratio {failure_ratio_r} too high" - 
@pytest.mark.timeout(3600) @pytest.mark.usefixtures("setup_2_node_cluster", "setup_proxy_clients") def test_sustained_high_rate_with_invalid_requests(self): - timeout = 10 + timeout = 60 start_time = time.time() successful_dispersals = 0 unsuccessful_dispersals = 0 @@ -194,7 +193,7 @@ class TestHighLoadDos(StepsDataAvailability): if not invalid: unsuccessful_downloads += 1 - delay(10) + delay(0.01) assert successful_dispersals > 0, "No successful dispersals" assert successful_downloads > 0, "No successful downloads" From 27f55c387065d3200911ee6d05b53936d7e062b7 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 02:38:25 +0000 Subject: [PATCH 20/32] fix: logs inspection at the of the test --- src/client/nomos_cli.py | 9 ++++++--- src/docker_manager.py | 20 ++++++++++---------- src/node/nomos_node.py | 12 ++++++++++-- src/test_data.py | 2 ++ tests/conftest.py | 2 +- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/client/nomos_cli.py b/src/client/nomos_cli.py index 15817a8..8f1ed58 100644 --- a/src/client/nomos_cli.py +++ b/src/client/nomos_cli.py @@ -77,15 +77,18 @@ class NomosCli: return def reconstruct(self, decode_only=False): - keywords = ["Reconstructed data"] + keyword = "Reconstructed data" + keywords = [keyword] log_stream = self._container.logs(stream=True) matches = self._docker_manager.search_log_for_keywords(self._log_path, keywords, False, log_stream) - assert len(matches) > 0, f"Reconstructed data not found {matches}" + assert len(matches[keyword]) > 0, f"Reconstructed data not found {matches[keyword]}" + + logger.debug(f"Reconstructed data match found {matches[keyword]}") # Use regular expression that captures the byte list after "Reconstructed data" - result = re.sub(r".*Reconstructed data\s*(\[[^\]]+\]).*", r"\1", matches[keywords[0]][0]) + result = re.sub(r".*Reconstructed data\s*(\[[^\]]+\]).*", r"\1", matches[keyword][0]) result_bytes = [] try: diff --git a/src/docker_manager.py b/src/docker_manager.py index 4f5fd8b..22dc71f 100644 --- a/src/docker_manager.py +++ b/src/docker_manager.py @@ -142,35 +142,35 @@ class DockerManager: for keyword in keywords: if use_regex: if re.search(keyword, line, re.IGNORECASE): - matches[keyword].append(line.strip()) + matches[keyword].append(line) else: if keyword.lower() in line.lower(): - matches[keyword].append(line.strip()) + matches[keyword].append(line) return matches def search_log_for_keywords(self, log_path, keywords, use_regex=False, log_stream=None): - matches = {} + matches = {keyword: [] for keyword in keywords} # Read from stream if log_stream is not None: for line in log_stream: - matches = self.find_keywords_in_line(keywords, line.decode("utf-8"), use_regex=use_regex) + line_matches = self.find_keywords_in_line(keywords, line.decode("utf-8"), use_regex=use_regex) + for keyword, result in line_matches.items(): + matches[keyword].extend(result) else: # Open the log file and search line by line with open(log_path, "r") as log_file: for line in log_file: - matches = self.find_keywords_in_line(keywords, line, use_regex=use_regex) + line_matches = self.find_keywords_in_line(keywords, line, use_regex=use_regex) + for keyword, result in line_matches.items(): + matches[keyword].extend(result) # Check if there were any matches - if any(matches[keyword] for keyword in keywords): - for keyword, lines in matches.items(): - if lines: - logger.debug(f"Found matches for keyword '{keyword}': {lines}") + if any(matches_list for matches_list in matches.values()): return matches else: - logger.debug("No keywords 
found in the nomos logs.") return None diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py index 4921a13..3547fa4 100644 --- a/src/node/nomos_node.py +++ b/src/node/nomos_node.py @@ -142,8 +142,16 @@ class NomosNode: if whitelist: keywords = [keyword for keyword in keywords if keyword not in whitelist] - matches = self._docker_manager.search_log_for_keywords(self._log_path, keywords, False) - assert not matches, f"Found errors {matches}" + matches_found = self._docker_manager.search_log_for_keywords(self._log_path, keywords, False) + + logger.info(f"Printing log matches for {self.name()}") + if matches_found: + for match in matches_found: + if len(matches_found[match]) != 0: + for log_line in matches_found[match]: + logger.debug(f"Log line matching keyword '{match}': {log_line}") + else: + logger.debug("No keyword matches found in the logs.") def send_dispersal_request(self, data): return self._api.send_dispersal_request(data) diff --git a/src/test_data.py b/src/test_data.py index de36752..1293358 100644 --- a/src/test_data.py +++ b/src/test_data.py @@ -23,6 +23,8 @@ LOG_ERROR_KEYWORDS = [ "goexit", "race condition", "double free", + "error", + "warn", ] DATA_TO_DISPERSE = [ diff --git a/tests/conftest.py b/tests/conftest.py index dd418c0..abcc9b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,7 +97,7 @@ def close_open_nodes(attach_logs_on_fail): @pytest.fixture(scope="function", autouse=True) -def check_nomos_log_errors(): +def check_nomos_log_errors(request): yield logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") for node in DS.nomos_nodes: From 26c4aecd95d3d5022cc11a9a1b54f80695d83f69 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 02:53:35 +0000 Subject: [PATCH 21/32] fix: simplify match print --- src/node/nomos_node.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py index 3547fa4..5c0400a 100644 --- a/src/node/nomos_node.py +++ b/src/node/nomos_node.py @@ -146,10 +146,9 @@ class NomosNode: logger.info(f"Printing log matches for {self.name()}") if matches_found: - for match in matches_found: - if len(matches_found[match]) != 0: - for log_line in matches_found[match]: - logger.debug(f"Log line matching keyword '{match}': {log_line}") + for keyword, log_lines in matches_found.items(): + for line in log_lines: + logger.debug(f"Log line matching keyword '{keyword}': {line}") else: logger.debug("No keyword matches found in the logs.") From 948446e599707a2e4878515d72e1d508cfbd2a46 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 03:04:44 +0000 Subject: [PATCH 22/32] fix: simplify search_log_for_keywords --- src/docker_manager.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/docker_manager.py b/src/docker_manager.py index 22dc71f..46d1905 100644 --- a/src/docker_manager.py +++ b/src/docker_manager.py @@ -152,20 +152,16 @@ class DockerManager: def search_log_for_keywords(self, log_path, keywords, use_regex=False, log_stream=None): matches = {keyword: [] for keyword in keywords} - # Read from stream - if log_stream is not None: - for line in log_stream: - line_matches = self.find_keywords_in_line(keywords, line.decode("utf-8"), use_regex=use_regex) - for keyword, result in line_matches.items(): - matches[keyword].extend(result) + if log_stream is None: + log_stream = open(log_path, "r") - else: - # Open the log file and search line by line - with open(log_path, "r") as log_file: - for line in 
log_file: - line_matches = self.find_keywords_in_line(keywords, line, use_regex=use_regex) - for keyword, result in line_matches.items(): - matches[keyword].extend(result) + for line in log_stream: + # Decode line if it is a byte object not str + if hasattr(line, "decode"): + line = line.decode("utf-8") + line_matches = self.find_keywords_in_line(keywords, line, use_regex=use_regex) + for keyword, result in line_matches.items(): + matches[keyword].extend(result) # Check if there were any matches if any(matches_list for matches_list in matches.values()): From 74cd882963d732e9f59fd8b95404cf5cdb4c16bb Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 03:14:39 +0000 Subject: [PATCH 23/32] fix: better log container start --- src/client/nomos_cli.py | 1 + src/client/proxy_client.py | 1 + src/node/nomos_node.py | 3 +-- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/client/nomos_cli.py b/src/client/nomos_cli.py index 8f1ed58..42c230e 100644 --- a/src/client/nomos_cli.py +++ b/src/client/nomos_cli.py @@ -67,6 +67,7 @@ class NomosCli: command=cmd, ) + logger.info(f"Started container {self._container_name} from image {self._image_name}.") DS.client_nodes.append(self) match self._command: diff --git a/src/client/proxy_client.py b/src/client/proxy_client.py index 082350a..e2198da 100644 --- a/src/client/proxy_client.py +++ b/src/client/proxy_client.py @@ -74,6 +74,7 @@ class ProxyClient: command=cmd, ) + logger.info(f"Started container {self._container_name} from image {self._image_name}.") DS.client_nodes.append(self) def set_rest_api(self): diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py index 5c0400a..0d372b7 100644 --- a/src/node/nomos_node.py +++ b/src/node/nomos_node.py @@ -78,8 +78,7 @@ class NomosNode: name=self._container_name, ) - logger.debug(f"Started container from image {self._image_name}. " f"REST: {getattr(self, '_tcp_port', 'N/A')}") - + logger.info(f"Started container {self._container_name} from image {self._image_name}. 
" f"REST: {getattr(self, '_tcp_port', 'N/A')}") DS.nomos_nodes.append(self) @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) From e69c0230106b68b34344bb72b05932f597c06623 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 03:17:36 +0000 Subject: [PATCH 24/32] fix: move delay before try/except to guarantee it --- tests/dos_robustness/test_high_load_dos.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 57f2d86..6dc3eeb 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -172,6 +172,7 @@ class TestHighLoadDos(StepsDataAvailability): dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) invalid = random.choice([False]) + delay(0.01) try: response = self.disperse_data( DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=dispersal_cl, timeout_duration=0, send_invalid=invalid @@ -193,8 +194,6 @@ class TestHighLoadDos(StepsDataAvailability): if not invalid: unsuccessful_downloads += 1 - delay(0.01) - assert successful_dispersals > 0, "No successful dispersals" assert successful_downloads > 0, "No successful downloads" From b8c6764bf10f1fb4e0e16ab5fe9d093dd191560d Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 23:39:50 +0000 Subject: [PATCH 25/32] fix: invalid REST API --- src/api_clients/invalid_rest.py | 7 +++++-- src/client/nomos_cli.py | 2 -- src/client/proxy_client.py | 4 ++-- tests/dos_robustness/test_high_load_dos.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/api_clients/invalid_rest.py b/src/api_clients/invalid_rest.py index 907219b..3c9339f 100644 --- a/src/api_clients/invalid_rest.py +++ b/src/api_clients/invalid_rest.py @@ -31,6 +31,7 @@ def alter_dispersal_data(data): choice = random.choice([alter_data_content, alter_metadata, add_random_property]) choice() + logger.debug(f"Data for dispersal request has been altered to: {data}") return data @@ -50,16 +51,18 @@ def alter_get_range_query(query): choice = random.choice([swap_range, alter_app_id]) choice() + logger.debug(f"Get-range query has been altered to: {query}") return query -class INVALID_REST(REST): +class InvalidRest(REST): def __init__(self, rest_port): super().__init__(rest_port) def send_dispersal_request(self, data): data = alter_dispersal_data(data) - return self.rest_call("post", "disperse-data", json.dumps(data)) + response = self.rest_call("post", "disperse-data", json.dumps(data)) + return response def send_get_range(self, query): query = alter_get_range_query(query) diff --git a/src/client/nomos_cli.py b/src/client/nomos_cli.py index 42c230e..f01da6c 100644 --- a/src/client/nomos_cli.py +++ b/src/client/nomos_cli.py @@ -2,8 +2,6 @@ import json import os import re -from src.api_clients.invalid_rest import INVALID_REST -from src.api_clients.rest import REST from src.data_storage import DS from src.libs.common import generate_log_prefix, delay, remove_padding from src.libs.custom_logger import get_custom_logger diff --git a/src/client/proxy_client.py b/src/client/proxy_client.py index e2198da..77c0b78 100644 --- a/src/client/proxy_client.py +++ b/src/client/proxy_client.py @@ -2,7 +2,7 @@ import json import os import re -from src.api_clients.invalid_rest import INVALID_REST +from src.api_clients.invalid_rest import InvalidRest from src.api_clients.rest import REST from src.data_storage import DS from src.libs.common import 
generate_log_prefix, delay, remove_padding @@ -81,7 +81,7 @@ class ProxyClient: self._api = REST(self._tcp_port) def set_invalid_rest_api(self): - self._api = INVALID_REST(self._tcp_port) + self._api = InvalidRest(self._tcp_port) @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 6dc3eeb..2645c97 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -170,7 +170,7 @@ class TestHighLoadDos(StepsDataAvailability): break dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) - invalid = random.choice([False]) + invalid = random.choice([False, True]) delay(0.01) try: From 9b41603bb81c30b7494cc0123e2b05f5a7914ee3 Mon Sep 17 00:00:00 2001 From: Roman Date: Tue, 11 Mar 2025 23:47:48 +0000 Subject: [PATCH 26/32] fix: invalid REST API - logging --- src/api_clients/invalid_rest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/api_clients/invalid_rest.py b/src/api_clients/invalid_rest.py index 3c9339f..8434e70 100644 --- a/src/api_clients/invalid_rest.py +++ b/src/api_clients/invalid_rest.py @@ -29,9 +29,10 @@ def alter_dispersal_data(data): data[random_str] = list(generate_random_bytes(random_n)) choice = random.choice([alter_data_content, alter_metadata, add_random_property]) + logger.debug(f"Data for dispersal request has been altered with: {choice.__name__}") + choice() - logger.debug(f"Data for dispersal request has been altered to: {data}") return data @@ -49,9 +50,10 @@ def alter_get_range_query(query): query["app_id"] = list(generate_random_bytes(random_n)) choice = random.choice([swap_range, alter_app_id]) + logger.debug(f"Get-range query has been altered with: {choice.__name__}") + choice() - logger.debug(f"Get-range query has been altered to: {query}") return query From a373193bdbef78fde5e5c896fd4df088401ee276 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 00:12:35 +0000 Subject: [PATCH 27/32] fix: simplify invalid REST API usage --- src/api_clients/rest.py | 7 ++++--- src/client/proxy_client.py | 20 ++++++++++---------- src/steps/da.py | 12 ++---------- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/api_clients/rest.py b/src/api_clients/rest.py index 8612ff1..65b593f 100644 --- a/src/api_clients/rest.py +++ b/src/api_clients/rest.py @@ -20,11 +20,12 @@ class REST(BaseClient): return self.make_request(method, url, headers=headers, data=payload) def info(self): - status_response = self.rest_call("get", "cryptarchia/info") - return status_response.json() + response = self.rest_call("get", "cryptarchia/info") + return response.json() def send_dispersal_request(self, data): - return self.rest_call("post", "disperse-data", json.dumps(data)) + response = self.rest_call("post", "disperse-data", json.dumps(data)) + return response def send_get_range(self, query): response = self.rest_call("post", "da/get-range", json.dumps(query)) diff --git a/src/client/proxy_client.py b/src/client/proxy_client.py index 77c0b78..86dc908 100644 --- a/src/client/proxy_client.py +++ b/src/client/proxy_client.py @@ -1,6 +1,4 @@ -import json import os -import re from src.api_clients.invalid_rest import InvalidRest from src.api_clients.rest import REST @@ -33,6 +31,7 @@ class ProxyClient: self._container_name = container_name self._container = None self._api = None + self._invalid_api = None cwd = os.getcwd() 
self._volumes = [cwd + "/" + volume for volume in self._volumes] @@ -44,6 +43,7 @@ class ProxyClient: self._external_ports = self._docker_manager.generate_ports(count=1) self._tcp_port = self._external_ports[0] self._api = REST(self._tcp_port) + self._invalid_api = InvalidRest(self._tcp_port) logger.debug(f"Internal ports {self._internal_ports}") @@ -77,12 +77,6 @@ class ProxyClient: logger.info(f"Started container {self._container_name} from image {self._image_name}.") DS.client_nodes.append(self) - def set_rest_api(self): - self._api = REST(self._tcp_port) - - def set_invalid_rest_api(self): - self._api = InvalidRest(self._tcp_port) - @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True) def stop(self): self._container = stop(self._container) @@ -94,8 +88,14 @@ class ProxyClient: def name(self): return self._container_name - def send_dispersal_request(self, data): + def send_dispersal_request(self, data, send_invalid=False): + if send_invalid: + return self._invalid_api.send_dispersal_request(data) + return self._api.send_dispersal_request(data) - def send_get_data_range_request(self, data): + def send_get_data_range_request(self, data, send_invalid=False): + if send_invalid: + return self._invalid_api.send_get_range(data) + return self._api.send_get_range(data) diff --git a/src/steps/da.py b/src/steps/da.py index e768c7c..5bf8f98 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -56,11 +56,7 @@ class StepsDataAvailability(StepsCommon): if client_node is None: response = executor.send_dispersal_request(request) else: - if send_invalid: - client_node.set_invalid_rest_api() - else: - client_node.set_rest_api() - response = client_node.send_dispersal_request(request) + response = client_node.send_dispersal_request(request, send_invalid=send_invalid) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) @@ -81,11 +77,7 @@ class StepsDataAvailability(StepsCommon): if client_node is None: response = node.send_get_data_range_request(query) else: - if send_invalid: - client_node.set_invalid_rest_api() - else: - client_node.set_rest_api() - response = client_node.send_get_data_range_request(query) + response = client_node.send_get_data_range_request(query, send_invalid=send_invalid) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) From d19d2248136734df16ec2a510a3bbfc841080f2c Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 00:37:10 +0000 Subject: [PATCH 28/32] fix: move fixture to class level --- tests/dos_robustness/test_high_load_dos.py | 24 ++++++---------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 2645c97..fb7c7f9 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -8,20 +8,18 @@ from src.steps.da import StepsDataAvailability, logger from src.test_data import DATA_TO_DISPERSE +@pytest.mark.usefixtures("setup_2_node_cluster") class TestHighLoadDos(StepsDataAvailability): main_nodes = [] client_nodes = [] - @pytest.mark.usefixtures("setup_2_node_cluster") def test_sustained_high_rate_upload(self): timeout = 60 start_time = time.time() successful_dispersals = 0 unsuccessful_dispersals = 0 - while True: - if time.time() - start_time > timeout: - break + while time.time() - start_time < timeout: delay(0.01) try: @@ -40,7 +38,6 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio < 0.20, 
f"Dispersal failure ratio {failure_ratio} too high" - @pytest.mark.usefixtures("setup_2_node_cluster") def test_sustained_high_rate_download(self): timeout = 60 successful_downloads = 0 @@ -54,9 +51,7 @@ class TestHighLoadDos(StepsDataAvailability): delay(5) start_time = time.time() - while True: - if time.time() - start_time > timeout: - break + while time.time() - start_time < timeout: delay(0.01) try: @@ -72,7 +67,6 @@ class TestHighLoadDos(StepsDataAvailability): assert failure_ratio < 0.20, f"Data download failure ratio {failure_ratio} too high" - @pytest.mark.usefixtures("setup_2_node_cluster") def test_sustained_high_rate_mixed(self): timeout = 60 start_time = time.time() @@ -81,9 +75,7 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 - while True: - if time.time() - start_time > timeout: - break + while time.time() - start_time < timeout: delay(0.01) try: @@ -122,9 +114,7 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 - while True: - if time.time() - start_time > timeout: - break + while time.time() - start_time < timeout: dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) @@ -165,9 +155,7 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 - while True: - if time.time() - start_time > timeout: - break + while time.time() - start_time < timeout: dispersal_cl, download_cl = random.choice(self.client_nodes[1::2]), random.choice(self.client_nodes[::2]) invalid = random.choice([False, True]) From bc7fc6164f50d6e65b66827f1a615a5873f2064e Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 03:07:32 +0000 Subject: [PATCH 29/32] fix: move status code assertion to caller - replace Exception with AssertionError --- src/steps/da.py | 1 - tests/dos_robustness/test_high_load_dos.py | 52 +++++++++------------- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/src/steps/da.py b/src/steps/da.py index 5bf8f98..1e0ae23 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -61,7 +61,6 @@ class StepsDataAvailability(StepsCommon): assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) assert hasattr(response, "status_code"), "Missing status_code" - assert response.status_code in (200, 429), "Unexpected status code" return response diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index fb7c7f9..8fed93f 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -24,14 +24,12 @@ class TestHighLoadDos(StepsDataAvailability): delay(0.01) try: response = self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), to_index(0), timeout_duration=0) - if response.status_code == 200: - successful_dispersals += 1 - else: - unsuccessful_dispersals += 1 - except Exception: + assert response.status_code == 200, f"Dispersal failed with status code {response.status_code}" + successful_dispersals += 1 + except AssertionError: unsuccessful_dispersals += 1 - assert successful_dispersals > 0, "No successful dispersals" + assert successful_dispersals > 0, "No successful dispersal" failure_ratio = unsuccessful_dispersals / successful_dispersals logger.info(f"Unsuccessful dispersals ratio: {failure_ratio}") @@ -43,10 +41,8 @@ class TestHighLoadDos(StepsDataAvailability): successful_downloads = 0 unsuccessful_downloads = 0 - try: - self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), 
to_index(0)) - except Exception as ex: - raise Exception(f"Initial dispersal was not successful with error {ex}") + response = self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), to_index(0)) + assert response.status_code == 200, "Initial dispersal was not successful" delay(5) start_time = time.time() @@ -80,11 +76,9 @@ class TestHighLoadDos(StepsDataAvailability): delay(0.01) try: response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), timeout_duration=0) - if response.status_code == 200: - successful_dispersals += 1 - else: - unsuccessful_dispersals += 1 - except Exception: + assert response.status_code == 200, f"Dispersal failed with status code {response.status_code}" + successful_dispersals += 1 + except AssertionError: unsuccessful_dispersals += 1 try: @@ -93,8 +87,8 @@ class TestHighLoadDos(StepsDataAvailability): except Exception: unsuccessful_downloads += 1 - assert successful_dispersals > 0, "No successful dispersals" - assert successful_downloads > 0, "No successful downloads" + assert successful_dispersals > 0, "No successful dispersal" + assert successful_downloads > 0, "No successful download" failure_ratio_w = unsuccessful_dispersals / successful_dispersals failure_ratio_r = unsuccessful_downloads / successful_downloads @@ -121,11 +115,9 @@ class TestHighLoadDos(StepsDataAvailability): delay(0.01) try: response = self.disperse_data(DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=dispersal_cl, timeout_duration=0) - if response.status_code == 200: - successful_dispersals += 1 - else: - unsuccessful_dispersals += 1 - except Exception: + assert response.status_code == 200, f"Dispersal failed with status code {response.status_code}" + successful_dispersals += 1 + except AssertionError: unsuccessful_dispersals += 1 try: @@ -134,8 +126,8 @@ class TestHighLoadDos(StepsDataAvailability): except Exception: unsuccessful_downloads += 1 - assert successful_dispersals > 0, "No successful dispersals" - assert successful_downloads > 0, "No successful downloads" + assert successful_dispersals > 0, "No successful dispersal" + assert successful_downloads > 0, "No successful download" failure_ratio_w = unsuccessful_dispersals / successful_dispersals failure_ratio_r = unsuccessful_downloads / successful_downloads @@ -165,11 +157,9 @@ class TestHighLoadDos(StepsDataAvailability): response = self.disperse_data( DATA_TO_DISPERSE[6], to_app_id(1), to_index(0), client_node=dispersal_cl, timeout_duration=0, send_invalid=invalid ) - if response.status_code == 200: - successful_dispersals += 1 - elif not invalid: - unsuccessful_dispersals += 1 - except Exception: + assert response.status_code == 200, f"Dispersal failed with status code {response.status_code}" + successful_dispersals += 1 + except AssertionError: if not invalid: unsuccessful_dispersals += 1 @@ -182,8 +172,8 @@ class TestHighLoadDos(StepsDataAvailability): if not invalid: unsuccessful_downloads += 1 - assert successful_dispersals > 0, "No successful dispersals" - assert successful_downloads > 0, "No successful downloads" + assert successful_dispersals > 0, "No successful dispersal" + assert successful_downloads > 0, "No successful download" failure_ratio_w = unsuccessful_dispersals / successful_dispersals failure_ratio_r = unsuccessful_downloads / successful_downloads From f95f57a7cedac30010a7ad3419bdd0b558c52643 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 03:09:35 +0000 Subject: [PATCH 30/32] fix: timeout back to 60 --- tests/dos_robustness/test_high_load_dos.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/dos_robustness/test_high_load_dos.py b/tests/dos_robustness/test_high_load_dos.py index 8fed93f..4da8f89 100644 --- a/tests/dos_robustness/test_high_load_dos.py +++ b/tests/dos_robustness/test_high_load_dos.py @@ -101,7 +101,7 @@ class TestHighLoadDos(StepsDataAvailability): @pytest.mark.usefixtures("setup_2_node_cluster", "setup_proxy_clients") def test_sustained_high_rate_multiple_clients(self): - timeout = 10 + timeout = 60 start_time = time.time() successful_dispersals = 0 unsuccessful_dispersals = 0 From 463875f7bfd6bbaa40d214cd66bc2f3b2003855b Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 05:11:20 +0000 Subject: [PATCH 31/32] fix: refactor disperse_data and get_data_range - make check log errors optional --- README.md | 5 +++++ src/env_vars.py | 1 + src/steps/da.py | 23 +++++++++++++++++------ tests/conftest.py | 8 +++++--- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 410ba5a..09dad4e 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,11 @@ pre-commit install (optional) Overwrite default vars from src/env_vars.py via env vars or by adding a .env file pytest ``` +Set optional environment variable to search logs for errors after each tests: +```shell +export CHECK_LOG_ERRORS=True +``` + ## License diff --git a/src/env_vars.py b/src/env_vars.py index 75b705e..863c0a1 100644 --- a/src/env_vars.py +++ b/src/env_vars.py @@ -40,3 +40,4 @@ IP_RANGE = get_env_var("IP_RANGE", "172.19.0.0/24") GATEWAY = get_env_var("GATEWAY", "172.19.0.1") RUNNING_IN_CI = get_env_var("CI") API_REQUEST_TIMEOUT = get_env_var("API_REQUEST_TIMEOUT", 20) +CHECK_LOG_ERRORS = get_env_var("CHECK_LOG_ERRORS", False) diff --git a/src/steps/da.py b/src/steps/da.py index 1e0ae23..cffe4a9 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -45,15 +45,21 @@ class StepsDataAvailability(StepsCommon): return executor @allure.step - def disperse_data(self, data, app_id, index, client_node=None, timeout_duration=65, utf8=True, padding=True, send_invalid=False): + def disperse_data(self, data, app_id, index, client_node=None, **kwargs): + + timeout_duration = kwargs.get("timeout_duration", 65) + utf8 = kwargs.get("utf8", True) + padding = kwargs.get("padding", True) + send_invalid = kwargs.get("send_invalid", False) + + request = prepare_dispersal_request(data, app_id, index, utf8=utf8, padding=padding) + @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def disperse(my_self=self): response = [] - request = prepare_dispersal_request(data, app_id, index, utf8=utf8, padding=padding) - executor = my_self.find_executor_node() - try: if client_node is None: + executor = my_self.find_executor_node() response = executor.send_dispersal_request(request) else: response = client_node.send_dispersal_request(request, send_invalid=send_invalid) @@ -67,11 +73,16 @@ class StepsDataAvailability(StepsCommon): return disperse() @allure.step - def get_data_range(self, node, app_id, start, end, client_node=None, timeout_duration=45, send_invalid=False): + def get_data_range(self, node, app_id, start, end, client_node=None, **kwargs): + + timeout_duration = kwargs.get("timeout_duration", 65) + send_invalid = kwargs.get("send_invalid", False) + + query = prepare_get_range_request(app_id, start, end) + @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True) def get_range(): response = [] - query = prepare_get_range_request(app_id, start, end) try: if client_node is None: response = 
node.send_get_data_range_request(query) diff --git a/tests/conftest.py b/tests/conftest.py index abcc9b7..b28d45e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ import inspect import glob from concurrent.futures import ThreadPoolExecutor, as_completed +from src.env_vars import CHECK_LOG_ERRORS from src.libs.custom_logger import get_custom_logger import os import pytest @@ -99,6 +100,7 @@ def close_open_nodes(attach_logs_on_fail): @pytest.fixture(scope="function", autouse=True) def check_nomos_log_errors(request): yield - logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") - for node in DS.nomos_nodes: - node.check_nomos_log_errors() + if CHECK_LOG_ERRORS.lower() == "true" or CHECK_LOG_ERRORS.lower() == "yes": + logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") + for node in DS.nomos_nodes: + node.check_nomos_log_errors() From 22e56bdf55296b8ba50aee3371aadb2388d4b97a Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 12 Mar 2025 05:44:10 +0000 Subject: [PATCH 32/32] fix: any value counts as true --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index b28d45e..ea0920e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -100,7 +100,7 @@ def close_open_nodes(attach_logs_on_fail): @pytest.fixture(scope="function", autouse=True) def check_nomos_log_errors(request): yield - if CHECK_LOG_ERRORS.lower() == "true" or CHECK_LOG_ERRORS.lower() == "yes": + if CHECK_LOG_ERRORS: logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}") for node in DS.nomos_nodes: node.check_nomos_log_errors()
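
For reference, a minimal, self-contained sketch of the sustained-load pattern these patches converge on (count successes and failures via AssertionError, then assert a failure ratio below 0.20). It is illustrative only: send_request is a hypothetical stand-in for disperse_data / get_data_range and is not part of the framework changed above.

import random
import time


def send_request():
    # Hypothetical stand-in; in the real tests this would be a dispersal
    # or get-range call against a node or proxy client.
    return random.random() > 0.05  # succeeds ~95% of the time


def run_sustained_load(duration_secs=60, pause_secs=0.01, max_failure_ratio=0.20):
    successful = 0
    unsuccessful = 0
    start_time = time.time()

    while time.time() - start_time < duration_secs:
        time.sleep(pause_secs)
        try:
            # Treat any failed request as an AssertionError, mirroring the
            # status-code assertions used in the tests.
            assert send_request(), "Request failed"
            successful += 1
        except AssertionError:
            unsuccessful += 1

    assert successful > 0, "No successful request"
    failure_ratio = unsuccessful / successful
    assert failure_ratio < max_failure_ratio, f"Failure ratio {failure_ratio} too high"
    return failure_ratio


if __name__ == "__main__":
    # Short run just to exercise the loop.
    print(run_sustained_load(duration_secs=1))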