From e83e0b0b239784a61da426ae13514b2a2b3a484b Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 20 Feb 2025 16:52:53 +1100 Subject: [PATCH 1/7] test: disperse to nodes member of many subnets --- cluster_config/cfgsync-2node1024.yaml | 31 +++++++++++++++++++ ...cfgsync-2node.yaml => cfgsync-2node2.yaml} | 0 ...cfgsync-4node.yaml => cfgsync-4node2.yaml} | 0 src/steps/common.py | 12 +++++-- tests/networking_privacy/__init__.py | 0 .../test_networking_privacy.py | 14 +++++++++ 6 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 cluster_config/cfgsync-2node1024.yaml rename cluster_config/{cfgsync-2node.yaml => cfgsync-2node2.yaml} (100%) rename cluster_config/{cfgsync-4node.yaml => cfgsync-4node2.yaml} (100%) create mode 100644 tests/networking_privacy/__init__.py create mode 100644 tests/networking_privacy/test_networking_privacy.py diff --git a/cluster_config/cfgsync-2node1024.yaml b/cluster_config/cfgsync-2node1024.yaml new file mode 100644 index 0000000..64b1df0 --- /dev/null +++ b/cluster_config/cfgsync-2node1024.yaml @@ -0,0 +1,31 @@ +port: 4400 +n_hosts: 2 +timeout: 30 + +# ConsensusConfig related parameters +security_param: 10 +active_slot_coeff: 0.9 + +# DaConfig related parameters +subnetwork_size: 1024 +dispersal_factor: 2 +num_samples: 1 +num_subnets: 2 +old_blobs_check_interval_secs: 5 +blobs_validity_duration_secs: 60 +global_params_path: "/kzgrs_test_params" + +# Tracing +tracing_settings: + logger: Stdout + tracing: !Otlp + endpoint: http://tempo:4317/ + sample_ratio: 0.5 + service_name: node + filter: !EnvFilter + filters: + nomos: debug + metrics: !Otlp + endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics + host_identifier: node + level: INFO \ No newline at end of file diff --git a/cluster_config/cfgsync-2node.yaml b/cluster_config/cfgsync-2node2.yaml similarity index 100% rename from cluster_config/cfgsync-2node.yaml rename to cluster_config/cfgsync-2node2.yaml diff --git a/cluster_config/cfgsync-4node.yaml b/cluster_config/cfgsync-4node2.yaml similarity index 100% rename from cluster_config/cfgsync-4node.yaml rename to cluster_config/cfgsync-4node2.yaml diff --git a/src/steps/common.py b/src/steps/common.py index 4a2c810..a2f74f4 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -11,10 +11,10 @@ from src.node.nomos_node import NomosNode logger = get_custom_logger(__name__) -def prepare_cluster_config(node_count): +def prepare_cluster_config(node_count, subnetwork_size=2): cwd = os.getcwd() config_dir = "cluster_config" - src = f"{cwd}/{config_dir}/cfgsync-{node_count}node.yaml" + src = f"{cwd}/{config_dir}/cfgsync-{node_count}node{subnetwork_size}.yaml" dst = f"{cwd}/{config_dir}/cfgsync.yaml" shutil.copyfile(src, dst) @@ -38,7 +38,13 @@ class StepsCommon: @pytest.fixture(scope="function") def setup_2_node_cluster(self, request): logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}") - prepare_cluster_config(2) + + if hasattr(request, "param"): + subnet_size = request.param + else: + subnet_size = 2 + + prepare_cluster_config(2, subnet_size) self.node1 = NomosNode(CFGSYNC, "cfgsync") self.node2 = NomosNode(NOMOS, "nomos_node_0") self.node3 = NomosNode(NOMOS_EXECUTOR, "nomos_node_1") diff --git a/tests/networking_privacy/__init__.py b/tests/networking_privacy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py new file mode 100644 index 0000000..aab916e --- /dev/null +++ b/tests/networking_privacy/test_networking_privacy.py @@ -0,0 +1,14 @@ +import pytest + +from src.libs.common import delay, to_app_id, to_index +from src.steps.da import StepsDataAvailability +from src.test_data import DATA_TO_DISPERSE + + +class TestNetworkingPrivacy(StepsDataAvailability): + main_nodes = [] + + @pytest.mark.parametrize("setup_2_node_cluster", [1024], indirect=True) + def test_consumed_bandwidth_dispersal(self, setup_2_node_cluster): + delay(5) + self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) From cc8ff45d76477a0abf84899a6e4852463472291a Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 10:10:34 +1100 Subject: [PATCH 2/7] test: use cfgsync template --- cluster_config/cfgsync-template.yaml | 31 +++++++++++++++++++ cluster_config/cfgsync.yaml | 2 +- requirements.txt | 1 + src/steps/common.py | 15 +++++++-- .../test_networking_privacy.py | 2 +- 5 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 cluster_config/cfgsync-template.yaml diff --git a/cluster_config/cfgsync-template.yaml b/cluster_config/cfgsync-template.yaml new file mode 100644 index 0000000..308c315 --- /dev/null +++ b/cluster_config/cfgsync-template.yaml @@ -0,0 +1,31 @@ +port: 4400 +n_hosts: {{ num_hosts }} +timeout: 30 + +# ConsensusConfig related parameters +security_param: 10 +active_slot_coeff: 0.9 + +# DaConfig related parameters +subnetwork_size: {{ subnet_size }} +dispersal_factor: 2 +num_samples: 1 +num_subnets: 2 +old_blobs_check_interval_secs: 5 +blobs_validity_duration_secs: 60 +global_params_path: "/kzgrs_test_params" + +# Tracing +tracing_settings: + logger: Stdout + tracing: !Otlp + endpoint: http://tempo:4317/ + sample_ratio: 0.5 + service_name: node + filter: !EnvFilter + filters: + nomos: debug + metrics: !Otlp + endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics + host_identifier: node + level: INFO \ No newline at end of file diff --git a/cluster_config/cfgsync.yaml b/cluster_config/cfgsync.yaml index 10840a5..64b1df0 100644 --- a/cluster_config/cfgsync.yaml +++ b/cluster_config/cfgsync.yaml @@ -7,7 +7,7 @@ security_param: 10 active_slot_coeff: 0.9 # DaConfig related parameters -subnetwork_size: 2 +subnetwork_size: 1024 dispersal_factor: 2 num_samples: 1 num_subnets: 2 diff --git a/requirements.txt b/requirements.txt index 2965db5..0733397 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,3 +39,4 @@ typing-inspect==0.9.0 typing_extensions==4.9.0 urllib3==2.2.2 virtualenv==20.25.0 +Jinja2~=3.1.5 \ No newline at end of file diff --git a/src/steps/common.py b/src/steps/common.py index a2f74f4..9ddb65d 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -8,15 +8,24 @@ from src.env_vars import CFGSYNC, NOMOS, NOMOS_EXECUTOR from src.libs.custom_logger import get_custom_logger from src.node.nomos_node import NomosNode +from jinja2 import Template + logger = get_custom_logger(__name__) def prepare_cluster_config(node_count, subnetwork_size=2): cwd = os.getcwd() config_dir = "cluster_config" - src = f"{cwd}/{config_dir}/cfgsync-{node_count}node{subnetwork_size}.yaml" - dst = f"{cwd}/{config_dir}/cfgsync.yaml" - shutil.copyfile(src, dst) + + with open(f"{cwd}/{config_dir}/cfgsync-template.yaml", "r") as file: + template_content = file.read() + template = Template(template_content) + + rendered = template.render(num_hosts=node_count, subnet_size=subnetwork_size) + logger.debug(f"Rendered template {rendered}") + + with open(f"{cwd}/{config_dir}/cfgsync.yaml", "w") as outfile: + outfile.write(rendered) def start_nodes(nodes): diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py index aab916e..a912380 100644 --- a/tests/networking_privacy/test_networking_privacy.py +++ b/tests/networking_privacy/test_networking_privacy.py @@ -11,4 +11,4 @@ class TestNetworkingPrivacy(StepsDataAvailability): @pytest.mark.parametrize("setup_2_node_cluster", [1024], indirect=True) def test_consumed_bandwidth_dispersal(self, setup_2_node_cluster): delay(5) - self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) + # self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) From 9f729d9289c2fb06375e0a08b26110c162bd3918 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 10:28:03 +1100 Subject: [PATCH 3/7] fix: cleanup config files --- cluster_config/cfgsync-2node1024.yaml | 31 ------------------- cluster_config/cfgsync-2node2.yaml | 31 ------------------- cluster_config/cfgsync-4node2.yaml | 31 ------------------- .../test_networking_privacy.py | 2 +- 4 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 cluster_config/cfgsync-2node1024.yaml delete mode 100644 cluster_config/cfgsync-2node2.yaml delete mode 100644 cluster_config/cfgsync-4node2.yaml diff --git a/cluster_config/cfgsync-2node1024.yaml b/cluster_config/cfgsync-2node1024.yaml deleted file mode 100644 index 64b1df0..0000000 --- a/cluster_config/cfgsync-2node1024.yaml +++ /dev/null @@ -1,31 +0,0 @@ -port: 4400 -n_hosts: 2 -timeout: 30 - -# ConsensusConfig related parameters -security_param: 10 -active_slot_coeff: 0.9 - -# DaConfig related parameters -subnetwork_size: 1024 -dispersal_factor: 2 -num_samples: 1 -num_subnets: 2 -old_blobs_check_interval_secs: 5 -blobs_validity_duration_secs: 60 -global_params_path: "/kzgrs_test_params" - -# Tracing -tracing_settings: - logger: Stdout - tracing: !Otlp - endpoint: http://tempo:4317/ - sample_ratio: 0.5 - service_name: node - filter: !EnvFilter - filters: - nomos: debug - metrics: !Otlp - endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics - host_identifier: node - level: INFO \ No newline at end of file diff --git a/cluster_config/cfgsync-2node2.yaml b/cluster_config/cfgsync-2node2.yaml deleted file mode 100644 index 10840a5..0000000 --- a/cluster_config/cfgsync-2node2.yaml +++ /dev/null @@ -1,31 +0,0 @@ -port: 4400 -n_hosts: 2 -timeout: 30 - -# ConsensusConfig related parameters -security_param: 10 -active_slot_coeff: 0.9 - -# DaConfig related parameters -subnetwork_size: 2 -dispersal_factor: 2 -num_samples: 1 -num_subnets: 2 -old_blobs_check_interval_secs: 5 -blobs_validity_duration_secs: 60 -global_params_path: "/kzgrs_test_params" - -# Tracing -tracing_settings: - logger: Stdout - tracing: !Otlp - endpoint: http://tempo:4317/ - sample_ratio: 0.5 - service_name: node - filter: !EnvFilter - filters: - nomos: debug - metrics: !Otlp - endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics - host_identifier: node - level: INFO \ No newline at end of file diff --git a/cluster_config/cfgsync-4node2.yaml b/cluster_config/cfgsync-4node2.yaml deleted file mode 100644 index 68a9878..0000000 --- a/cluster_config/cfgsync-4node2.yaml +++ /dev/null @@ -1,31 +0,0 @@ -port: 4400 -n_hosts: 4 -timeout: 30 - -# ConsensusConfig related parameters -security_param: 10 -active_slot_coeff: 0.9 - -# DaConfig related parameters -subnetwork_size: 2 -dispersal_factor: 2 -num_samples: 1 -num_subnets: 2 -old_blobs_check_interval_secs: 5 -blobs_validity_duration_secs: 60 -global_params_path: "/kzgrs_test_params" - -# Tracing -tracing_settings: - logger: Stdout - tracing: !Otlp - endpoint: http://tempo:4317/ - sample_ratio: 0.5 - service_name: node - filter: !EnvFilter - filters: - nomos: debug - metrics: !Otlp - endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics - host_identifier: node - level: INFO \ No newline at end of file diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py index a912380..aab916e 100644 --- a/tests/networking_privacy/test_networking_privacy.py +++ b/tests/networking_privacy/test_networking_privacy.py @@ -11,4 +11,4 @@ class TestNetworkingPrivacy(StepsDataAvailability): @pytest.mark.parametrize("setup_2_node_cluster", [1024], indirect=True) def test_consumed_bandwidth_dispersal(self, setup_2_node_cluster): delay(5) - # self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) + self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) From f11208c649be9c40fa448e2af70bb9ad091587f0 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 10:30:57 +1100 Subject: [PATCH 4/7] fix: cleanup logging in prepare_cluster_config --- src/steps/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/steps/common.py b/src/steps/common.py index 9ddb65d..2ba7c8a 100644 --- a/src/steps/common.py +++ b/src/steps/common.py @@ -22,7 +22,6 @@ def prepare_cluster_config(node_count, subnetwork_size=2): template = Template(template_content) rendered = template.render(num_hosts=node_count, subnet_size=subnetwork_size) - logger.debug(f"Rendered template {rendered}") with open(f"{cwd}/{config_dir}/cfgsync.yaml", "w") as outfile: outfile.write(rendered) From dd32fe1987c357d46bd7275c92412d9f083b416a Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 10:52:10 +1100 Subject: [PATCH 5/7] test: consumed bandwidth measurement --- requirements.txt | 3 ++- .../networking_privacy/test_networking_privacy.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0733397..3ae825e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,4 +39,5 @@ typing-inspect==0.9.0 typing_extensions==4.9.0 urllib3==2.2.2 virtualenv==20.25.0 -Jinja2~=3.1.5 \ No newline at end of file +Jinja2~=3.1.5 +psutil~=7.0.0 \ No newline at end of file diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py index aab916e..c2bb207 100644 --- a/tests/networking_privacy/test_networking_privacy.py +++ b/tests/networking_privacy/test_networking_privacy.py @@ -1,9 +1,13 @@ import pytest +import psutil from src.libs.common import delay, to_app_id, to_index +from src.libs.custom_logger import get_custom_logger from src.steps.da import StepsDataAvailability from src.test_data import DATA_TO_DISPERSE +logger = get_custom_logger(__name__) + class TestNetworkingPrivacy(StepsDataAvailability): main_nodes = [] @@ -11,4 +15,15 @@ class TestNetworkingPrivacy(StepsDataAvailability): @pytest.mark.parametrize("setup_2_node_cluster", [1024], indirect=True) def test_consumed_bandwidth_dispersal(self, setup_2_node_cluster): delay(5) + net_io = psutil.net_io_counters() + prev_total = net_io.bytes_sent + net_io.bytes_recv self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) + net_io = psutil.net_io_counters() + curr_total = net_io.bytes_sent + net_io.bytes_recv + + logger.debug(f"prev_total: {prev_total}") + logger.debug(f"curr_total: {curr_total}") + + consumed = curr_total - prev_total + + logger.debug(f"consumed: {consumed}") From 9336c2a3c1bafbf0267a97e34f8beae8352946d4 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 12:59:36 +1100 Subject: [PATCH 6/7] test: get range --- tests/networking_privacy/test_networking_privacy.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py index c2bb207..3804245 100644 --- a/tests/networking_privacy/test_networking_privacy.py +++ b/tests/networking_privacy/test_networking_privacy.py @@ -27,3 +27,7 @@ class TestNetworkingPrivacy(StepsDataAvailability): consumed = curr_total - prev_total logger.debug(f"consumed: {consumed}") + + delay(5) + rcv_data = self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5)) + logger.debug(f"Received data: {rcv_data}") From db1856de6964898294e3cbab7b01ac050bc62edc Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 21 Feb 2025 19:00:04 +1100 Subject: [PATCH 7/7] fix: add dispersal overhead check - improve error handling for disperse_data --- src/steps/da.py | 5 +++- src/test_data.py | 1 + .../test_networking_privacy.py | 29 +++++++++++++------ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/steps/da.py b/src/steps/da.py index d65329c..779742b 100644 --- a/src/steps/da.py +++ b/src/steps/da.py @@ -77,13 +77,16 @@ class StepsDataAvailability(StepsCommon): @allure.step @retry(stop=stop_after_delay(65), wait=wait_fixed(1), reraise=True) def disperse_data(self, data, app_id, index): + response = [] request = prepare_dispersal_request(data, app_id, index) executor = self.find_executor_node() try: - executor.send_dispersal_request(request) + response = executor.send_dispersal_request(request) except Exception as ex: assert "Bad Request" in str(ex) or "Internal Server Error" in str(ex) + assert response.status_code == 200, "Send dispersal finished with unexpected response code" + @allure.step @retry(stop=stop_after_delay(45), wait=wait_fixed(1), reraise=True) def get_data_range(self, node, app_id, start, end): diff --git a/src/test_data.py b/src/test_data.py index 31cdf31..de36752 100644 --- a/src/test_data.py +++ b/src/test_data.py @@ -33,4 +33,5 @@ DATA_TO_DISPERSE = [ "🚀🌟✨", "Lorem ipsum dolor sit amet", "Hello", + "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF01234", ] diff --git a/tests/networking_privacy/test_networking_privacy.py b/tests/networking_privacy/test_networking_privacy.py index 3804245..30ae389 100644 --- a/tests/networking_privacy/test_networking_privacy.py +++ b/tests/networking_privacy/test_networking_privacy.py @@ -12,22 +12,33 @@ logger = get_custom_logger(__name__) class TestNetworkingPrivacy(StepsDataAvailability): main_nodes = [] - @pytest.mark.parametrize("setup_2_node_cluster", [1024], indirect=True) + @pytest.mark.parametrize("setup_2_node_cluster", [2], indirect=True) def test_consumed_bandwidth_dispersal(self, setup_2_node_cluster): delay(5) net_io = psutil.net_io_counters() prev_total = net_io.bytes_sent + net_io.bytes_recv - self.disperse_data(DATA_TO_DISPERSE[1], to_app_id(1), to_index(0)) + + successful_dispersals = 0 + for i in range(20): + try: + self.disperse_data(DATA_TO_DISPERSE[7], to_app_id(1), to_index(0)) + successful_dispersals += 1 + except Exception as ex: + logger.warning(f"Dispersal #{i} was not successful with error {ex}") + + if successful_dispersals == 10: + break + + delay(0.1) + net_io = psutil.net_io_counters() curr_total = net_io.bytes_sent + net_io.bytes_recv - logger.debug(f"prev_total: {prev_total}") - logger.debug(f"curr_total: {curr_total}") - consumed = curr_total - prev_total - logger.debug(f"consumed: {consumed}") + assert successful_dispersals == 10, "Unable to finish 10 successful dispersals" - delay(5) - rcv_data = self.get_data_range(self.node2, to_app_id(1), to_index(0), to_index(5)) - logger.debug(f"Received data: {rcv_data}") + data_sent = 2 * successful_dispersals * len(DATA_TO_DISPERSE[7]) + overhead = (consumed - data_sent) / data_sent + + assert overhead < 400, "Dispersal overhead is too high"