Mirror of https://github.com/logos-co/nomos-e2e-tests.git, synced 2025-02-03 18:34:33 +00:00

Merge pull request #1 from logos-co/two-node-cl-start
Two node cluster start automation

Commit 78f9e2bfe0
@@ -19,10 +19,10 @@ pytest

 Licensed and distributed under either of

-- MIT license: [LICENSE-MIT](https://github.com/waku-org/js-waku/blob/master/LICENSE-MIT) or http://opensource.org/licenses/MIT
+- MIT license: [LICENSE-MIT](http://opensource.org/licenses/MIT)

 or

-- Apache License, Version 2.0, ([LICENSE-APACHE-v2](https://github.com/waku-org/js-waku/blob/master/LICENSE-APACHE-v2) or http://www.apache.org/licenses/LICENSE-2.0)
+- Apache License, Version 2.0, [LICENSE-APACHE-v2](http://www.apache.org/licenses/LICENSE-2.0)

 at your option. These files may not be copied, modified, or distributed except according to those terms.
cluster_config/cfgsync.yaml (new file, 31 lines)
@@ -0,0 +1,31 @@

port: 4400
n_hosts: 2
timeout: 30

# ConsensusConfig related parameters
security_param: 10
active_slot_coeff: 0.9

# DaConfig related parameters
subnetwork_size: 2
dispersal_factor: 2
num_samples: 1
num_subnets: 2
old_blobs_check_interval_secs: 5
blobs_validity_duration_secs: 60
global_params_path: "/kzgrs_test_params"

# Tracing
tracing_settings:
  logger: Stdout
  tracing: !Otlp
    endpoint: http://tempo:4317/
    sample_ratio: 0.5
    service_name: node
  filter: !EnvFilter
    filters:
      nomos: debug
  metrics: !Otlp
    endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics
    host_identifier: node
  level: INFO
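cfgsync-server reads this file inside the container (see run_cfgsync.sh below); nothing in the test code parses it. For orientation only, here is a hedged sketch of how a test could sanity-check the file with PyYAML (already pinned in the requirements); the custom loader is needed because of the !Otlp and !EnvFilter tags, and the assertions are purely illustrative:

# Sketch only (not part of this PR): sanity-check cluster_config/cfgsync.yaml from the
# repository root before starting containers.  The file uses application tags
# (!Otlp, !EnvFilter), so plain yaml.safe_load() would reject it.
import yaml  # PyYAML, already pinned in the requirements


class ClusterConfigLoader(yaml.SafeLoader):
    """SafeLoader variant that accepts local tags such as !Otlp and !EnvFilter."""


# Any "!Something" tag on a mapping is constructed as an ordinary dict.
ClusterConfigLoader.add_multi_constructor("!", lambda loader, suffix, node: loader.construct_mapping(node, deep=True))

with open("cluster_config/cfgsync.yaml") as f:
    cfg = yaml.load(f, Loader=ClusterConfigLoader)

assert cfg["n_hosts"] == 2, "this config describes a two-node cluster"
assert 0 < cfg["active_slot_coeff"] <= 1
print(f"cfgsync-server will listen on port {cfg['port']} and wait {cfg['timeout']}s for {cfg['n_hosts']} hosts")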
cluster_config/scripts/run_cfgsync.sh (new executable file, 5 lines)
@@ -0,0 +1,5 @@

#!/bin/sh

set -e

exec /usr/bin/cfgsync-server /etc/nomos/cfgsync.yaml
cluster_config/scripts/run_nomos_executor.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@

#!/bin/sh

set -e

export CFG_FILE_PATH="/config.yaml" \
       CFG_SERVER_ADDR="http://cfgsync:4400" \
       CFG_HOST_IP=$(hostname -i) \
       CFG_HOST_KIND="executor" \
       CFG_HOST_IDENTIFIER="executor-$(hostname -i)" \
       LOG_LEVEL="INFO" \
       RISC0_DEV_MODE=true

/usr/bin/cfgsync-client && \
exec /usr/bin/nomos-executor /config.yaml
cluster_config/scripts/run_nomos_node.sh (new executable file, 13 lines)
@@ -0,0 +1,13 @@

#!/bin/sh

set -e

export CFG_FILE_PATH="/config.yaml" \
       CFG_SERVER_ADDR="http://cfgsync:4400" \
       CFG_HOST_IP=$(hostname -i) \
       CFG_HOST_IDENTIFIER="validator-$(hostname -i)" \
       LOG_LEVEL="INFO" \
       RISC0_DEV_MODE=true

/usr/bin/cfgsync-client && \
exec /usr/bin/nomos-node /config.yaml
@@ -31,7 +31,7 @@ pytest-xdist==3.5.0
 python-dotenv==1.0.1
 pytest-dependency==0.6.0
 PyYAML==6.0.1
-requests==2.32.0
+requests==2.31.0
 setuptools==70.0.0
 tenacity==8.2.3
 typeguard==4.1.5
src/data_storage.py (new file, 3 lines)
@@ -0,0 +1,3 @@

# We use this class for global variables
class DS:
    nomos_nodes = []
src/env_vars.py
@@ -15,10 +15,15 @@ def get_env_var(var_name, default=None):


 # Configuration constants. Need to be upercase to appear in reports
-DEFAULT_NOMOS = "nomos:latest"
-NODE_1 = get_env_var("NODE_1", DEFAULT_NOMOS)
-NODE_2 = get_env_var("NODE_2", DEFAULT_NOMOS)
-ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", f"{DEFAULT_NOMOS},{DEFAULT_NOMOS}")
+NOMOS = "nomos"
+NOMOS_EXECUTOR = "nomos_executor"
+CFGSYNC = "cfgsync"
+
+NODE_1 = get_env_var("NODE_1", NOMOS)
+NODE_2 = get_env_var("NODE_2", NOMOS_EXECUTOR)
+NODE_3 = get_env_var("NODE_3", CFGSYNC)
+
+ADDITIONAL_NODES = get_env_var("ADDITIONAL_NODES", f"{NOMOS},{NOMOS}")
 # more nodes need to follow the NODE_X pattern
 DOCKER_LOG_DIR = get_env_var("DOCKER_LOG_DIR", "./log/docker")
 NETWORK_NAME = get_env_var("NETWORK_NAME", "nomos")
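NODE_1, NODE_2 and NODE_3 now name node roles (keys into nomos_nodes in src/node/node_vars.py, added below) rather than Docker images, and each can still be overridden through the environment. A hedged sketch of the fallback pattern behind the get_env_var helper named in the hunk header; the real implementation in src/env_vars.py may differ in its logging details:

# Sketch only: the fallback pattern behind get_env_var(var_name, default=None).
import os


def get_env_var(var_name, default=None):
    # An exported environment variable wins over the built-in default.
    value = os.getenv(var_name, default)
    if value in [None, ""]:
        value = default
    return value


# With nothing exported, the new defaults resolve to the three container roles:
# NODE_1 -> "nomos", NODE_2 -> "nomos_executor", NODE_3 -> "cfgsync".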
src/node/api_clients/rest.py
@@ -20,6 +20,6 @@ class REST(BaseClient):
         headers = {"accept": "text/plain"}
         return self.make_request(method, url, headers=headers, data=payload)

-    def status(self):
-        info_response = self.rest_call("get", "cl/status")
-        return info_response.json()
+    def info(self):
+        status_response = self.rest_call("get", "cryptarchia/info")
+        return status_response.json()
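The readiness probe therefore moves from cl/status to the node's cryptarchia/info endpoint. As a rough, hedged equivalent of what REST.info() does over HTTP (the base URL is an assumption; in the tests the external REST port is assigned dynamically and passed to REST):

# Sketch only: the HTTP request behind REST.info().  BASE_URL is an assumption;
# 18080/tcp is the node's internal REST port (see node_vars.py below), while the
# externally mapped port is chosen at runtime.
import requests

BASE_URL = "http://127.0.0.1:18080"

response = requests.get(f"{BASE_URL}/cryptarchia/info", headers={"accept": "text/plain"}, timeout=10)
response.raise_for_status()
print(response.json())  # consensus info used by NomosNode.ensure_ready()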
src/node/docker_mananger.py
@@ -35,7 +35,7 @@ class DockerManager:
         logger.debug(f"Network {network_name} created")
         return network

-    def start_container(self, image_name, ports, args, log_path, container_ip, volumes, remove_container=True):
+    def start_container(self, image_name, port_bindings, args, log_path, volumes, entrypoint, remove_container=True, name=None):
         cli_args = []
         for key, value in args.items():
             if isinstance(value, list):  # Check if value is a list
@@ -45,18 +45,21 @@ class DockerManager:
             else:
                 cli_args.append(f"--{key}={value}")  # Add a single command

-        port_bindings = {f"{port}/tcp": ("", port) for port in ports}
-        port_bindings_for_log = " ".join(f"-p {port}:{port}" for port in ports)
         cli_args_str_for_log = " ".join(cli_args)
-        logger.debug(f"docker run -i -t {port_bindings_for_log} {image_name} {cli_args_str_for_log}")
+        logger.debug(f"docker run -i -t {port_bindings} {image_name} {cli_args_str_for_log}")
         container = self._client.containers.run(
-            image_name, command=cli_args, ports=port_bindings, detach=True, remove=remove_container, auto_remove=remove_container, volumes=volumes
+            image_name,
+            command=cli_args,
+            ports=port_bindings,
+            detach=True,
+            remove=remove_container,
+            auto_remove=remove_container,
+            volumes=volumes,
+            entrypoint=entrypoint,
+            name=name,
+            network=NETWORK_NAME,
         )

-        network = self._client.networks.get(NETWORK_NAME)
-        logger.debug(f"docker network connect --ip {container_ip} {NETWORK_NAME} {container.id}")
-        network.connect(container, ipv4_address=container_ip)
-
         logger.debug(f"Container started with ID {container.short_id}. Setting up logs at {log_path}")
         log_thread = threading.Thread(target=self._log_container_output, args=(container, log_path))
         log_thread.daemon = True
@@ -105,7 +108,7 @@ class DockerManager:

     @staticmethod
     def generate_random_ext_ip():
-        base_ip_fragments = ["172", "18"]
+        base_ip_fragments = ["172", "19"]
         ext_ip = ".".join(base_ip_fragments + [str(random.randint(0, 255)) for _ in range(2)])
         logger.debug(f"Generated random external IP {ext_ip}")
         return ext_ip
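With the new signature the caller decides the internal-to-external port mapping, and the container joins the test network directly through containers.run(network=NETWORK_NAME) instead of a separate network.connect() with a fixed IP. A small sketch of the port_bindings shape docker-py accepts (the external port numbers are illustrative):

# Sketch only: building the port_bindings dict now passed to start_container().
# Internal ports come from node_vars.py; the external ports here are made up.
internal_ports = ["3000/udp", "18080/tcp"]
external_ports = [54321, 54322]

port_bindings = {port: int(external_ports[i]) for i, port in enumerate(internal_ports)}
print(port_bindings)  # {'3000/udp': 54321, '18080/tcp': 54322}, accepted by containers.run(ports=...)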
src/node/node_vars.py (new file, 20 lines)
@@ -0,0 +1,20 @@

nomos_nodes = {
    "nomos": {
        "image": "nomos:latest",
        "volumes": ["cluster_config:/etc/nomos", "./kzgrs/kzgrs_test_params:/kzgrs_test_params:z"],
        "ports": ["3000/udp", "18080/tcp"],
        "entrypoint": "/etc/nomos/scripts/run_nomos_node.sh",
    },
    "nomos_executor": {
        "image": "nomos:latest",
        "volumes": ["cluster_config:/etc/nomos", "./kzgrs/kzgrs_test_params:/kzgrs_test_params:z"],
        "ports": ["3000/udp", "18080/tcp"],
        "entrypoint": "/etc/nomos/scripts/run_nomos_executor.sh",
    },
    "cfgsync": {
        "image": "nomos:latest",
        "volumes": ["cluster_config:/etc/nomos"],
        "ports": "",
        "entrypoint": "/etc/nomos/scripts/run_cfgsync.sh",
    },
}
src/node/nomos_node.py
@@ -1,10 +1,14 @@
 import os
+
+from src.data_storage import DS
 from src.libs.custom_logger import get_custom_logger
 from tenacity import retry, stop_after_delay, wait_fixed

 from src.node.api_clients.rest import REST
 from src.node.docker_mananger import DockerManager
 from src.env_vars import DOCKER_LOG_DIR
+from src.node.node_vars import nomos_nodes
+from src.test_data import LOG_ERROR_KEYWORDS

 logger = get_custom_logger(__name__)

@@ -19,13 +23,125 @@ def sanitize_docker_flags(input_flags):


 class NomosNode:
-    def __init__(self, docker_image, docker_log_prefix=""):
-        self._image_name = docker_image
-        self._log_path = os.path.join(DOCKER_LOG_DIR, f"{docker_log_prefix}__{self._image_name.replace('/', '_')}.log")
+    def __init__(self, node_type, container_name=""):
+        logger.debug(f"Node is going to be initialized with this config {nomos_nodes[node_type]}")
+        self._image_name = nomos_nodes[node_type]["image"]
+        self._internal_ports = nomos_nodes[node_type]["ports"]
+        self._volumes = nomos_nodes[node_type]["volumes"]
+        self._entrypoint = nomos_nodes[node_type]["entrypoint"]
+
+        self._log_path = os.path.join(DOCKER_LOG_DIR, f"{container_name}__{self._image_name.replace('/', '_')}.log")
         self._docker_manager = DockerManager(self._image_name)
+        self._container_name = container_name
         self._container = None
+
+        cwd = os.getcwd()
+        for i, volume in enumerate(self._volumes):
+            self._volumes[i] = cwd + "/" + volume
+
         logger.debug(f"NomosNode instance initialized with log path {self._log_path}")

     @retry(stop=stop_after_delay(60), wait=wait_fixed(0.1), reraise=True)
-    def start(self, wait_for_node_sec=20, **kwargs):
+    def start(self, wait_for_node_sec=120, **kwargs):
         logger.debug("Starting Node...")
         self._docker_manager.create_network()
         self._ext_ip = self._docker_manager.generate_random_ext_ip()
+
+        number_of_ports = len(self._internal_ports)
+        self._port_map = {}
+
+        if number_of_ports > 0:
+            self._external_ports = self._docker_manager.generate_ports(count=number_of_ports)
+            self._udp_port = self._external_ports[0]
+            self._tcp_port = self._external_ports[1]
+            self._api = REST(self._tcp_port)
+
+            logger.debug(f"Internal ports {self._internal_ports}")
+
+            for i, port in enumerate(self._internal_ports):
+                self._port_map[port] = int(self._external_ports[i])
+
+        default_args = {}
+
+        logger.debug(f"Using volumes {self._volumes}")
+
+        logger.debug(f"Port map {self._port_map}")
+
+        self._container = self._docker_manager.start_container(
+            self._docker_manager.image,
+            port_bindings=self._port_map,
+            args=default_args,
+            log_path=self._log_path,
+            volumes=self._volumes,
+            entrypoint=self._entrypoint,
+            remove_container=True,
+            name=self._container_name,
+        )
+
+        logger.debug(f"Started container from image {self._image_name}. " f"REST: {getattr(self, '_tcp_port', 'N/A')}")
+
+        DS.nomos_nodes.append(self)
+
+    @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True)
+    def stop(self):
+        if self._container:
+            logger.debug(f"Stopping container with id {self._container.short_id}")
+            self._container.stop()
+            try:
+                self._container.remove()
+            except:
+                pass
+            self._container = None
+            logger.debug("Container stopped.")
+
+    @retry(stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True)
+    def kill(self):
+        if self._container:
+            logger.debug(f"Killing container with id {self._container.short_id}")
+            self._container.kill()
+            try:
+                self._container.remove()
+            except:
+                pass
+            self._container = None
+            logger.debug("Container killed.")
+
+    def restart(self):
+        if self._container:
+            logger.debug(f"Restarting container with id {self._container.short_id}")
+            self._container.restart()
+
+    def pause(self):
+        if self._container:
+            logger.debug(f"Pausing container with id {self._container.short_id}")
+            self._container.pause()
+
+    def unpause(self):
+        if self._container:
+            logger.debug(f"Unpause container with id {self._container.short_id}")
+            self._container.unpause()
+
+    def ensure_ready(self, timeout_duration=10):
+        @retry(stop=stop_after_delay(timeout_duration), wait=wait_fixed(0.1), reraise=True)
+        def check_ready(node=self):
+            node.info_response = node.info()
+            logger.info("REST service is ready !!")
+
+        if self.is_nomos():
+            check_ready()
+
+    def is_nomos(self):
+        return "nomos" in self._container_name
+
+    def info(self):
+        return self._api.info()
+
+    def check_nomos_log_errors(self, whitelist=None):
+        keywords = LOG_ERROR_KEYWORDS
+
+        # If a whitelist is provided, remove those keywords from the keywords list
+        if whitelist:
+            keywords = [keyword for keyword in keywords if keyword not in whitelist]
+
+        matches = self._docker_manager.search_log_for_keywords(self._log_path, keywords, False)
+        assert not matches, f"Found errors {matches}"
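Besides the lifecycle helpers, check_nomos_log_errors() lets a test tolerate expected log noise while still failing on the rest of LOG_ERROR_KEYWORDS. A short usage sketch, mirroring the node setup in the test added below (the whitelisted keyword is only an example):

# Sketch only: start a validator, wait for its REST API, then scan its log while
# tolerating one expected keyword.  "terminated" is an illustrative whitelist entry.
from src.env_vars import NOMOS
from src.node.nomos_node import NomosNode

node = NomosNode(NOMOS, "nomos_node_0")
node.start()
node.ensure_ready()
node.check_nomos_log_errors(whitelist=["terminated"])
node.stop()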
src/test_data.py (new file, 26 lines)
@@ -0,0 +1,26 @@

from time import time
from datetime import datetime, timedelta

LOG_ERROR_KEYWORDS = [
    "crash",
    "fatal",
    "panic",
    "abort",
    "segfault",
    "corrupt",
    "terminated",
    "unhandled",
    "stacktrace",
    "deadlock",
    "SIGSEGV",
    "SIGABRT",
    "stack overflow",
    "index out of bounds",
    "nil pointer dereference",
    "goroutine exit",
    "nil pointer",
    "runtime error",
    "goexit",
    "race condition",
    "double free",
]
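DockerManager.search_log_for_keywords() itself is not part of this diff; conceptually it amounts to a case-insensitive scan of the captured container log for these keywords. A hedged sketch of that idea (not the project's implementation):

# Sketch only, not the project's implementation: the rough idea behind scanning a
# container log for the keywords above.
from src.test_data import LOG_ERROR_KEYWORDS


def scan_log(log_path, keywords=LOG_ERROR_KEYWORDS):
    matches = []
    with open(log_path, errors="ignore") as log_file:
        for line in log_file:
            if any(keyword.lower() in line.lower() for keyword in keywords):
                matches.append(line.rstrip())
    return matches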
tests/conftest.py (new file, 92 lines)
@@ -0,0 +1,92 @@

# -*- coding: utf-8 -*-
import inspect
import glob
from src.libs.custom_logger import get_custom_logger
import os
import pytest
from datetime import datetime
from time import time
from uuid import uuid4
from src.libs.common import attach_allure_file
import src.env_vars as env_vars
from src.data_storage import DS

logger = get_custom_logger(__name__)


# See https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures
@pytest.hookimpl(hookwrapper=True, tryfirst=True)
def pytest_runtest_makereport(item):
    outcome = yield
    rep = outcome.get_result()
    if rep.when == "call":
        setattr(item, "rep_call", rep)
        return rep
    return None


@pytest.fixture(scope="session", autouse=True)
def set_allure_env_variables():
    yield
    if os.path.isdir("allure-results") and not os.path.isfile(os.path.join("allure-results", "environment.properties")):
        logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
        with open(os.path.join("allure-results", "environment.properties"), "w") as outfile:
            for attribute_name in dir(env_vars):
                if attribute_name.isupper():
                    attribute_value = getattr(env_vars, attribute_name)
                    outfile.write(f"{attribute_name}={attribute_value}\n")


@pytest.fixture(scope="function", autouse=True)
def test_id(request):
    # setting up an unique test id to be used where needed
    logger.debug(f"Running fixture setup: {inspect.currentframe().f_code.co_name}")
    request.cls.test_id = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}__{str(uuid4())}"


@pytest.fixture(scope="function", autouse=True)
def test_setup(request, test_id):
    logger.debug(f"Running test: {request.node.name} with id: {request.cls.test_id}")
    yield
    logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
    for file in glob.glob(os.path.join(env_vars.DOCKER_LOG_DIR, "*")):
        if os.path.getmtime(file) < time() - 3600:
            logger.debug(f"Deleting old log file: {file}")
            try:
                os.remove(file)
            except:
                logger.error("Could not delete file")


@pytest.fixture(scope="function", autouse=True)
def attach_logs_on_fail(request):
    yield
    if env_vars.RUNNING_IN_CI and hasattr(request.node, "rep_call") and request.node.rep_call.failed:
        logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
        logger.debug("Test failed, attempting to attach logs to the allure reports")
        for file in glob.glob(os.path.join(env_vars.DOCKER_LOG_DIR, "*" + request.cls.test_id + "*")):
            attach_allure_file(file)


@pytest.fixture(scope="function", autouse=True)
def close_open_nodes(attach_logs_on_fail):
    DS.nomos_nodes = []
    yield
    logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
    crashed_containers = []
    for node in DS.nomos_nodes:
        try:
            node.stop()
        except Exception as ex:
            if "No such container" in str(ex):
                crashed_containers.append(node.image)
            logger.error(f"Failed to stop container because of error {ex}")
    assert not crashed_containers, f"Containers {crashed_containers} crashed during the test!!!"


@pytest.fixture(scope="function", autouse=True)
def check_nomos_log_errors():
    yield
    logger.debug(f"Running fixture teardown: {inspect.currentframe().f_code.co_name}")
    for node in DS.nomos_nodes:
        node.check_nomos_log_errors()
tests/data_integrity/__init__.py (new empty file)
tests/e2e/__init__.py (new empty file)
tests/e2e/test_2node_alive.py (new file, 24 lines)
@@ -0,0 +1,24 @@

from src.env_vars import CFGSYNC, NOMOS, NOMOS_EXECUTOR
from src.libs.custom_logger import get_custom_logger
from src.node.nomos_node import NomosNode

logger = get_custom_logger(__name__)


class Test2NodeClAlive:
    def test_cluster_start(self):

        self.node1 = NomosNode(CFGSYNC, "cfgsync")
        self.node2 = NomosNode(NOMOS, "nomos_node_0")
        self.node3 = NomosNode(NOMOS_EXECUTOR, "nomos_node_1")

        self.node1.start()
        self.node2.start()
        self.node3.start()

        try:
            self.node2.ensure_ready()
            self.node3.ensure_ready()
        except Exception as ex:
            logger.error(f"REST service did not become ready in time: {ex}")
            raise
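For reference, the new test can be driven from Python as well as the pytest CLI; a minimal sketch (assumes the nomos:latest image and the ./kzgrs/kzgrs_test_params file referenced in node_vars.py are available on the host):

# Sketch only: run the new end-to-end test programmatically.
import pytest

pytest.main(["-v", "tests/e2e/test_2node_alive.py"])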