From f35a6f10a61925fa9d3ff75e4882a584a17d61ec Mon Sep 17 00:00:00 2001 From: NagyZoltanPeter <113987313+NagyZoltanPeter@users.noreply.github.com> Date: Tue, 26 Nov 2024 20:42:27 +0100 Subject: [PATCH] chore: add supervisor for lpt infra (#3176) * Adding lpt-runner script and assemble into liteprotocoltester image - to ease infra deployment * Add supervisor that can run lpt continuously in infra environment, infra.env defines defaults for run; when the image tag of the lpt docker image is `deploy`, a specific image for infra deployment is built. * Added message latency metrics * DELAY_MESSAGES to MESSAGE_INTERVAL renaming * Adjust name of START_PUBLISHING_AFTER * Extend lpt readme with how to use make to build dockerized image and notice about infra deployment * As fixed in discussion, we will control infra testing by built in predefined test setup * Prevent peer switch in case using fixed service peers --- Makefile | 4 +- apps/liteprotocoltester/.env | 4 +- .../Dockerfile.liteprotocoltester.compile | 30 +++-- apps/liteprotocoltester/README.md | 46 ++++++-- .../docker-compose-on-simularor.yml | 4 +- apps/liteprotocoltester/docker-compose.yml | 4 +- apps/liteprotocoltester/filter_subscriber.nim | 11 +- apps/liteprotocoltester/infra.env | 11 ++ .../lightpush_publisher.nim | 10 +- apps/liteprotocoltester/lpt_metrics.nim | 3 + apps/liteprotocoltester/lpt_supervisor.py | 54 +++++++++ apps/liteprotocoltester/run_tester_node.sh | 15 +-- .../run_tester_node_at_infra.sh | 105 ++++++++++++++++++ apps/liteprotocoltester/statistics.nim | 7 +- apps/liteprotocoltester/tester_config.nim | 13 ++- ci/Jenkinsfile.lpt | 1 + 16 files changed, 279 insertions(+), 43 deletions(-) create mode 100644 apps/liteprotocoltester/infra.env create mode 100755 apps/liteprotocoltester/lpt_supervisor.py create mode 100644 apps/liteprotocoltester/run_tester_node_at_infra.sh diff --git a/Makefile b/Makefile index 3e285a2af..a2deeac37 100644 --- a/Makefile +++ b/Makefile @@ -373,16 +373,16 @@ 
DOCKER_LPT_NIMFLAGS ?= -d:chronicles_colors:none -d:insecure # build a docker image for the fleet docker-liteprotocoltester: DOCKER_LPT_TAG ?= latest docker-liteprotocoltester: DOCKER_LPT_NAME ?= wakuorg/liteprotocoltester:$(DOCKER_LPT_TAG) +# --no-cache docker-liteprotocoltester: docker build \ - --no-cache \ --build-arg="MAKE_TARGET=liteprotocoltester" \ --build-arg="NIMFLAGS=$(DOCKER_LPT_NIMFLAGS)" \ --build-arg="NIM_COMMIT=$(DOCKER_NIM_COMMIT)" \ --build-arg="LOG_LEVEL=TRACE" \ --label="commit=$(shell git rev-parse HEAD)" \ --label="version=$(GIT_VERSION)" \ - --target $(TARGET) \ + --target $(if $(filter deploy,$(DOCKER_LPT_TAG)),deployment_lpt,standalone_lpt) \ --tag $(DOCKER_LPT_NAME) \ --file apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile \ . diff --git a/apps/liteprotocoltester/.env b/apps/liteprotocoltester/.env index b2f08188b..4f7c49976 100644 --- a/apps/liteprotocoltester/.env +++ b/apps/liteprotocoltester/.env @@ -1,10 +1,10 @@ -START_PUBLISHING_AFTER=45 +START_PUBLISHING_AFTER_SECS=45 # can add some seconds delay before SENDER starts publishing NUM_MESSAGES=0 # 0 for infinite number of messages -DELAY_MESSAGES=8000 +MESSAGE_INTERVAL_MILLIS=8000 # ms delay between messages diff --git a/apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile b/apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile index 21f3484ad..cee1929ce 100644 --- a/apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile +++ b/apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile @@ -25,9 +25,8 @@ RUN make -j$(nproc) deps QUICK_AND_DIRTY_COMPILER=1 ${NIM_COMMIT} RUN make -j$(nproc) ${NIM_COMMIT} $MAKE_TARGET LOG_LEVEL=${LOG_LEVEL} NIMFLAGS="${NIMFLAGS}" -# PRODUCTION IMAGE ------------------------------------------------------------- - -FROM alpine:3.18 AS prod +# REFERENCE IMAGE as BASE for specialized PRODUCTION IMAGES---------------------------------------- +FROM alpine:3.18 AS base_lpt ARG MAKE_TARGET=liteprotocoltester @@ -43,16 +42,33 
@@ EXPOSE 30303 60000 8545 # Referenced in the binary RUN apk add --no-cache libgcc pcre-dev libpq-dev \ wget \ - iproute2 + iproute2 \ + python3 # Fix for 'Error loading shared library libpcre.so.3: No such file or directory' RUN ln -s /usr/lib/libpcre.so /usr/lib/libpcre.so.3 COPY --from=nim-build /app/build/liteprotocoltester /usr/bin/ +RUN chmod +x /usr/bin/liteprotocoltester + +# Standalone image to be used manually and in lpt-runner ------------------------------------------- +FROM base_lpt AS standalone_lpt + COPY --from=nim-build /app/apps/liteprotocoltester/run_tester_node.sh /usr/bin/ -COPY --from=nim-build /app/apps/liteprotocoltester/run_tester_node_on_fleet.sh /usr/bin/ +RUN chmod +x /usr/bin/run_tester_node.sh ENTRYPOINT ["/usr/bin/run_tester_node.sh", "/usr/bin/liteprotocoltester"] -# # By default just show help if called without arguments -CMD ["--help"] +# Image for infra deployment ------------------------------------------- +FROM base_lpt AS deployment_lpt + +# let supervisor python script flush logs immediately +ENV PYTHONUNBUFFERED="1" + +COPY --from=nim-build /app/apps/liteprotocoltester/run_tester_node_at_infra.sh /usr/bin/ +COPY --from=nim-build /app/apps/liteprotocoltester/infra.env /usr/bin/ +COPY --from=nim-build /app/apps/liteprotocoltester/lpt_supervisor.py /usr/bin/ +RUN chmod +x /usr/bin/run_tester_node_at_infra.sh +RUN chmod +x /usr/bin/lpt_supervisor.py + +ENTRYPOINT ["/usr/bin/lpt_supervisor.py"] diff --git a/apps/liteprotocoltester/README.md b/apps/liteprotocoltester/README.md index 1fad6eb75..eff025969 100644 --- a/apps/liteprotocoltester/README.md +++ b/apps/liteprotocoltester/README.md @@ -126,11 +126,11 @@ Run a SENDER role liteprotocoltester and a RECEIVER role one on different termin | Variable | Description | Default | | ---: | :--- | :--- | | NUM_MESSAGES | Number of message to publish, 0 means infinite | 120 | -| DELAY_MESSAGES | Frequency of messages in milliseconds | 1000 | +| MESSAGE_INTERVAL_MILLIS | Frequency of 
messages in milliseconds | 1000 | | PUBSUB | Used pubsub_topic for testing | /waku/2/rs/66/0 | | CONTENT_TOPIC | content_topic for testing | /tester/1/light-pubsub-example/proto | | CLUSTER_ID | cluster_id of the network | 16 | -| START_PUBLISHING_AFTER | Delay in seconds before starting to publish to let service node connected | 5 | +| START_PUBLISHING_AFTER_SECS | Delay in seconds before starting to publish to let service node connected | 5 | | MIN_MESSAGE_SIZE | Minimum message size in bytes | 1KiB | | MAX_MESSAGE_SIZE | Maximum message size in bytes | 120KiB | @@ -143,7 +143,7 @@ Run a SENDER role liteprotocoltester and a RECEIVER role one on different termin | --service-node| Address of the service node to use for lightpush and/or filter service | - | | --bootstrap-node| Address of the fleet's bootstrap node to use to determine service peer randomly choosen from the network. `--service-node` switch has precedence over this | - | | --num-messages | Number of message to publish | 120 | -| --delay-messages | Frequency of messages in milliseconds | 1000 | +| --message-interval | Frequency of messages in milliseconds | 1000 | | --min-message-size | Minimum message size in bytes | 1KiB | | --max-message-size | Maximum message size in bytes | 120KiB | | --start-publishing-after | Delay in seconds before starting to publish to let service node connected in seconds | 5 | @@ -169,9 +169,37 @@ There are multiple benefits of using bootstrap nodes. By using them liteprotocol Also by using bootstrap node and peer exchange discovery, litprotocoltester will be able to simulate service peer switch in case of failures. There are built in tresholds count for service peer failures (3) after service peer will be switched during the test. Also there will be max 10 trials of switching peer before test declared failed and quit. These service peer failures are reported, thus extending network reliability measures. 
### Docker image notice +### Building docker image -#### Building for docker compose runs on simulator or standalone +The easiest way to build the docker image is to use the provided Makefile target. + +```bash +cd +make docker-liteprotocoltester +``` +This will build liteprotocoltester from the ground up and create a docker image with the binary copied to it under image name and tag `wakuorg/liteprotocoltester:latest`. + +#### Building public image + +If you want to push the image to a public registry, you can use the Jenkins job to do so. +The job is available at https://ci.status.im/job/waku/job/liteprotocoltester/job/build-liteprotocoltester-image + +#### Building and deployment for infra testing + +For specific and continuous testing purposes we have a deployment of the `liteprotocoltester` test suite on our infra appliances. +This has its own configuration, constraints and requirements. To ease this job, the image shall be built and pushed with the `deploy` tag. +This can be done by the Jenkins job mentioned above. + +Or manually by: +```bash +cd +make DOCKER_LPT_TAG=deploy docker-liteprotocoltester +``` + +The image created with this method will be different from one built under any other tag. It is prepared to run a preconfigured test suite continuously. +It will also lack the Prometheus metrics scraping endpoint and Grafana, thus it is not recommended to use it for general testing. + +#### Manually building for docker compose runs on simulator or standalone Please note that currently to ease testing and development tester application docker image is based on ubuntu and uses the externally pre-built binary of 'liteprotocoltester'. This speeds up image creation. Another dokcer build file is provided for proper build of boundle image. @@ -239,9 +267,9 @@ Cluster id and Pubsub-topic must be accurately set according to the network conf The example shows that either multiaddress or ENR form accepted. 
```bash -export START_PUBLISHING_AFTER=60 +export START_PUBLISHING_AFTER_SECS=60 export NUM_MESSAGES=200 -export DELAY_MESSAGES=1000 +export MESSAGE_INTERVAL_MILLIS=1000 export MIN_MESSAGE_SIZE=15Kb export MAX_MESSAGE_SIZE=145Kb export PUBSUB=/waku/2/rs/16/32 @@ -274,9 +302,9 @@ cd lpt-runner export NUM_PUBLISHER_NODES=3 export NUM_RECEIVER_NODES=1 -export START_PUBLISHING_AFTER=120 +export START_PUBLISHING_AFTER_SECS=120 export NUM_MESSAGES=300 -export DELAY_MESSAGES=7000 +export MESSAGE_INTERVAL_MILLIS=7000 export MIN_MESSAGE_SIZE=15Kb export MAX_MESSAGE_SIZE=145Kb export PUBSUB=/waku/2/rs/1/4 diff --git a/apps/liteprotocoltester/docker-compose-on-simularor.yml b/apps/liteprotocoltester/docker-compose-on-simularor.yml index e6002376d..c63a294f2 100644 --- a/apps/liteprotocoltester/docker-compose-on-simularor.yml +++ b/apps/liteprotocoltester/docker-compose-on-simularor.yml @@ -15,13 +15,13 @@ x-rln-environment: &rln_env x-test-running-conditions: &test_running_conditions NUM_MESSAGES: ${NUM_MESSAGES:-120} - DELAY_MESSAGES: "${DELAY_MESSAGES:-1000}" + MESSAGE_INTERVAL_MILLIS: "${MESSAGE_INTERVAL_MILLIS:-1000}" PUBSUB: ${PUBSUB:-/waku/2/rs/66/0} CONTENT_TOPIC: ${CONTENT_TOPIC:-/tester/2/light-pubsub-test/wakusim} CLUSTER_ID: ${CLUSTER_ID:-66} MIN_MESSAGE_SIZE: ${MIN_MESSAGE_SIZE:-1Kb} MAX_MESSAGE_SIZE: ${MAX_MESSAGE_SIZE:-150Kb} - START_PUBLISHING_AFTER: ${START_PUBLISHING_AFTER:-5} # seconds + START_PUBLISHING_AFTER_SECS: ${START_PUBLISHING_AFTER_SECS:-5} # seconds # Services definitions diff --git a/apps/liteprotocoltester/docker-compose.yml b/apps/liteprotocoltester/docker-compose.yml index 9f3bd380e..afd2f1e72 100644 --- a/apps/liteprotocoltester/docker-compose.yml +++ b/apps/liteprotocoltester/docker-compose.yml @@ -15,13 +15,13 @@ x-rln-environment: &rln_env x-test-running-conditions: &test_running_conditions NUM_MESSAGES: ${NUM_MESSAGES:-120} - DELAY_MESSAGES: "${DELAY_MESSAGES:-1000}" + MESSAGE_INTERVAL_MILLIS: "${MESSAGE_INTERVAL_MILLIS:-1000}" PUBSUB: 
${PUBSUB:-/waku/2/rs/66/0} CONTENT_TOPIC: ${CONTENT_TOPIC:-/tester/2/light-pubsub-test/wakusim} CLUSTER_ID: ${CLUSTER_ID:-66} MIN_MESSAGE_SIZE: ${MIN_MESSAGE_SIZE:-1Kb} MAX_MESSAGE_SIZE: ${MAX_MESSAGE_SIZE:-150Kb} - START_PUBLISHING_AFTER: ${START_PUBLISHING_AFTER:-5} # seconds + START_PUBLISHING_AFTER_SECS: ${START_PUBLISHING_AFTER_SECS:-5} # seconds STANDALONE: ${STANDALONE:-1} RECEIVER_METRICS_PORT: 8003 PUBLISHER_METRICS_PORT: 8003 diff --git a/apps/liteprotocoltester/filter_subscriber.nim b/apps/liteprotocoltester/filter_subscriber.nim index fa8c38a4c..be9b1497e 100644 --- a/apps/liteprotocoltester/filter_subscriber.nim +++ b/apps/liteprotocoltester/filter_subscriber.nim @@ -44,7 +44,10 @@ proc unsubscribe( notice "unsubscribe request successful" proc maintainSubscription( - wakuNode: WakuNode, filterPubsubTopic: PubsubTopic, filterContentTopic: ContentTopic + wakuNode: WakuNode, + filterPubsubTopic: PubsubTopic, + filterContentTopic: ContentTopic, + preventPeerSwitch: bool, ) {.async.} = const maxFailedSubscribes = 3 const maxFailedServiceNodeSwitches = 10 @@ -83,7 +86,7 @@ proc maintainSubscription( if noFailedSubscribes < maxFailedSubscribes: await sleepAsync(2.seconds) # Wait a bit before retrying continue - else: + elif not preventPeerSwitch: let peerOpt = selectRandomServicePeer( wakuNode.peerManager, some(actualFilterPeer), WakuFilterSubscribeCodec ) @@ -164,4 +167,6 @@ proc setupAndSubscribe*( discard setTimer(Moment.fromNow(interval), printStats) # Start maintaining subscription - asyncSpawn maintainSubscription(wakuNode, conf.pubsubTopics[0], conf.contentTopics[0]) + asyncSpawn maintainSubscription( + wakuNode, conf.pubsubTopics[0], conf.contentTopics[0], conf.fixedServicePeer + ) diff --git a/apps/liteprotocoltester/infra.env b/apps/liteprotocoltester/infra.env new file mode 100644 index 000000000..3ead4dc50 --- /dev/null +++ b/apps/liteprotocoltester/infra.env @@ -0,0 +1,11 @@ +TEST_INTERVAL_MINUTES=180 +START_PUBLISHING_AFTER_SECS=120 
+NUM_MESSAGES=300 +MESSAGE_INTERVAL_MILLIS=1000 +MIN_MESSAGE_SIZE=15Kb +MAX_MESSAGE_SIZE=145Kb +PUBSUB=/waku/2/rs/16/32 +CONTENT_TOPIC=/tester/2/light-pubsub-test/fleet +CLUSTER_ID=16 +LIGHTPUSH_BOOTSTRAP=enr:-QEKuED9AJm2HGgrRpVaJY2nj68ao_QiPeUT43sK-aRM7sMJ6R4G11OSDOwnvVacgN1sTw-K7soC5dzHDFZgZkHU0u-XAYJpZIJ2NIJpcISnYxMvim11bHRpYWRkcnO4WgAqNiVib290LTAxLmRvLWFtczMuc3RhdHVzLnByb2Quc3RhdHVzLmltBnZfACw2JWJvb3QtMDEuZG8tYW1zMy5zdGF0dXMucHJvZC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaEC3rRtFQSgc24uWewzXaxTY8hDAHB8sgnxr9k8Rjb5GeSDdGNwgnZfg3VkcIIjKIV3YWt1Mg0 +FILTER_BOOTSTRAP=enr:-QEcuED7ww5vo2rKc1pyBp7fubBUH-8STHEZHo7InjVjLblEVyDGkjdTI9VdqmYQOn95vuQH-Htku17WSTzEufx-Wg4mAYJpZIJ2NIJpcIQihw1Xim11bHRpYWRkcnO4bAAzNi5ib290LTAxLmdjLXVzLWNlbnRyYWwxLWEuc3RhdHVzLnByb2Quc3RhdHVzLmltBnZfADU2LmJvb3QtMDEuZ2MtdXMtY2VudHJhbDEtYS5zdGF0dXMucHJvZC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaECxjqgDQ0WyRSOilYU32DA5k_XNlDis3m1VdXkK9xM6kODdGNwgnZfg3VkcIIjKIV3YWt1Mg0 diff --git a/apps/liteprotocoltester/lightpush_publisher.nim b/apps/liteprotocoltester/lightpush_publisher.nim index 739b0298d..4f32f7f03 100644 --- a/apps/liteprotocoltester/lightpush_publisher.nim +++ b/apps/liteprotocoltester/lightpush_publisher.nim @@ -113,7 +113,8 @@ proc publishMessages( lightpushContentTopic: ContentTopic, numMessages: uint32, messageSizeRange: SizeRange, - delayMessages: Duration, + messageInterval: Duration, + preventPeerSwitch: bool, ) {.async.} = var actualServicePeer = servicePeer let startedAt = getNowInNanosecondTime() @@ -177,7 +178,7 @@ proc publishMessages( else: noFailedPush += 1 lpt_service_peer_failure_count.inc(labelValues = ["publisher"]) - if noFailedPush > maxFailedPush: + if not preventPeerSwitch and noFailedPush > maxFailedPush: info "Max push failure limit reached, Try switching peer." 
let peerOpt = selectRandomServicePeer( wakuNode.peerManager, some(actualServicePeer), WakuLightPushCodec @@ -198,7 +199,7 @@ proc publishMessages( noFailedServiceNodeSwitches += 1 break - await sleepAsync(delayMessages) + await sleepAsync(messageInterval) proc setupAndPublish*( wakuNode: WakuNode, conf: LiteProtocolTesterConf, servicePeer: RemotePeerInfo @@ -252,5 +253,6 @@ proc setupAndPublish*( conf.contentTopics[0], conf.numMessages, (min: parsedMinMsgSize, max: parsedMaxMsgSize), - conf.delayMessages.milliseconds, + conf.messageInterval.milliseconds, + conf.fixedServicePeer, ) diff --git a/apps/liteprotocoltester/lpt_metrics.nim b/apps/liteprotocoltester/lpt_metrics.nim index 2cdc515b6..61666fb14 100644 --- a/apps/liteprotocoltester/lpt_metrics.nim +++ b/apps/liteprotocoltester/lpt_metrics.nim @@ -22,6 +22,9 @@ declarePublicCounter lpt_receiver_duplicate_messages_count, declarePublicGauge lpt_receiver_distinct_duplicate_messages_count, "number of distinct duplicate messages per peer", ["peer"] +declarePublicGauge lpt_receiver_latencies, + "Message delivery latency per peer (min-avg-max)", ["peer", "latency"] + declarePublicCounter lpt_receiver_lost_subscription_count, "number of filter service peer failed PING requests - lost subscription" diff --git a/apps/liteprotocoltester/lpt_supervisor.py b/apps/liteprotocoltester/lpt_supervisor.py new file mode 100755 index 000000000..24c395b0a --- /dev/null +++ b/apps/liteprotocoltester/lpt_supervisor.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import os +import time +from subprocess import Popen +import sys + +def load_env(file_path): + predefined_test_env = {} + with open(file_path) as f: + for line in f: + if line.strip() and not line.startswith('#'): + key, value = line.strip().split('=', 1) + predefined_test_env[key] = value + return predefined_test_env + +def run_tester_node(predefined_test_env): + role = sys.argv[1] + # override incoming environment variables with the ones from the file to prefer predefined 
testing environment. + for key, value in predefined_test_env.items(): + os.environ[key] = value + + script_cmd = "/usr/bin/run_tester_node_at_infra.sh /usr/bin/liteprotocoltester {role}".format(role=role) + return os.system(script_cmd) + +if __name__ == "__main__": + if len(sys.argv) < 2 or sys.argv[1] not in ["RECEIVER", "SENDER"]: + print("Error: First argument must be either 'RECEIVER' or 'SENDER'") + sys.exit(1) + + predefined_test_env_file = '/usr/bin/infra.env' + predefined_test_env = load_env(predefined_test_env_file) + + test_interval_minutes = int(predefined_test_env.get('TEST_INTERVAL_MINUTES', 60)) # Default to 60 minutes if not set + print(f"supervisor: Start testing loop. Interval is {test_interval_minutes} minutes") + counter = 0 + + while True: + counter += 1 + start_time = time.time() + print(f"supervisor: Run #{counter} started at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}") + print(f"supervisor: with arguments: {predefined_test_env}") + + exit_code = run_tester_node(predefined_test_env) + + end_time = time.time() + run_time = end_time - start_time + sleep_time = max(5 * 60, (test_interval_minutes * 60) - run_time) + + print(f"supervisor: Tester node finished at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))}") + print(f"supervisor: Runtime was {run_time:.2f} seconds") + print(f"supervisor: Next run scheduled in {sleep_time // 60:.2f} minutes") + + time.sleep(sleep_time) diff --git a/apps/liteprotocoltester/run_tester_node.sh b/apps/liteprotocoltester/run_tester_node.sh index d278af1fc..8975fba91 100755 --- a/apps/liteprotocoltester/run_tester_node.sh +++ b/apps/liteprotocoltester/run_tester_node.sh @@ -88,7 +88,7 @@ if [ -z "${SERIVCE_NODE_ADDR}" ]; then fi if $SERVICE_NODE_DIRECT; then - FULL_NODE=--service-node="${SERIVCE_NODE_ADDR}" + FULL_NODE=--service-node="${SERIVCE_NODE_ADDR} --fixed-service-peer" else FULL_NODE=--bootstrap-node="${SERIVCE_NODE_ADDR}" fi @@ -107,8 +107,8 @@ if [ -n "${CLUSTER_ID}" ]; 
then CLUSTER_ID=--cluster-id="${CLUSTER_ID}" fi -if [ -n "${START_PUBLISHING_AFTER}" ]; then - START_PUBLISHING_AFTER=--start-publishing-after="${START_PUBLISHING_AFTER}" +if [ -n "${START_PUBLISHING_AFTER_SECS}" ]; then + START_PUBLISHING_AFTER_SECS=--start-publishing-after="${START_PUBLISHING_AFTER_SECS}" fi if [ -n "${MIN_MESSAGE_SIZE}" ]; then @@ -124,8 +124,8 @@ if [ -n "${NUM_MESSAGES}" ]; then NUM_MESSAGES=--num-messages="${NUM_MESSAGES}" fi -if [ -n "${DELAY_MESSAGES}" ]; then - DELAY_MESSAGES=--delay-messages="${DELAY_MESSAGES}" +if [ -n "${MESSAGE_INTERVAL_MILLIS}" ]; then + MESSAGE_INTERVAL_MILLIS=--message-interval="${MESSAGE_INTERVAL_MILLIS}" fi echo "Running binary: ${BINARY_PATH}" @@ -136,14 +136,15 @@ echo "My external IP: ${MY_EXT_IP}" exec "${BINARY_PATH}"\ --log-level=INFO\ --nat=extip:${MY_EXT_IP}\ + --test-peers\ ${FULL_NODE}\ - ${DELAY_MESSAGES}\ + ${MESSAGE_INTERVAL_MILLIS}\ ${NUM_MESSAGES}\ ${PUBSUB}\ ${CONTENT_TOPIC}\ ${CLUSTER_ID}\ ${FUNCTION}\ - ${START_PUBLISHING_AFTER}\ + ${START_PUBLISHING_AFTER_SECS}\ ${MIN_MESSAGE_SIZE}\ ${MAX_MESSAGE_SIZE} # --config-file=config.toml\ diff --git a/apps/liteprotocoltester/run_tester_node_at_infra.sh b/apps/liteprotocoltester/run_tester_node_at_infra.sh new file mode 100644 index 000000000..6cec4b006 --- /dev/null +++ b/apps/liteprotocoltester/run_tester_node_at_infra.sh @@ -0,0 +1,105 @@ +#!/bin/sh + +#set -x +#echo "$@" + +if test -f .env; then + echo "Using .env file" + . $(pwd)/.env +fi + + +echo "I am a lite-protocol-tester node" + +BINARY_PATH=$1 + +if [ ! -x "${BINARY_PATH}" ]; then + echo "Invalid binary path '${BINARY_PATH}'. 
Failing" + exit 1 +fi + +if [ "${2}" = "--help" ]; then + echo "You might want to check nwaku/apps/liteprotocoltester/README.md" + exec "${BINARY_PATH}" --help + exit 0 +fi + +FUNCTION=$2 +if [ "${FUNCTION}" = "SENDER" ]; then + FUNCTION=--test-func=SENDER + SERIVCE_NODE_ADDR=${LIGHTPUSH_SERVICE_PEER:-${LIGHTPUSH_BOOTSTRAP:-}} + NODE_ARG=${LIGHTPUSH_SERVICE_PEER:+--service-node="${LIGHTPUSH_SERVICE_PEER}"} + NODE_ARG=${NODE_ARG:---bootstrap-node="${LIGHTPUSH_BOOTSTRAP}"} + METRICS_PORT=--metrics-port="${PUBLISHER_METRICS_PORT:-8003}" +fi + +if [ "${FUNCTION}" = "RECEIVER" ]; then + FUNCTION=--test-func=RECEIVER + SERIVCE_NODE_ADDR=${FILTER_SERVICE_PEER:-${FILTER_BOOTSTRAP:-}} + NODE_ARG=${FILTER_SERVICE_PEER:+--service-node="${FILTER_SERVICE_PEER}"} + NODE_ARG=${NODE_ARG:---bootstrap-node="${FILTER_BOOTSTRAP}"} + METRICS_PORT=--metrics-port="${RECEIVER_METRICS_PORT:-8003}" +fi + +if [ -z "${SERIVCE_NODE_ADDR}" ]; then + echo "Service/Bootsrap node peer_id or enr is not provided. Failing" + exit 1 +fi + +MY_EXT_IP=$(wget -qO- --no-check-certificate https://api4.ipify.org) + +if [ -n "${PUBSUB}" ]; then + PUBSUB=--pubsub-topic="${PUBSUB}" +else + PUBSUB=--pubsub-topic="/waku/2/rs/66/0" +fi + +if [ -n "${CONTENT_TOPIC}" ]; then + CONTENT_TOPIC=--content-topic="${CONTENT_TOPIC}" +fi + +if [ -n "${CLUSTER_ID}" ]; then + CLUSTER_ID=--cluster-id="${CLUSTER_ID}" +fi + +if [ -n "${START_PUBLISHING_AFTER_SECS}" ]; then + START_PUBLISHING_AFTER_SECS=--start-publishing-after="${START_PUBLISHING_AFTER_SECS}" +fi + +if [ -n "${MIN_MESSAGE_SIZE}" ]; then + MIN_MESSAGE_SIZE=--min-test-msg-size="${MIN_MESSAGE_SIZE}" +fi + +if [ -n "${MAX_MESSAGE_SIZE}" ]; then + MAX_MESSAGE_SIZE=--max-test-msg-size="${MAX_MESSAGE_SIZE}" +fi + + +if [ -n "${NUM_MESSAGES}" ]; then + NUM_MESSAGES=--num-messages="${NUM_MESSAGES}" +fi + +if [ -n "${MESSAGE_INTERVAL_MILLIS}" ]; then + MESSAGE_INTERVAL_MILLIS=--message-interval="${MESSAGE_INTERVAL_MILLIS}" +fi + +echo "Running binary: ${BINARY_PATH}" 
+echo "Node function is: ${FUNCTION}" +echo "Using service/bootstrap node as: ${NODE_ARG}" +echo "My external IP: ${MY_EXT_IP}" + +exec "${BINARY_PATH}"\ + --log-level=INFO\ + --nat=extip:${MY_EXT_IP}\ + --test-peers\ + ${NODE_ARG}\ + ${MESSAGE_INTERVAL_MILLIS}\ + ${NUM_MESSAGES}\ + ${PUBSUB}\ + ${CONTENT_TOPIC}\ + ${CLUSTER_ID}\ + ${FUNCTION}\ + ${START_PUBLISHING_AFTER_SECS}\ + ${MIN_MESSAGE_SIZE}\ + ${MAX_MESSAGE_SIZE}\ + ${METRICS_PORT} diff --git a/apps/liteprotocoltester/statistics.nim b/apps/liteprotocoltester/statistics.nim index 333ed04c4..be85d9bdb 100644 --- a/apps/liteprotocoltester/statistics.nim +++ b/apps/liteprotocoltester/statistics.nim @@ -189,8 +189,11 @@ proc dupMsgs(self: Statistics): string = ) return dupMsgs -proc echoStat*(self: Statistics) = +proc echoStat*(self: Statistics, peerId: string) = let (minL, maxL, avgL) = self.calcLatency() + lpt_receiver_latencies.set(labelValues = [peerId, "min"], value = minL.nanos()) + lpt_receiver_latencies.set(labelValues = [peerId, "avg"], value = avgL.nanos()) + lpt_receiver_latencies.set(labelValues = [peerId, "max"], value = maxL.nanos()) let printable = catch: """*------------------------------------------------------------------------------------------* @@ -248,7 +251,7 @@ proc echoStats*(self: var PerPeerStatistics) = echo "Error while printing statistics" else: echo peerLine.get() - stats.echoStat() + stats.echoStat(peerId) proc jsonStats*(self: PerPeerStatistics): string = try: diff --git a/apps/liteprotocoltester/tester_config.nim b/apps/liteprotocoltester/tester_config.nim index b21bf44f6..3e1de499d 100644 --- a/apps/liteprotocoltester/tester_config.nim +++ b/apps/liteprotocoltester/tester_config.nim @@ -89,10 +89,10 @@ type LiteProtocolTesterConf* = object name: "start-publishing-after" .}: uint32 - delayMessages* {. + messageInterval* {. desc: "Delay between messages in milliseconds.", defaultValue: 1000, - name: "delay-messages" + name: "message-interval" .}: uint32 pubsubTopics* {. 
@@ -142,7 +142,7 @@ type LiteProtocolTesterConf* = object testPeers* {. desc: "Run dial test on gathered PeerExchange peers.", - defaultValue: true, + defaultValue: false, name: "test-peers" .}: bool @@ -158,6 +158,13 @@ type LiteProtocolTesterConf* = object name: "rest-port" .}: uint16 + fixedServicePeer* {. + desc: + "Prevent changing the service peer in case of failures, the full test will stict to the first service peer in use.", + defaultValue: false, + name: "fixed-service-peer" + .}: bool + restAllowOrigin* {. desc: "Allow cross-origin requests from the specified origin." & diff --git a/ci/Jenkinsfile.lpt b/ci/Jenkinsfile.lpt index 9e1357e5b..2109a090b 100644 --- a/ci/Jenkinsfile.lpt +++ b/ci/Jenkinsfile.lpt @@ -61,6 +61,7 @@ pipeline { "--build-arg=MAKE_TARGET='liteprotocoltester' " + "--build-arg=NIMFLAGS='${params.NIMFLAGS}' " + "--build-arg=LOG_LEVEL='${params.LOWEST_LOG_LEVEL_ALLOWED}' " + + "--target ${params.IMAGE_TAG == 'deploy' ? 'deployment_lpt' : 'standalone_lpt'} " + "--file=apps/liteprotocoltester/Dockerfile.liteprotocoltester.compile " + " ." )