sonda: adapt setup for deployment (#3151)

Referenced issue: https://github.com/status-im/infra-hq/issues/135

Signed-off-by: markoburcul <marko@status.im>
This commit is contained in:
Marko Burčul 2024-10-25 16:31:59 +02:00 committed by GitHub
parent 9f56891b88
commit 2198d78fc6
4 changed files with 56 additions and 26 deletions

View File

@ -18,14 +18,16 @@ EXTRA_ARGS=
RLN_RELAY_CONTRACT_ADDRESS=
# -------------------- SONDA CONFIG ------------------
METRICS_PORT=8004
NODE_REST_ADDRESS="http://nwaku:8645"
CLUSTER_ID=16
SHARD=32
# Comma-separated list of store nodes to poll
STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,\
/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,\
/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,\
/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM,\
/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\
STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,
/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,
/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,
/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM,
/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,
/dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P"
# Wait time in seconds between two consecutive queries
QUERY_DELAY=60

View File

@ -1,3 +1,23 @@
FROM python:3.9.18-alpine3.18
RUN pip install requests argparse prometheus_client
ENV METRICS_PORT=8004
ENV NODE_REST_ADDRESS="http://nwaku:8645"
ENV QUERY_DELAY=60
ENV STORE_NODES=""
ENV CLUSTER_ID=1
ENV SHARD=1
ENV HEALTH_THRESHOLD=5
WORKDIR /opt
COPY sonda.py /opt/sonda.py
RUN pip install requests argparse prometheus_client
CMD python -u /opt/sonda.py \
--metrics-port=$METRICS_PORT \
--node-rest-address="${NODE_REST_ADDRESS}" \
--delay-seconds=$QUERY_DELAY \
--pubsub-topic="/waku/2/rs/${CLUSTER_ID}/${SHARD}" \
--store-nodes="${STORE_NODES}" \
--health-threshold=$HEALTH_THRESHOLD

View File

@ -1,5 +1,4 @@
version: "3.7"
x-logging: &logging
logging:
driver: json-file
@ -15,6 +14,8 @@ x-rln-environment: &rln_env
RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-"
x-sonda-env: &sonda_env
METRICS_PORT: ${METRICS_PORT:-8004}
NODE_REST_ADDRESS: ${NODE_REST_ADDRESS:-"http://nwaku:8645"}
CLUSTER_ID: ${CLUSTER_ID:-1}
SHARD: ${SHARD:-0}
STORE_NODES: ${STORE_NODES:-}
@ -24,7 +25,8 @@ x-sonda-env: &sonda_env
# Services definitions
services:
nwaku:
image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:v0.30.1}
image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:deploy-status-prod}
container_name: nwaku
restart: on-failure
ports:
- 30304:30304/tcp
@ -54,29 +56,27 @@ services:
entrypoint: sh
command:
- /opt/run_node.sh
networks:
- nwaku-sonda
sonda:
build:
context: .
dockerfile: Dockerfile.sonda
container_name: sonda
ports:
- 127.0.0.1:8004:8004
- 127.0.0.1:${METRICS_PORT}:${METRICS_PORT}
environment:
<<:
- *sonda_env
command: >
python -u /opt/sonda.py
--delay-seconds=${QUERY_DELAY}
--pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD}
--store-nodes=${STORE_NODES}
--health-threshold=${HEALTH_THRESHOLD}
volumes:
- ./sonda.py:/opt/sonda.py:Z
depends_on:
- nwaku
networks:
- nwaku-sonda
prometheus:
image: docker.io/prom/prometheus:latest
container_name: prometheus
volumes:
- ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
command:
@ -86,9 +86,12 @@ services:
restart: on-failure:5
depends_on:
- nwaku
networks:
- nwaku-sonda
grafana:
image: docker.io/grafana/grafana:latest
container_name: grafana
env_file:
- ./monitoring/configuration/grafana-plugins.env
volumes:
@ -104,4 +107,8 @@ services:
restart: on-failure:5
depends_on:
- prometheus
networks:
- nwaku-sonda
networks:
nwaku-sonda:

View File

@ -26,10 +26,12 @@ node_health = Gauge('node_health', "Binary indicator of a node's health. 1 is he
# Argparser configuration
parser = argparse.ArgumentParser(description='')
parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0')
parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60)
parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True)
parser.add_argument('-t', '--health-threshold', type=int, help='consecutive successful store requests to consider a store node healthy', default=5)
parser.add_argument('-m', '--metrics-port', type=int, default=8004, help='Port to expose prometheus metrics.')
parser.add_argument('-a', '--node-rest-address', type=str, default="http://nwaku:8645", help='Address of the waku node to send messages to.')
parser.add_argument('-p', '--pubsub-topic', type=str, default='/waku/2/rs/1/0', help='PubSub topic.')
parser.add_argument('-d', '--delay-seconds', type=int, default=60, help='Delay in seconds between messages.')
parser.add_argument('-n', '--store-nodes', type=str, required=True, help='Comma separated list of store nodes to query.')
parser.add_argument('-t', '--health-threshold', type=int, default=5, help='Consecutive successful store requests to consider a store node healthy.')
args = parser.parse_args()
@ -178,22 +180,21 @@ def main():
store_nodes = [s.strip() for s in args.store_nodes.split(",")]
log_with_utc(f'Store nodes to query: {store_nodes}')
# Start Prometheus HTTP server at port 8004
start_http_server(8004)
# Start the Prometheus HTTP server on the port set via the CLI (default 8004)
start_http_server(args.metrics_port)
node_rest_address = 'http://nwaku:8645'
while True:
timestamp = time.time_ns()
# Send Sonda message
res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
res = send_sonda_msg(args.node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
log_with_utc(f'sleeping: {args.delay_seconds} seconds')
time.sleep(args.delay_seconds)
# Only send store query if message was successfully published
if(res):
send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
send_store_queries(args.node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
# Update node health metrics
for store_node in store_nodes: