diff --git a/apps/sonda/.env.example b/apps/sonda/.env.example index accc2732c..2423d06cb 100644 --- a/apps/sonda/.env.example +++ b/apps/sonda/.env.example @@ -18,14 +18,16 @@ EXTRA_ARGS= RLN_RELAY_CONTRACT_ADDRESS= # -------------------- SONDA CONFIG ------------------ +METRICS_PORT=8004 +NODE_REST_ADDRESS="http://nwaku:8645" CLUSTER_ID=16 SHARD=32 # Comma separated list of store nodes to poll -STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,\ -/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,\ -/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,\ -/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM,\ -/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\ +STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT, +/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R, +/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ, +/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM, +/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT, /dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P" # Wait time in seconds between two consecutive queries QUERY_DELAY=60 diff --git a/apps/sonda/Dockerfile.sonda b/apps/sonda/Dockerfile.sonda index 536be13d4..0e5a60672 100644 --- a/apps/sonda/Dockerfile.sonda +++ b/apps/sonda/Dockerfile.sonda @@ -1,3 +1,23 @@ FROM python:3.9.18-alpine3.18 -RUN pip install requests argparse prometheus_client \ No newline at end of file +ENV METRICS_PORT=8004 +ENV NODE_REST_ADDRESS="http://nwaku:8645" +ENV QUERY_DELAY=60 +ENV STORE_NODES="" +ENV CLUSTER_ID=1 +ENV SHARD=1 +ENV HEALTH_THRESHOLD=5 + +WORKDIR /opt + +COPY sonda.py /opt/sonda.py + +RUN pip install requests argparse prometheus_client + +CMD python -u /opt/sonda.py \ + --metrics-port=$METRICS_PORT \ + --node-rest-address="${NODE_REST_ADDRESS}" \ + --delay-seconds=$QUERY_DELAY \ + --pubsub-topic="/waku/2/rs/${CLUSTER_ID}/${SHARD}" \ + --store-nodes="${STORE_NODES}" \ + --health-threshold=$HEALTH_THRESHOLD diff --git a/apps/sonda/docker-compose.yml b/apps/sonda/docker-compose.yml index bfad693da..2141bbfc8 100644 --- a/apps/sonda/docker-compose.yml +++ b/apps/sonda/docker-compose.yml @@ -1,5 +1,4 @@ -version: "3.7" x-logging: &logging logging: driver: json-file @@ -15,6 +14,8 @@ x-rln-environment: &rln_env RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-" x-sonda-env: &sonda_env + METRICS_PORT: ${METRICS_PORT:-8004} + NODE_REST_ADDRESS: ${NODE_REST_ADDRESS:-"http://nwaku:8645"} CLUSTER_ID: ${CLUSTER_ID:-1} SHARD: ${SHARD:-0} STORE_NODES: ${STORE_NODES:-} @@ -24,7 +25,8 @@ x-sonda-env: &sonda_env # Services definitions services: nwaku: - image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:v0.30.1} + image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:deploy-status-prod} + container_name: nwaku restart: on-failure ports: - 30304:30304/tcp @@ -54,29 +56,27 @@ services: entrypoint: sh command: - /opt/run_node.sh + networks: + - nwaku-sonda sonda: build: context: . dockerfile: Dockerfile.sonda + container_name: sonda ports: - - 127.0.0.1:8004:8004 + - 127.0.0.1:${METRICS_PORT}:${METRICS_PORT} environment: <<: - *sonda_env - command: > - python -u /opt/sonda.py - --delay-seconds=${QUERY_DELAY} - --pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD} - --store-nodes=${STORE_NODES} - --health-threshold=${HEALTH_THRESHOLD} - volumes: - - ./sonda.py:/opt/sonda.py:Z depends_on: - nwaku + networks: + - nwaku-sonda prometheus: image: docker.io/prom/prometheus:latest + container_name: prometheus volumes: - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z command: @@ -86,9 +86,12 @@ services: restart: on-failure:5 depends_on: - nwaku + networks: + - nwaku-sonda grafana: image: docker.io/grafana/grafana:latest + container_name: grafana env_file: - ./monitoring/configuration/grafana-plugins.env volumes: @@ -104,4 +107,8 @@ services: restart: on-failure:5 depends_on: - prometheus + networks: + - nwaku-sonda +networks: + nwaku-sonda: \ No newline at end of file diff --git a/apps/sonda/sonda.py b/apps/sonda/sonda.py index e22b61c54..8b74bd029 100644 --- a/apps/sonda/sonda.py +++ b/apps/sonda/sonda.py @@ -26,10 +26,12 @@ node_health = Gauge('node_health', "Binary indicator of a node's health. 1 is he # Argparser configuration parser = argparse.ArgumentParser(description='') -parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0') -parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60) -parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True) -parser.add_argument('-t', '--health-threshold', type=int, help='consecutive successful store requests to consider a store node healthy', default=5) +parser.add_argument('-m', '--metrics-port', type=int, default=8004, help='Port to expose prometheus metrics.') +parser.add_argument('-a', '--node-rest-address', type=str, default="http://nwaku:8645", help='Address of the waku node to send messages to.') +parser.add_argument('-p', '--pubsub-topic', type=str, default='/waku/2/rs/1/0', help='PubSub topic.') +parser.add_argument('-d', '--delay-seconds', type=int, default=60, help='Delay in seconds between messages.') +parser.add_argument('-n', '--store-nodes', type=str, required=True, help='Comma separated list of store nodes to query.') +parser.add_argument('-t', '--health-threshold', type=int, default=5, help='Consecutive successful store requests to consider a store node healthy.') args = parser.parse_args() @@ -178,22 +180,21 @@ def main(): store_nodes = [s.strip() for s in args.store_nodes.split(",")] log_with_utc(f'Store nodes to query: {store_nodes}') - # Start Prometheus HTTP server at port 8004 - start_http_server(8004) + # Start Prometheus HTTP server at port set by the CLI(default 8004) + start_http_server(args.metrics_port) - node_rest_address = 'http://nwaku:8645' while True: timestamp = time.time_ns() # Send Sonda message - res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + res = send_sonda_msg(args.node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) log_with_utc(f'sleeping: {args.delay_seconds} seconds') time.sleep(args.delay_seconds) # Only send store query if message was successfully published if(res): - send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) + send_store_queries(args.node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp) # Update node health metrics for store_node in store_nodes: