From 5a2c41574adf8c84c488fa7af7eea02f6d79df2c Mon Sep 17 00:00:00 2001 From: gusto Date: Fri, 25 Oct 2024 13:39:07 +0300 Subject: [PATCH] Tests: Local debugging setup using grafana (#855) * Local debug setup for integration tests * Tests debugging setup readme * Clippy too smart --- compose.debug.yml | 76 +++++++++++++++++++ compose.static.yml | 2 +- nomos-services/tracing/src/lib.rs | 22 +++++- nomos-tracing/Cargo.toml | 2 +- nomos-tracing/src/filter/envfilter.rs | 27 +++++++ nomos-tracing/src/filter/mod.rs | 1 + nomos-tracing/src/lib.rs | 1 + testnet/cfgsync/src/config.rs | 3 +- testnet/monitoring/grafana/datasources.yaml | 19 +++++ testnet/monitoring/graylog.conf | 46 ----------- .../{prometheus-config.yml => prometheus.yml} | 10 +-- tests/Cargo.toml | 1 + tests/README.md | 54 +++++++++++++ tests/src/nodes/executor.rs | 20 +++-- tests/src/nodes/validator.rs | 18 +++-- tests/src/topology/configs/tracing.rs | 59 ++++++++++++-- 16 files changed, 286 insertions(+), 75 deletions(-) create mode 100644 compose.debug.yml create mode 100644 nomos-tracing/src/filter/envfilter.rs create mode 100644 nomos-tracing/src/filter/mod.rs delete mode 100644 testnet/monitoring/graylog.conf rename testnet/monitoring/{prometheus-config.yml => prometheus.yml} (54%) create mode 100644 tests/README.md diff --git a/compose.debug.yml b/compose.debug.yml new file mode 100644 index 00000000..383f163c --- /dev/null +++ b/compose.debug.yml @@ -0,0 +1,76 @@ +version: '3.8' + +services: + + prometheus: + container_name: prometheus + image: prom/prometheus:latest + volumes: + - ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.retention.time=7d + ports: + - 127.0.0.1:9090:9090 + restart: on-failure + + grafana: + container_name: grafana + image: grafana/grafana:latest + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_AUTH_DISABLE_LOGIN_FORM=true + - 
GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary + env_file: + - ./testnet/monitoring/grafana/plugins.env + volumes: + - ./testnet/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:z + - ./testnet/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:z + ports: + - 9091:3000 + restart: on-failure + depends_on: + - prometheus + + loki: + image: grafana/loki:2.9.2 + ports: + - "3100:3100" + command: -config.file=/etc/loki/local-config.yaml + + # Tempo runs as unprivileged user, volumes need to be chowned before running. + tempo-init: + image: &tempoImage grafana/tempo:latest + user: root + entrypoint: + - "chown" + - "10001:10001" + - "/var/tempo" + volumes: + - tempo-data:/var/tempo + + memcached: + image: memcached:1.6.29 + container_name: memcached + ports: + - "11211:11211" + environment: + - MEMCACHED_MAX_MEMORY=64m + - MEMCACHED_THREADS=4 + + tempo: + image: *tempoImage + container_name: tempo + command: [ "-config.file=/etc/tempo.yaml" ] + volumes: + - ./testnet/monitoring/tempo.yaml:/etc/tempo.yaml:z + - tempo-data:/var/tempo + ports: + - "4317:4317" # otlp grpc + depends_on: + - tempo-init + - memcached + +volumes: + tempo-data: diff --git a/compose.static.yml b/compose.static.yml index 77df2e45..7b385aff 100644 --- a/compose.static.yml +++ b/compose.static.yml @@ -80,7 +80,7 @@ services: container_name: prometheus image: prom/prometheus:latest volumes: - - ./testnet/monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:z + - ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z command: - --config.file=/etc/prometheus/prometheus.yml - --storage.tsdb.retention.time=7d diff --git a/nomos-services/tracing/src/lib.rs b/nomos-services/tracing/src/lib.rs index f4fb6cbd..15d928fa 100644 --- a/nomos-services/tracing/src/lib.rs +++ b/nomos-services/tracing/src/lib.rs @@ -4,6 +4,7 @@ use std::io::Write; use std::sync::{Arc, Mutex}; // crates use futures::StreamExt; +use 
nomos_tracing::filter::envfilter::{create_envfilter_layer, EnvFilterConfig}; use nomos_tracing::logging::gelf::{create_gelf_layer, GelfConfig}; use nomos_tracing::logging::local::{create_file_layer, create_writer_layer, FileConfig}; use nomos_tracing::logging::loki::{create_loki_layer, LokiConfig}; @@ -86,10 +87,17 @@ pub enum TracingLayer { None, } +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum FilterLayer { + EnvFilter(EnvFilterConfig), + None, +} + #[derive(Clone, Debug, Serialize, Deserialize)] pub struct TracingSettings { pub logger: LoggerLayer, pub tracing: TracingLayer, + pub filter: FilterLayer, #[serde(with = "serde_level")] pub level: Level, } @@ -99,6 +107,7 @@ impl Default for TracingSettings { Self { logger: LoggerLayer::Stdout, tracing: TracingLayer::None, + filter: FilterLayer::None, level: Level::DEBUG, } } @@ -106,10 +115,16 @@ impl Default for TracingSettings { impl TracingSettings { #[inline] - pub const fn new(logger: LoggerLayer, tracing: TracingLayer, level: Level) -> Self { + pub const fn new( + logger: LoggerLayer, + tracing: TracingLayer, + filter: FilterLayer, + level: Level, + ) -> Self { Self { logger, tracing, + filter, level, } } @@ -174,6 +189,11 @@ impl ServiceCore for Tracing { layers.push(Box::new(tracing_layer)); } + if let FilterLayer::EnvFilter(config) = config.filter { + let filter_layer = create_envfilter_layer(config)?; + layers.push(Box::new(filter_layer)); + } + // If no layers are created, tracing subscriber is not required. 
if layers.is_empty() { return Ok(Self { diff --git a/nomos-tracing/Cargo.toml b/nomos-tracing/Cargo.toml index b05382af..0c876cdf 100644 --- a/nomos-tracing/Cargo.toml +++ b/nomos-tracing/Cargo.toml @@ -13,6 +13,6 @@ tracing = "0.1" tracing-appender = "0.2" tracing-loki = "0.2.5" tracing-opentelemetry = "0.27" -tracing-subscriber = { version = "0.3", features = ["json", "registry"] } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "registry"] } tracing-gelf = "0.7" url = { version = "2", features = ["serde"] } diff --git a/nomos-tracing/src/filter/envfilter.rs b/nomos-tracing/src/filter/envfilter.rs new file mode 100644 index 00000000..54e38afe --- /dev/null +++ b/nomos-tracing/src/filter/envfilter.rs @@ -0,0 +1,27 @@ +// std +use std::collections::HashMap; +use std::error::Error; +// crates +use serde::{Deserialize, Serialize}; +use tracing_subscriber::EnvFilter; +// internal + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct EnvFilterConfig { + /// HashMap where the key is the crate/module name, and the value is the desired log level. 
+ /// More: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives + pub filters: HashMap, +} + +pub fn create_envfilter_layer( + config: EnvFilterConfig, +) -> Result> { + let filter_string = config + .filters + .into_iter() + .map(|(target, level)| format!("{}={}", target, level)) + .collect::>() + .join(","); + + EnvFilter::try_new(filter_string).map_err(|e| e.into()) +} diff --git a/nomos-tracing/src/filter/mod.rs b/nomos-tracing/src/filter/mod.rs new file mode 100644 index 00000000..e595241a --- /dev/null +++ b/nomos-tracing/src/filter/mod.rs @@ -0,0 +1 @@ +pub mod envfilter; diff --git a/nomos-tracing/src/lib.rs b/nomos-tracing/src/lib.rs index 8f89f062..68b36ec4 100644 --- a/nomos-tracing/src/lib.rs +++ b/nomos-tracing/src/lib.rs @@ -1,2 +1,3 @@ +pub mod filter; pub mod logging; pub mod tracing; diff --git a/testnet/cfgsync/src/config.rs b/testnet/cfgsync/src/config.rs index 357998c1..776b57a6 100644 --- a/testnet/cfgsync/src/config.rs +++ b/testnet/cfgsync/src/config.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, net::Ipv4Addr, str::FromStr}; // crates use nomos_libp2p::{Multiaddr, PeerId, Protocol}; use nomos_tracing::{logging::loki::LokiConfig, tracing::otlp::OtlpTracingConfig}; -use nomos_tracing_service::{LoggerLayer, TracingSettings}; +use nomos_tracing_service::{FilterLayer, LoggerLayer, TracingSettings}; use rand::{thread_rng, Rng}; use tests::topology::configs::{ api::GeneralApiConfig, @@ -207,6 +207,7 @@ fn tracing_config_for_grafana(params: TracingParams, identifier: String) -> Gene endpoint: params.tempo_endpoint, sample_ratio: 1.0, }), + filter: FilterLayer::None, level: Level::INFO, }, } diff --git a/testnet/monitoring/grafana/datasources.yaml b/testnet/monitoring/grafana/datasources.yaml index 2d99f845..37b2fc7f 100644 --- a/testnet/monitoring/grafana/datasources.yaml +++ b/testnet/monitoring/grafana/datasources.yaml @@ -9,3 +9,22 @@ datasources: is_default: true version: 1 editable: true + + - 
name: Tempo + type: tempo + access: proxy + org_id: 1 + url: http://tempo:3200 + is_default: false + version: 1 + editable: true + + - name: Loki + type: loki + access: proxy + org_id: 1 + url: http://loki:3100 + is_default: false + version: 1 + editable: true + diff --git a/testnet/monitoring/graylog.conf b/testnet/monitoring/graylog.conf deleted file mode 100644 index 5e1d8b6d..00000000 --- a/testnet/monitoring/graylog.conf +++ /dev/null @@ -1,46 +0,0 @@ -elasticsearch_discovery_enabled = false - -node_id_file = /usr/share/graylog/data/data/node-id - -http_bind_address = 0.0.0.0:9000 -http_external_uri = http://localhost:9000/ - -mongodb_uri = mongodb://mongodb:27017/graylog -is_leader = true - -password_secret = -root_password_sha2 = - -bin_dir = /usr/share/graylog/bin -data_dir = /usr/share/graylog/data -plugin_dir = /usr/share/graylog/plugin - -stream_aware_field_types=false - -disabled_retention_strategies = none -allow_leading_wildcard_searches = false - -allow_highlighting = false - -output_batch_size = 500 - -output_flush_interval = 1 - -output_fault_count_threshold = 5 -output_fault_penalty_seconds = 30 - -processbuffer_processors = 5 -outputbuffer_processors = 3 -processor_wait_strategy = blocking - -ring_size = 65536 - -inputbuffer_ring_size = 65536 -inputbuffer_processors = 2 -inputbuffer_wait_strategy = blocking - -message_journal_enabled = true -message_journal_dir = data/journal -lb_recognition_period_seconds = 3 - -mongodb_max_connections = 1000 diff --git a/testnet/monitoring/prometheus-config.yml b/testnet/monitoring/prometheus.yml similarity index 54% rename from testnet/monitoring/prometheus-config.yml rename to testnet/monitoring/prometheus.yml index 7be135f9..6772dd7e 100644 --- a/testnet/monitoring/prometheus-config.yml +++ b/testnet/monitoring/prometheus.yml @@ -5,10 +5,10 @@ global: monitor: "Monitoring" scrape_configs: - - job_name: "libp2p" + - job_name: "nomos" static_configs: - targets: - - bootstrap:18080 - - libp2p_node_1:18080 - - 
libp2p_node_2:18080 - - libp2p_node_3:18080 + - nomos-node-0:18080 + - nomos-node-1:18080 + - nomos-node-2:18080 + - nomos-node-3:18080 diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 7159c30f..f9c1f917 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -60,3 +60,4 @@ path = "src/tests/da.rs" [features] metrics = ["nomos-node/metrics"] +debug = [] diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..51536f04 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,54 @@ +# Tests + +## Tests Debugging Setup + +This document provides instructions for setting up and using the testing environment, including how to start the Docker setup, run tests with a feature flag, and access the Grafana dashboard. + +## Prerequisites + +Ensure that the following are installed on your system: +- [Docker](https://docs.docker.com/get-docker/) +- [Docker Compose](https://docs.docker.com/compose/install/) + +## Setup and Usage + +### 1. Start `compose.debug.yml` + +To start the services defined in `compose.debug.yml` using Docker Compose, run the following command: + +```bash +docker-compose -f compose.debug.yml up -d +``` + +This command will: + Use the configuration specified in compose.debug.yml. + Start all services in detached mode (-d), allowing the terminal to be used for other commands. + +To stop the services, you can run: +``` +docker-compose -f compose.debug.yml down # compose filename needs to be the same +``` + +### 2. Run Tests with Debug Feature Flag + +To execute the test suite with the debug feature flag, use the following command: + +```bash +RISC0_DEV_MODE=true cargo test -p tests -F debug disseminate_and_retrieve +``` + +`-F debug`: Enables the debug feature flag for the integration tests, allowing for extra debug output or specific debug-only code paths to be enabled during the tests. +To modify the tracing configuration when using `-F debug` flag go to `tests/src/topology/configs/tracing.rs`. 
If debug flag is not used, logs will be written into each node's temporary directory. + +### 3. Access the Grafana Dashboard +> It's important that the test is performed after the docker compose is started + +Once the Docker setup is running, you can access the Grafana dashboard to view metrics and logs: + Open a browser and navigate to http://localhost:9091. + +Use "Explore" tab to select data source: "Loki", "Tempo", "Prometheus". Prometheus source is unusable at the moment in local setup. + +- Loki - to kickstart your query, select "host" as label filter, and "node-0" or other nodes as value, this will show all logs for selected host. +- Tempo - to kickstart your query, enter "{}" as TraceQL query to see all traces. + + diff --git a/tests/src/nodes/executor.rs b/tests/src/nodes/executor.rs index ff79573f..dcdd5c43 100644 --- a/tests/src/nodes/executor.rs +++ b/tests/src/nodes/executor.rs @@ -23,12 +23,9 @@ use nomos_executor::config::Config; use nomos_network::{backends::libp2p::Libp2pConfig, NetworkConfig}; use nomos_node::api::paths::{CL_METRICS, DA_GET_RANGE}; use nomos_node::RocksBackendSettings; -use nomos_tracing::logging::local::FileConfig; -use nomos_tracing_service::LoggerLayer; use tempfile::NamedTempFile; use crate::adjust_timeout; -use crate::nodes::LOGS_PREFIX; use crate::topology::configs::GeneralConfig; use super::{create_tempdir, persist_tempdir, GetRangeReq, CLIENT}; @@ -62,11 +59,18 @@ impl Executor { let mut file = NamedTempFile::new().unwrap(); let config_path = file.path().to_owned(); - // setup logging so that we can intercept it later in testing - config.tracing.logger = LoggerLayer::File(FileConfig { - directory: dir.path().to_owned(), - prefix: Some(LOGS_PREFIX.into()), - }); + #[cfg(not(feature = "debug"))] + { + use crate::nodes::LOGS_PREFIX; + use nomos_tracing::logging::local::FileConfig; + use nomos_tracing_service::LoggerLayer; + + // setup logging so that we can intercept it later in testing + config.tracing.logger = 
LoggerLayer::File(FileConfig { + directory: dir.path().to_owned(), + prefix: Some(LOGS_PREFIX.into()), + }); + } config.storage.db_path = dir.path().join("db"); config diff --git a/tests/src/nodes/validator.rs b/tests/src/nodes/validator.rs index 77ca63e4..e7816a7b 100644 --- a/tests/src/nodes/validator.rs +++ b/tests/src/nodes/validator.rs @@ -20,8 +20,6 @@ use nomos_node::api::paths::{ }; use nomos_node::{api::backend::AxumBackendSettings, Config, RocksBackendSettings}; use nomos_node::{BlobInfo, HeaderId, Tx}; -use nomos_tracing::logging::local::FileConfig; -use nomos_tracing_service::LoggerLayer; use reqwest::Url; use tempfile::NamedTempFile; @@ -65,11 +63,17 @@ impl Validator { let mut file = NamedTempFile::new().unwrap(); let config_path = file.path().to_owned(); - // setup logging so that we can intercept it later in testing - config.tracing.logger = LoggerLayer::File(FileConfig { - directory: dir.path().to_owned(), - prefix: Some(LOGS_PREFIX.into()), - }); + #[cfg(not(feature = "debug"))] + { + use nomos_tracing::logging::local::FileConfig; + use nomos_tracing_service::LoggerLayer; + + // setup logging so that we can intercept it later in testing + config.tracing.logger = LoggerLayer::File(FileConfig { + directory: dir.path().to_owned(), + prefix: Some(LOGS_PREFIX.into()), + }); + } config.storage.db_path = dir.path().join("db"); config diff --git a/tests/src/topology/configs/tracing.rs b/tests/src/topology/configs/tracing.rs index cf87a797..8e445d21 100644 --- a/tests/src/topology/configs/tracing.rs +++ b/tests/src/topology/configs/tracing.rs @@ -1,14 +1,63 @@ -use nomos_tracing_service::TracingSettings; +use nomos_tracing::{logging::loki::LokiConfig, tracing::otlp::OtlpTracingConfig}; +use nomos_tracing_service::{FilterLayer, LoggerLayer, TracingLayer, TracingSettings}; +use tracing::Level; -#[derive(Clone)] +#[derive(Clone, Default)] pub struct GeneralTracingConfig { pub tracing_settings: TracingSettings, } +impl GeneralTracingConfig { + 
#[allow(dead_code)] + fn local_debug_tracing(id: usize) -> Self { + Self { + tracing_settings: TracingSettings { + logger: LoggerLayer::Loki(LokiConfig { + endpoint: "http://localhost:3100".try_into().unwrap(), + host_identifier: format!("node-{id}"), + }), + tracing: TracingLayer::Otlp(OtlpTracingConfig { + endpoint: "http://localhost:4317".try_into().unwrap(), + sample_ratio: 0.1, + }), + filter: FilterLayer::EnvFilter(nomos_tracing::filter::envfilter::EnvFilterConfig { + // Allow events only from modules that matches the regex, if it matches - use + // provided tracing level. Libp2p and risc0 related crates are very log + // intensive in debug mode. + filters: [("nomos", "debug")] + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(), + }), + level: Level::DEBUG, + }, + } + } +} + pub fn create_tracing_configs(ids: &[[u8; 32]]) -> Vec { + #[cfg(feature = "debug")] + { + create_debug_configs(ids) + } + + #[cfg(not(feature = "debug"))] + { + create_default_configs(ids) + } +} + +#[allow(dead_code)] +fn create_debug_configs(ids: &[[u8; 32]]) -> Vec { ids.iter() - .map(|_| GeneralTracingConfig { - tracing_settings: Default::default(), - }) + .enumerate() + .map(|(i, _)| GeneralTracingConfig::local_debug_tracing(i)) + .collect() +} + +#[allow(dead_code)] +fn create_default_configs(ids: &[[u8; 32]]) -> Vec { + ids.iter() + .map(|_| GeneralTracingConfig::default()) .collect() }