Tests: Local debugging setup using grafana (#855)

* Local debug setup for integration tests

* Tests debugging setup readme

* Clippy too smart
gusto 2024-10-25 13:39:07 +03:00 committed by GitHub
parent 938bcd7926
commit 5a2c41574a
16 changed files with 286 additions and 75 deletions

compose.debug.yml Normal file
View File

@@ -0,0 +1,76 @@
version: '3.8'

services:
  prometheus:
    container_name: prometheus
    image: prom/prometheus:latest
    volumes:
      - ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.retention.time=7d
    ports:
      - 127.0.0.1:9090:9090
    restart: on-failure

  grafana:
    container_name: grafana
    image: grafana/grafana:latest
    environment:
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
      - GF_AUTH_DISABLE_LOGIN_FORM=true
      - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor traceQLStreaming metricsSummary
    env_file:
      - ./testnet/monitoring/grafana/plugins.env
    volumes:
      - ./testnet/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:z
      - ./testnet/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:z
    ports:
      - 9091:3000
    restart: on-failure
    depends_on:
      - prometheus

  loki:
    image: grafana/loki:2.9.2
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/local-config.yaml

  # Tempo runs as an unprivileged user; volumes need to be chowned before running.
  tempo-init:
    image: &tempoImage grafana/tempo:latest
    user: root
    entrypoint:
      - "chown"
      - "10001:10001"
      - "/var/tempo"
    volumes:
      - tempo-data:/var/tempo

  memcached:
    image: memcached:1.6.29
    container_name: memcached
    ports:
      - "11211:11211"
    environment:
      - MEMCACHED_MAX_MEMORY=64m
      - MEMCACHED_THREADS=4

  tempo:
    image: *tempoImage
    container_name: tempo
    command: [ "-config.file=/etc/tempo.yaml" ]
    volumes:
      - ./testnet/monitoring/tempo.yaml:/etc/tempo.yaml:z
      - tempo-data:/var/tempo
    ports:
      - "4317:4317" # otlp grpc
    depends_on:
      - tempo-init
      - memcached

volumes:
  tempo-data:

View File

@@ -80,7 +80,7 @@ services:
container_name: prometheus
image: prom/prometheus:latest
volumes:
- ./testnet/monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:z
- ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=7d

View File

@@ -4,6 +4,7 @@ use std::io::Write;
use std::sync::{Arc, Mutex};
// crates
use futures::StreamExt;
use nomos_tracing::filter::envfilter::{create_envfilter_layer, EnvFilterConfig};
use nomos_tracing::logging::gelf::{create_gelf_layer, GelfConfig};
use nomos_tracing::logging::local::{create_file_layer, create_writer_layer, FileConfig};
use nomos_tracing::logging::loki::{create_loki_layer, LokiConfig};
@@ -86,10 +87,17 @@ pub enum TracingLayer {
None,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum FilterLayer {
EnvFilter(EnvFilterConfig),
None,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TracingSettings {
pub logger: LoggerLayer,
pub tracing: TracingLayer,
pub filter: FilterLayer,
#[serde(with = "serde_level")]
pub level: Level,
}
@@ -99,6 +107,7 @@ impl Default for TracingSettings {
Self {
logger: LoggerLayer::Stdout,
tracing: TracingLayer::None,
filter: FilterLayer::None,
level: Level::DEBUG,
}
}
@@ -106,10 +115,16 @@ impl Default for TracingSettings {
impl TracingSettings {
#[inline]
pub const fn new(logger: LoggerLayer, tracing: TracingLayer, level: Level) -> Self {
pub const fn new(
logger: LoggerLayer,
tracing: TracingLayer,
filter: FilterLayer,
level: Level,
) -> Self {
Self {
logger,
tracing,
filter,
level,
}
}
@@ -174,6 +189,11 @@ impl ServiceCore for Tracing {
layers.push(Box::new(tracing_layer));
}
if let FilterLayer::EnvFilter(config) = config.filter {
let filter_layer = create_envfilter_layer(config)?;
layers.push(Box::new(filter_layer));
}
// If no layers are created, tracing subscriber is not required.
if layers.is_empty() {
return Ok(Self {
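
For orientation, a minimal sketch of how the extended constructor with the new `filter` argument could be invoked; `example_settings` is a hypothetical helper and the concrete values simply mirror the defaults shown above:

```rust
use nomos_tracing_service::{FilterLayer, LoggerLayer, TracingLayer, TracingSettings};
use tracing::Level;

// Hypothetical helper: builds settings equivalent to the new Default impl,
// but through the four-argument constructor.
fn example_settings() -> TracingSettings {
    TracingSettings::new(
        LoggerLayer::Stdout,
        TracingLayer::None,
        FilterLayer::None,
        Level::DEBUG,
    )
}
```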

View File

@@ -13,6 +13,6 @@ tracing = "0.1"
tracing-appender = "0.2"
tracing-loki = "0.2.5"
tracing-opentelemetry = "0.27"
tracing-subscriber = { version = "0.3", features = ["json", "registry"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "registry"] }
tracing-gelf = "0.7"
url = { version = "2", features = ["serde"] }

View File

@@ -0,0 +1,27 @@
// std
use std::collections::HashMap;
use std::error::Error;
// crates
use serde::{Deserialize, Serialize};
use tracing_subscriber::EnvFilter;
// internal

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EnvFilterConfig {
    /// HashMap where the key is the crate/module name, and the value is the desired log level.
    /// More: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives
    pub filters: HashMap<String, String>,
}

pub fn create_envfilter_layer(
    config: EnvFilterConfig,
) -> Result<EnvFilter, Box<dyn Error + Send + Sync>> {
    let filter_string = config
        .filters
        .into_iter()
        .map(|(target, level)| format!("{}={}", target, level))
        .collect::<Vec<_>>()
        .join(",");
    EnvFilter::try_new(filter_string).map_err(|e| e.into())
}
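
As a usage sketch (the types and helper come from this module; the concrete targets and levels are assumptions), the `filters` map is joined into a standard `EnvFilter` directive string such as `nomos=debug,libp2p=warn`:

```rust
use std::collections::HashMap;

use nomos_tracing::filter::envfilter::{create_envfilter_layer, EnvFilterConfig};

fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Hypothetical filter set: `nomos` targets at debug, `libp2p` targets at warn.
    let config = EnvFilterConfig {
        filters: HashMap::from([
            ("nomos".to_string(), "debug".to_string()),
            ("libp2p".to_string(), "warn".to_string()),
        ]),
    };
    // The helper joins the map into "nomos=debug,libp2p=warn" (order is unspecified
    // for a HashMap) and parses it with `EnvFilter::try_new`.
    let _filter = create_envfilter_layer(config)?;
    Ok(())
}
```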

View File

@@ -0,0 +1 @@
pub mod envfilter;

View File

@@ -1,2 +1,3 @@
pub mod filter;
pub mod logging;
pub mod tracing;

View File

@@ -3,7 +3,7 @@ use std::{collections::HashMap, net::Ipv4Addr, str::FromStr};
// crates
use nomos_libp2p::{Multiaddr, PeerId, Protocol};
use nomos_tracing::{logging::loki::LokiConfig, tracing::otlp::OtlpTracingConfig};
use nomos_tracing_service::{LoggerLayer, TracingSettings};
use nomos_tracing_service::{FilterLayer, LoggerLayer, TracingSettings};
use rand::{thread_rng, Rng};
use tests::topology::configs::{
api::GeneralApiConfig,
@@ -207,6 +207,7 @@ fn tracing_config_for_grafana(params: TracingParams, identifier: String) -> Gene
endpoint: params.tempo_endpoint,
sample_ratio: 1.0,
}),
filter: FilterLayer::None,
level: Level::INFO,
},
}

View File

@@ -9,3 +9,22 @@ datasources:
is_default: true
version: 1
editable: true
- name: Tempo
type: tempo
access: proxy
org_id: 1
url: http://tempo:3200
is_default: false
version: 1
editable: true
- name: Loki
type: loki
access: proxy
org_id: 1
url: http://loki:3100
is_default: false
version: 1
editable: true

View File

@@ -1,46 +0,0 @@
elasticsearch_discovery_enabled = false
node_id_file = /usr/share/graylog/data/data/node-id
http_bind_address = 0.0.0.0:9000
http_external_uri = http://localhost:9000/
mongodb_uri = mongodb://mongodb:27017/graylog
is_leader = true
password_secret =
root_password_sha2 =
bin_dir = /usr/share/graylog/bin
data_dir = /usr/share/graylog/data
plugin_dir = /usr/share/graylog/plugin
stream_aware_field_types=false
disabled_retention_strategies = none
allow_leading_wildcard_searches = false
allow_highlighting = false
output_batch_size = 500
output_flush_interval = 1
output_fault_count_threshold = 5
output_fault_penalty_seconds = 30
processbuffer_processors = 5
outputbuffer_processors = 3
processor_wait_strategy = blocking
ring_size = 65536
inputbuffer_ring_size = 65536
inputbuffer_processors = 2
inputbuffer_wait_strategy = blocking
message_journal_enabled = true
message_journal_dir = data/journal
lb_recognition_period_seconds = 3
mongodb_max_connections = 1000

View File

@@ -5,10 +5,10 @@ global:
monitor: "Monitoring"
scrape_configs:
- job_name: "libp2p"
- job_name: "nomos"
static_configs:
- targets:
- bootstrap:18080
- libp2p_node_1:18080
- libp2p_node_2:18080
- libp2p_node_3:18080
- nomos-node-0:18080
- nomos-node-1:18080
- nomos-node-2:18080
- nomos-node-3:18080

View File

@@ -60,3 +60,4 @@ path = "src/tests/da.rs"
[features]
metrics = ["nomos-node/metrics"]
debug = []

tests/README.md Normal file
View File

@@ -0,0 +1,54 @@
# Tests
## Tests Debugging Setup
This document provides instructions for setting up and using the testing environment, including how to start the Docker setup, run tests with a feature flag, and access the Grafana dashboard.
## Prerequisites
Ensure that the following are installed on your system:
- [Docker](https://docs.docker.com/get-docker/)
- [Docker Compose](https://docs.docker.com/compose/install/)
## Setup and Usage
### 1. Start `compose.debug.yml`
To start the services defined in `compose.debug.yml` using Docker Compose, run the following command:
```bash
docker-compose -f compose.debug.yml up -d
```
This command will:
- Use the configuration specified in `compose.debug.yml`.
- Start all services in detached mode (`-d`), allowing the terminal to be used for other commands.
To stop the services, you can run:
```bash
docker-compose -f compose.debug.yml down # the compose file name must match the one used for `up`
```
### 2. Run Tests with Debug Feature Flag
To execute the test suite with the debug feature flag, use the following command:
```bash
RISC0_DEV_MODE=true cargo test -p tests -F debug disseminate_and_retrieve
```
`-F debug`: enables the `debug` feature flag for the integration tests, turning on extra debug output and debug-only code paths during the tests.
To modify the tracing configuration used with the `-F debug` flag, edit `tests/src/topology/configs/tracing.rs`. If the debug flag is not used, logs are written to each node's temporary directory.
### 3. Access the Grafana Dashboard
> Important: run the test only after the Docker Compose setup has been started.
Once the Docker setup is running, you can access the Grafana dashboard to view metrics and logs:
Open a browser and navigate to http://localhost:9091.
Use the "Explore" tab to select a data source: "Loki", "Tempo", or "Prometheus". The Prometheus source is currently unusable in the local setup.
- Loki: to kickstart your query, select "host" as the label filter and "node-0" (or another node) as the value; this shows all logs for the selected host.
- Tempo: to kickstart your query, enter `{}` as the TraceQL query to see all traces.

View File

@@ -23,12 +23,9 @@ use nomos_executor::config::Config;
use nomos_network::{backends::libp2p::Libp2pConfig, NetworkConfig};
use nomos_node::api::paths::{CL_METRICS, DA_GET_RANGE};
use nomos_node::RocksBackendSettings;
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
use tempfile::NamedTempFile;
use crate::adjust_timeout;
use crate::nodes::LOGS_PREFIX;
use crate::topology::configs::GeneralConfig;
use super::{create_tempdir, persist_tempdir, GetRangeReq, CLIENT};
@@ -62,11 +59,18 @@ impl Executor {
let mut file = NamedTempFile::new().unwrap();
let config_path = file.path().to_owned();
// setup logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
#[cfg(not(feature = "debug"))]
{
use crate::nodes::LOGS_PREFIX;
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
// setup logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
}
config.storage.db_path = dir.path().join("db");
config

View File

@@ -20,8 +20,6 @@ use nomos_node::api::paths::{
};
use nomos_node::{api::backend::AxumBackendSettings, Config, RocksBackendSettings};
use nomos_node::{BlobInfo, HeaderId, Tx};
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
use reqwest::Url;
use tempfile::NamedTempFile;
@@ -65,11 +63,17 @@ impl Validator {
let mut file = NamedTempFile::new().unwrap();
let config_path = file.path().to_owned();
// setup logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
#[cfg(not(feature = "debug"))]
{
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
// setup logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
}
config.storage.db_path = dir.path().join("db");
config

View File

@@ -1,14 +1,63 @@
use nomos_tracing_service::TracingSettings;
use nomos_tracing::{logging::loki::LokiConfig, tracing::otlp::OtlpTracingConfig};
use nomos_tracing_service::{FilterLayer, LoggerLayer, TracingLayer, TracingSettings};
use tracing::Level;
#[derive(Clone)]
#[derive(Clone, Default)]
pub struct GeneralTracingConfig {
pub tracing_settings: TracingSettings,
}
impl GeneralTracingConfig {
#[allow(dead_code)]
fn local_debug_tracing(id: usize) -> Self {
Self {
tracing_settings: TracingSettings {
logger: LoggerLayer::Loki(LokiConfig {
endpoint: "http://localhost:3100".try_into().unwrap(),
host_identifier: format!("node-{id}"),
}),
tracing: TracingLayer::Otlp(OtlpTracingConfig {
endpoint: "http://localhost:4317".try_into().unwrap(),
sample_ratio: 0.1,
}),
filter: FilterLayer::EnvFilter(nomos_tracing::filter::envfilter::EnvFilterConfig {
// Allow events only from modules whose target matches a directive; matching
// targets use the provided tracing level. Libp2p and risc0 related crates are
// very log intensive in debug mode.
filters: [("nomos", "debug")]
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
}),
level: Level::DEBUG,
},
}
}
}
pub fn create_tracing_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
#[cfg(feature = "debug")]
{
create_debug_configs(ids)
}
#[cfg(not(feature = "debug"))]
{
create_default_configs(ids)
}
}
#[allow(dead_code)]
fn create_debug_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
ids.iter()
.map(|_| GeneralTracingConfig {
tracing_settings: Default::default(),
})
.enumerate()
.map(|(i, _)| GeneralTracingConfig::local_debug_tracing(i))
.collect()
}
#[allow(dead_code)]
fn create_default_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
ids.iter()
.map(|_| GeneralTracingConfig::default())
.collect()
}