diff --git a/.env.testnet b/.env.testnet index 4f80beb7..52e82866 100644 --- a/.env.testnet +++ b/.env.testnet @@ -7,3 +7,6 @@ DOCKER_COMPOSE_ETCDCTL_API=3 DOCKER_COMPOSE_BOOSTRAP_NET_NODE_KEY=1000000000000000000000000000000000000000000000000000000000000000 DOCKER_COMPOSE_OVERLAY_NODES=1000000000000000000000000000000000000000000000000000000000000000 DOCKER_COMPOSE_NET_INITIAL_PEERS=/dns/bootstrap/udp/3000/quic-v1 + +GRAYLOG_PASSWORD_SECRET="Jcjw7g22kJw3aSjjnCQ7DiQvlSJJ38WZ2MvuIyZ4RTILUoxGEQb5EsmAAdcp3lnnlwdSKaZTDFcvh4Xq2h4aTsa4HLx3SZxM" +GRAYLOG_ROOT_PASSWORD_SHA2="7092a9ec7c94ba6c452a3937a380b9cfdac8e2d0b342c034ea9e306d41ce6d89" diff --git a/compose.static.yml b/compose.static.yml index 3b9bf7c2..db9cac8b 100644 --- a/compose.static.yml +++ b/compose.static.yml @@ -1,4 +1,5 @@ -version: '3.7' +version: '3.8' + services: bootstrap: container_name: bootstrap @@ -11,6 +12,8 @@ services: - "18080:18080/tcp" volumes: - ./testnet:/etc/nomos + depends_on: + - graylog environment: - BOOTSTRAP_NODE_KEY=${DOCKER_COMPOSE_BOOSTRAP_NET_NODE_KEY:-1000000000000000000000000000000000000000000000000000000000000000} - LIBP2P_NODE_MASK=${DOCKER_COMPOSE_LIBP2P_NODE_KEY_MASK:-2000000000000000000000000000000000000000000000000000000000000000} @@ -26,6 +29,7 @@ services: depends_on: - bootstrap - etcd + - graylog ports: - "3001:3000/udp" - "18081:18080/tcp" @@ -47,6 +51,7 @@ services: depends_on: - bootstrap - etcd + - graylog ports: - "3002:3000/udp" - "18082:18080/tcp" @@ -68,6 +73,7 @@ services: depends_on: - bootstrap - etcd + - graylog ports: - "3003:3000/udp" - "18083:18080/tcp" @@ -123,3 +129,64 @@ services: restart: on-failure depends_on: - prometheus + + # Graylog related configuration + # More information at https://github.com/Graylog2/docker-compose/blob/main/open-core/docker-compose.yml + mongodb: + image: "mongo:5.0" + volumes: + - "mongodb_data:/data/db" + restart: "on-failure" + + datanode: + image: "${DATANODE_IMAGE:-graylog/graylog-datanode:5.2}" + hostname: "datanode" + environment: + GRAYLOG_DATANODE_NODE_ID_FILE: "/var/lib/graylog-datanode/node-id" + GRAYLOG_DATANODE_PASSWORD_SECRET: "${GRAYLOG_PASSWORD_SECRET:?Please configure GRAYLOG_PASSWORD_SECRET in the .env file}" + GRAYLOG_DATANODE_ROOT_PASSWORD_SHA2: "${GRAYLOG_ROOT_PASSWORD_SHA2:?Please configure GRAYLOG_ROOT_PASSWORD_SHA2 in the .env file}" + GRAYLOG_DATANODE_MONGODB_URI: "mongodb://mongodb:27017/graylog" + ulimits: + memlock: + hard: -1 + soft: -1 + nofile: + soft: 65536 + hard: 65536 + ports: + - "8999:8999/tcp" # DataNode API + - "9200:9200/tcp" + - "9300:9300/tcp" + volumes: + - "graylog-datanode:/var/lib/graylog-datanode" + restart: "on-failure" + + graylog: + image: "${GRAYLOG_IMAGE:-graylog/graylog:5.2}" + depends_on: + mongodb: + condition: "service_started" + entrypoint: "/usr/bin/tini -- /docker-entrypoint.sh" + environment: + GRAYLOG_NODE_ID_FILE: "/usr/share/graylog/data/data/node-id" + GRAYLOG_PASSWORD_SECRET: "${GRAYLOG_PASSWORD_SECRET:?Please configure GRAYLOG_PASSWORD_SECRET in the .env file}" + GRAYLOG_ROOT_PASSWORD_SHA2: "${GRAYLOG_ROOT_PASSWORD_SHA2:?Please configure GRAYLOG_ROOT_PASSWORD_SHA2 in the .env file}" + GRAYLOG_HTTP_BIND_ADDRESS: "0.0.0.0:9000" + GRAYLOG_HTTP_EXTERNAL_URI: "http://localhost:9000/" + GRAYLOG_MONGODB_URI: "mongodb://mongodb:27017/graylog" + ports: + - "9000:9000/tcp" # Server API + - "12201:12201/tcp" # GELF TCP + - "12201:12201/udp" # GELF UDP + volumes: + - "graylog_data:/usr/share/graylog/data/data" + - "graylog_journal:/usr/share/graylog/data/journal" + - ./testnet/monitoring/graylog.conf:/usr/share/graylog/data/config/graylog.conf + restart: "on-failure" + + +volumes: + mongodb_data: + graylog-datanode: + graylog_data: + graylog_journal: diff --git a/nodes/nomos-node/src/config.rs b/nodes/nomos-node/src/config.rs index 9286a57b..fa0a2295 100644 --- a/nodes/nomos-node/src/config.rs +++ b/nodes/nomos-node/src/config.rs @@ -1,5 +1,5 @@ use std::{ - net::{IpAddr, SocketAddr}, + net::{IpAddr, SocketAddr, ToSocketAddrs}, path::PathBuf, }; @@ -31,7 +31,7 @@ pub enum LoggerBackendType { pub struct LogArgs { /// Address for the Gelf backend #[clap(long = "log-addr", env = "LOG_ADDR", required_if_eq("backend", "Gelf"))] - log_addr: Option, + log_addr: Option, /// Directory for the File backend #[clap(long = "log-dir", env = "LOG_DIR", required_if_eq("backend", "File"))] @@ -135,7 +135,11 @@ impl Config { if let Some(backend) = backend { self.log.backend = match backend { LoggerBackendType::Gelf => LoggerBackend::Gelf { - addr: addr.ok_or_else(|| eyre!("Gelf backend requires an address."))?, + addr: addr + .ok_or_else(|| eyre!("Gelf backend requires an address."))? + .to_socket_addrs()? + .next() + .ok_or_else(|| eyre!("Invalid gelf address"))?, }, LoggerBackendType::File => LoggerBackend::File { directory: directory diff --git a/nomos-services/log/Cargo.toml b/nomos-services/log/Cargo.toml index a7c8cde5..7a70fb13 100644 --- a/nomos-services/log/Cargo.toml +++ b/nomos-services/log/Cargo.toml @@ -7,10 +7,11 @@ edition = "2021" [dependencies] async-trait = "0.1" +futures = "0.3" overwatch-rs = { git = "https://github.com/logos-co/Overwatch", rev = "2f70806" } serde = { version = "1.0", features = ["derive"] } +tokio = { version = "1", features = ["time"] } tracing = "0.1" tracing-appender = "0.2" tracing-subscriber = { version = "0.3", features = ["json"] } tracing-gelf = "0.7" -futures = "0.3" diff --git a/nomos-services/log/src/lib.rs b/nomos-services/log/src/lib.rs index 8578e08a..8d7648ee 100644 --- a/nomos-services/log/src/lib.rs +++ b/nomos-services/log/src/lib.rs @@ -5,6 +5,7 @@ use std::io::Write; use std::net::SocketAddr; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use std::time::Duration; // crates use serde::{Deserialize, Serialize}; use tracing::{error, Level}; @@ -19,6 +20,8 @@ use overwatch_rs::services::{ ServiceCore, ServiceData, }; +const GELF_RECONNECT_INTERVAL: u64 = 10; + pub struct Logger { service_state: ServiceStateHandle, worker_guard: Option, @@ -152,11 +155,20 @@ impl ServiceCore for Logger { let config = service_state.settings_reader.get_updated_settings(); let (non_blocking, _guard) = match config.backend { LoggerBackend::Gelf { addr } => { - let (layer, mut task) = tracing_gelf::Logger::builder().connect_tcp(addr).unwrap(); - service_state - .overwatch_handle - .runtime() - .spawn(async move { task.connect().await }); + let (layer, mut task) = tracing_gelf::Logger::builder() + .connect_tcp(addr) + .expect("Connect to the graylog instance"); + service_state.overwatch_handle.runtime().spawn(async move { + loop { + if task.connect().await.0.is_empty() { + break; + } else { + eprintln!("Failed to connect to graylog"); + let delay = Duration::from_secs(GELF_RECONNECT_INTERVAL); + tokio::time::sleep(delay).await; + } + } + }); #[cfg(test)] ONCE_INIT.call_once(move || { registry_init!(layer, config.format, config.level); diff --git a/testnet/monitoring/graylog.conf b/testnet/monitoring/graylog.conf new file mode 100644 index 00000000..5e1d8b6d --- /dev/null +++ b/testnet/monitoring/graylog.conf @@ -0,0 +1,46 @@ +elasticsearch_discovery_enabled = false + +node_id_file = /usr/share/graylog/data/data/node-id + +http_bind_address = 0.0.0.0:9000 +http_external_uri = http://localhost:9000/ + +mongodb_uri = mongodb://mongodb:27017/graylog +is_leader = true + +password_secret = +root_password_sha2 = + +bin_dir = /usr/share/graylog/bin +data_dir = /usr/share/graylog/data +plugin_dir = /usr/share/graylog/plugin + +stream_aware_field_types=false + +disabled_retention_strategies = none +allow_leading_wildcard_searches = false + +allow_highlighting = false + +output_batch_size = 500 + +output_flush_interval = 1 + +output_fault_count_threshold = 5 +output_fault_penalty_seconds = 30 + +processbuffer_processors = 5 +outputbuffer_processors = 3 +processor_wait_strategy = blocking + +ring_size = 65536 + +inputbuffer_ring_size = 65536 +inputbuffer_processors = 2 +inputbuffer_wait_strategy = blocking + +message_journal_enabled = true +message_journal_dir = data/journal +lb_recognition_period_seconds = 3 + +mongodb_max_connections = 1000 diff --git a/testnet/scripts/run_bootstrap_node.sh b/testnet/scripts/run_bootstrap_node.sh index 96fc1d7c..96e4bd5f 100755 --- a/testnet/scripts/run_bootstrap_node.sh +++ b/testnet/scripts/run_bootstrap_node.sh @@ -25,4 +25,4 @@ echo "CONSENSUS_COIN_VALUE: ${CONSENSUS_COIN_VALUE}" echo "DA_VOTER: ${DA_VOTER}" echo "OVERLAY_NODES: ${OVERLAY_NODES}" -exec /usr/bin/nomos-node /etc/nomos/bootstrap_config.yaml --with-metrics +exec /usr/bin/nomos-node /etc/nomos/bootstrap_config.yaml --with-metrics --log-backend gelf --log-addr graylog:12201 diff --git a/testnet/scripts/run_nomos_node.sh b/testnet/scripts/run_nomos_node.sh index 10c0f719..f0563488 100755 --- a/testnet/scripts/run_nomos_node.sh +++ b/testnet/scripts/run_nomos_node.sh @@ -41,4 +41,4 @@ echo "DA_VOTER: ${DA_VOTER}" echo "OVERLAY_NODES: ${OVERLAY_NODES}" echo "NET_INITIAL_PEERS: ${NET_INITIAL_PEERS}" -exec /usr/bin/nomos-node /etc/nomos/libp2p_config.yaml --with-metrics +exec /usr/bin/nomos-node /etc/nomos/libp2p_config.yaml --with-metrics --log-backend gelf --log-addr graylog:12201