nimbus-stats: port to use systemd timer, fix consul query
Signed-off-by: Jakub Sokołowski <jakub@status.im>
This commit is contained in:
parent
9091772f37
commit
fcd17845b8
|
@ -1,8 +1,12 @@
|
|||
---
|
||||
# general container config
|
||||
cont_restart: false
|
||||
cont_recreate: 'smart'
|
||||
cont_recreate: false
|
||||
cont_state: 'present'
|
||||
# general compose config
|
||||
compose_restart: false
|
||||
compose_recreate: 'smart'
|
||||
compose_state: 'present'
|
||||
|
||||
# CloudFlare Origin certificates
|
||||
origin_certs:
|
||||
|
@ -12,7 +16,7 @@ origin_certs:
|
|||
default: true
|
||||
|
||||
# Consul Catalog Query URL
|
||||
consul_catalog_url: 'http://localhost:8500/v1/catalog/service'
|
||||
consul_catalog_url: 'http://localhost:8500/v1/catalog'
|
||||
|
||||
# Root password
|
||||
bootstrap__root_pass: '{{lookup("passwordstore", "hosts/admin-pass")}}'
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
pre_tasks:
|
||||
- name: Fetch available elasticsearch nodes
|
||||
uri:
|
||||
url: '{{ consul_catalog_url }}/elasticsearch?dc={{ data_center }}&tag={{ es_lb_cluster_name }}'
|
||||
url: '{{ consul_catalog_url }}/service/elasticsearch?dc={{ data_center }}&tag={{ es_lb_cluster_name }}'
|
||||
register: es_services
|
||||
- name: Extract ElasticSearch hostnames
|
||||
set_fact:
|
||||
|
|
|
@ -11,8 +11,8 @@
|
|||
hosts:
|
||||
- nimbus-master
|
||||
roles:
|
||||
#- { role: origin-certs, tags: origin-certs }
|
||||
#- { role: nimbus-stats, tags: nimbus-stats }
|
||||
- { role: origin-certs, tags: origin-certs }
|
||||
- { role: nimbus-stats, tags: nimbus-stats }
|
||||
|
||||
- name: Configure Nimbus cluster
|
||||
hosts:
|
||||
|
|
|
@ -53,3 +53,7 @@
|
|||
- name: elasticsearch-lb
|
||||
src: git@github.com:status-im/infra-role-elasticsearch-lb.git
|
||||
scm: git
|
||||
|
||||
- name: systemd-timer
|
||||
src: git@github.com:status-im/infra-role-systemd-timer.git
|
||||
scm: git
|
||||
|
|
|
@ -7,6 +7,9 @@ This role defined a simple service which lists the current state of the nodes in
|
|||
The only setting that should be usually changed is the domain:
|
||||
```yaml
|
||||
nimbus_stats_domain: my-amazing-domain.example.org
|
||||
# ElasticSearch Load Balancer service to query for
|
||||
consul_service_name: elasticsearch-lb
|
||||
consul_service_tag: my-logs-cluster-name
|
||||
```
|
||||
|
||||
# Script
|
||||
|
@ -34,11 +37,13 @@ Options:
|
|||
-p PROGRAM, --program=PROGRAM
|
||||
Program to query for. (*beacon-node-*)
|
||||
-s SINCE, --since=SINCE
|
||||
Period for which to query logs. (now-30m)
|
||||
Period for which to query logs. (now-15m)
|
||||
-S PAGE_SIZE, --page-size=PAGE_SIZE
|
||||
Size of results page. (10000)
|
||||
-f FLEET, --fleet=FLEET
|
||||
Fleet to query for. (nimbus.test)
|
||||
-t TIMEOUT, --timeout=TIMEOUT
|
||||
Connection timeout in seconds. (120)
|
||||
-l LOG_LEVEL, --log-level=LOG_LEVEL
|
||||
Logging level. (INFO)
|
||||
-o OUTPUT_FILE, --output-file=OUTPUT_FILE
|
||||
|
@ -47,7 +52,31 @@ Options:
|
|||
Example: collect -i logstash-2019.03.01 output.json
|
||||
```
|
||||
|
||||
# Timer
|
||||
|
||||
The script runs on a [systemd timer](https://github.com/status-im/infra-role-systemd-timer) which can be checked with:
|
||||
```
|
||||
$ sudo systemctl list-timers -a nimbus-stats.timer
|
||||
NEXT LEFT LAST PASSED UNIT ACTIVATES
|
||||
Wed 2020-02-19 10:50:00 UTC 37s left Wed 2020-02-19 10:45:00 UTC 4min 21s ago nimbus-stats.timer nimbus-stats.service
|
||||
```
|
||||
The timer triggers the `nimbus-stats` service, whose status can be checked with:
|
||||
```
|
||||
$ sudo systemctl status nimbus-stats.service
|
||||
● nimbus-stats.service - Generates stats for Nimbus cluster.
|
||||
Loaded: loaded (/lib/systemd/system/nimbus-stats.service; static; vendor preset: enabled)
|
||||
Active: inactive (dead) since Wed 2020-02-19 10:47:24 UTC; 2min 33s ago
|
||||
Docs: https://github.com/status-im/infra-role-systemd-timer
|
||||
Process: 24950 ExecStart=/usr/local/bin/nimbus-stats (code=exited, status=0/SUCCESS)
|
||||
Main PID: 24950 (code=exited, status=0/SUCCESS)
|
||||
|
||||
Feb 19 10:47:21 master-01.aws-eu-central-1a.nimbus.test systemd[1]: Starting Generates stats for Nimbus cluster....
|
||||
Feb 19 10:47:22 master-01.aws-eu-central-1a.nimbus.test nimbus-stats[24950]: [INFO]: Querying fleet: nimbus.test
|
||||
Feb 19 10:47:24 master-01.aws-eu-central-1a.nimbus.test nimbus-stats[24950]: [INFO]: Found matching logs: 10000
|
||||
Feb 19 10:47:24 master-01.aws-eu-central-1a.nimbus.test nimbus-stats[24950]: [INFO]: Saving to file: /var/www/nimbus/nimbus_stats.json
|
||||
Feb 19 10:47:24 master-01.aws-eu-central-1a.nimbus.test systemd[1]: Started Generates stats for Nimbus cluster..
|
||||
```
|
||||
|
||||
# Context
|
||||
|
||||
For more details see: https://github.com/status-im/infra-nimbus/issues/1
|
||||
|
||||
|
|
|
@ -5,9 +5,10 @@ nimbus_stats_json_name: nimbus_stats.json
|
|||
nimbus_stats_json: '{{ nimbus_stats_web_root }}/{{ nimbus_stats_json_name }}'
|
||||
nimbus_stats_script: /usr/local/bin/collect_nimbus_stats.py
|
||||
nimbus_stats_cron_script: /usr/local/bin/save_nimbus_stats.py
|
||||
nimbus_stats_service_name: 'nimbus-stats'
|
||||
|
||||
# necessary to query for ES lb
|
||||
consul_base_url: 'http://localhost:8500/v1/catalog'
|
||||
consul_catalog_url: 'http://localhost:8500/v1/catalog'
|
||||
consul_service_name: elasticsearch-lb
|
||||
consul_service_tag: status-logs-search
|
||||
consul_query_url: '{{ consul_base_url }}/service/{{ consul_service_name }}?tag={{ consul_service_tag }}'
|
||||
consul_service_tag: nimbus-logs-search
|
||||
consul_query_url: '{{ consul_catalog_url }}/service/{{ consul_service_name }}?tag={{ consul_service_tag }}'
|
||||
|
|
|
@ -20,14 +20,14 @@ DEFAULT_MESSAGES = [
|
|||
ENV = os.environ
|
||||
LOG = logging.getLogger('root')
|
||||
handler = logging.StreamHandler(sys.stderr)
|
||||
formatter = logging.Formatter('%(asctime)s [%(levelname)s]: %(message)s')
|
||||
formatter = logging.Formatter('[%(levelname)s]: %(message)s')
|
||||
handler.setFormatter(formatter)
|
||||
LOG.addHandler(handler)
|
||||
|
||||
class ES:
|
||||
def __init__(self, host, port, page_size):
|
||||
def __init__(self, host, port, page_size, timeout):
|
||||
self.page_size = page_size
|
||||
self.es = Elasticsearch([host], port=port, timeout=30)
|
||||
self.es = Elasticsearch([host], port=port, timeout=timeout)
|
||||
|
||||
def make_query(self, fleet, program, messages, after):
|
||||
return {
|
||||
|
@ -99,12 +99,14 @@ def parse_opts():
|
|||
help='ElasticSearch port. (%default)')
|
||||
parser.add_option('-p', '--program', default='*beacon-node-*',
|
||||
help='Program to query for. (%default)')
|
||||
parser.add_option('-s', '--since', default='now-30m',
|
||||
parser.add_option('-s', '--since', default='now-15m',
|
||||
help='Period for which to query logs. (%default)')
|
||||
parser.add_option('-S', '--page-size', default=10000,
|
||||
help='Size of results page. (%default)')
|
||||
parser.add_option('-f', '--fleet', default='nimbus.test',
|
||||
help='Fleet to query for. (%default)')
|
||||
parser.add_option('-t', '--timeout', default=120,
|
||||
help='Connection timeout in seconds. (%default)')
|
||||
parser.add_option('-l', '--log-level', default='INFO',
|
||||
help='Logging level. (%default)')
|
||||
parser.add_option('-o', '--output-file',
|
||||
|
@ -123,13 +125,13 @@ def main():
|
|||
|
||||
debug_options(opts)
|
||||
|
||||
es = ES(opts.es_host, opts.es_port, opts.page_size)
|
||||
es = ES(opts.es_host, opts.es_port, opts.page_size, opts.timeout)
|
||||
|
||||
LOG.info('Querying fleet: %s', opts.fleet)
|
||||
query = es.make_query(opts.fleet, opts.program, opts.messages, opts.since)
|
||||
rval = es.get_logs(query)
|
||||
|
||||
LOG.info('Found matching logs: %d', rval['hits']['total'])
|
||||
LOG.info('Found matching logs: %d', rval['hits']['total']['value'])
|
||||
logs = rval['hits']['hits']
|
||||
|
||||
data = get_first_for_node(logs)
|
||||
|
|
|
@ -15,6 +15,12 @@
|
|||
url: '{{ consul_query_url }}'
|
||||
register: es_lbs
|
||||
|
||||
- name: Verify a load balancer was found
|
||||
assert:
|
||||
that: es_lbs.json | length > 0
|
||||
quiet: true
|
||||
fail_msg: 'No ElasticSearch LB found!'
|
||||
|
||||
- name: Install stat collecting script
|
||||
copy:
|
||||
src: collect.py
|
||||
|
@ -22,27 +28,15 @@
|
|||
mode: 0755
|
||||
register: nimbus_script
|
||||
|
||||
- name: Create the cron script
|
||||
copy:
|
||||
dest: '{{ nimbus_stats_cron_script }}'
|
||||
mode: 0755
|
||||
content: |
|
||||
#!/usr/bin/env bash
|
||||
exec {{ nimbus_stats_script }} \
|
||||
-H {{ es_lbs.json[0].ServiceAddress }} \
|
||||
-o {{ nimbus_stats_json }}
|
||||
|
||||
- name: Create www directory
|
||||
file:
|
||||
path: '{{ nimbus_stats_web_root }}'
|
||||
state: directory
|
||||
owner: www-data
|
||||
group: www-data
|
||||
recurse: true
|
||||
mode: 0755
|
||||
|
||||
- name: Run the script before Nginx configuration
|
||||
command: '{{ nimbus_stats_cron_script }}'
|
||||
when: nimbus_script.changed
|
||||
|
||||
- name: Create nginx config
|
||||
template:
|
||||
src: proxy.conf.j2
|
||||
|
@ -67,9 +61,22 @@
|
|||
notify:
|
||||
- Save iptables rules
|
||||
|
||||
- name: Create a cron job for updating stats
|
||||
cron:
|
||||
name: Nimbus Fleet Stats
|
||||
minute: '*/5'
|
||||
user: root
|
||||
job: '{{ nimbus_stats_cron_script }}'
|
||||
- name: Set systemd timer
|
||||
include_role: name=systemd-timer
|
||||
vars:
|
||||
systemd_timer_name: '{{ nimbus_stats_service_name }}'
|
||||
systemd_timer_description: 'Generates stats for Nimbus cluster.'
|
||||
systemd_timer_user: 'www-data'
|
||||
systemd_timer_frequency: '*:0/5' # every 5 minutes
|
||||
systemd_timer_timeout_sec: 120
|
||||
systemd_timer_requires_extra: 'network.target'
|
||||
systemd_timer_script_content: |
|
||||
#!/usr/bin/env bash
|
||||
exec {{ nimbus_stats_script }} \
|
||||
-H {{ es_lbs.json[0].ServiceAddress }} \
|
||||
-o {{ nimbus_stats_json }}
|
||||
|
||||
- name: Run the script before Nginx configuration
|
||||
systemd:
|
||||
name: '{{ nimbus_stats_service_name }}'
|
||||
state: 'started'
|
||||
|
|
Loading…
Reference in New Issue