logclean-job: keep only 9 newest logstash daily indices

Signed-off-by: Jakub Sokołowski <jakub@status.im>
This commit is contained in:
Jakub Sokołowski 2020-02-07 14:11:17 +01:00
parent 24a87596b4
commit 84f80b9aa0
No known key found for this signature in database
GPG Key ID: 4EF064D0E6D63020
9 changed files with 147 additions and 2 deletions

View File

@ -27,6 +27,9 @@ es_lb_data_center: do-ams3
es_lb_cluster_name: 'nimbus-logs-search'
es_lb_api_port: 9200
# Log cleaning job: reach ElasticSearch on this host, using the LB API port.
logclean_es_host: 'localhost'
logclean_es_port: '{{ es_lb_api_port }}'
# JVM Memory settings
es_lb_jvm_min_heap: 2g
es_lb_jvm_max_heap: 2g

View File

@ -39,6 +39,8 @@
roles:
- role: elasticsearch-lb
tags: elasticsearch-lb
- role: logclean-job
tags: logclean-job
- role: kibana
tags: kibana
- role: kibana-logtrail

View File

@ -11,8 +11,8 @@
hosts:
- nimbus-master
roles:
- { role: origin-certs, tags: origin-certs }
- { role: nimbus-stats, tags: nimbus-stats }
#- { role: origin-certs, tags: origin-certs }
#- { role: nimbus-stats, tags: nimbus-stats }
- name: Configure Nimbus cluster
hosts:

View File

@ -0,0 +1,37 @@
# Description
This role installs a systemd timer that runs daily and deletes the oldest ElasticSearch log indices, keeping only the `N` newest ones.
This is done because Nimbus generates an enormous volume of `TRACE` logs.
# Configuration
The main configuration values are:
```yaml
logclean_es_host: '127.0.0.1'
logclean_es_port: 9200
logclean_index_regex: 'logstash-*'
logclean_keep_indices: 120
logclean_service_name: 'logclean-job'
logclean_service_timeout: 60
logclean_timer_frequency: 'daily'
```
As a safety measure, the minimum allowed value for `logclean_keep_indices` is `60`.
# Usage
To check the timer status use:
```
$ sudo systemctl list-timers logclean-job.timer
NEXT LEFT LAST PASSED UNIT ACTIVATES
Sat 2020-02-08 00:00:00 UTC 10h left n/a n/a logclean-job.timer logclean-job.service
```
You can check job logs using:
```
$ sudo journalctl -o cat -a -u logclean-job.service
...
Starting "Job for cleaning ElasticSearch cluster periodically."...
Checking ElasticSearch for indices to clean....
Nothing to remove. (3/90 indices)
Started "Job for cleaning ElasticSearch cluster periodically.".
```

View File

@ -0,0 +1,13 @@
---
# Default variables for the logclean-job role.

# ElasticSearch endpoint queried by the cleanup script.
# No usable default: set both in the inventory.
logclean_es_host: null
logclean_es_port: null

# Where the rendered cleanup script is installed.
logclean_script_path: '/usr/local/bin/logclean'

# Index name pattern to clean, and how many of the newest matches to keep.
logclean_index_regex: 'logstash-*'
logclean_keep_indices: 90

# systemd service that runs the script.
logclean_service_path: '/lib/systemd/system'
logclean_service_name: 'logclean-job'
logclean_service_desc: 'Job for cleaning ElasticSearch cluster periodically.'
logclean_service_user: 'nobody'
logclean_service_after: 'docker.service'
logclean_service_timeout: 60

# OnCalendar= value of the systemd timer that triggers the service.
logclean_timer_frequency: 'daily'

View File

@ -0,0 +1,39 @@
---
# Installs the logclean script together with the systemd service and timer
# that run it periodically.

# Safety net: refuse to deploy a configuration that would keep fewer than
# 60 indices. The bound is inclusive, i.e. exactly 60 is accepted.
- name: 'Log Clean | Verify we keep a minimum of 60 indices'
  assert:
    quiet: true
    that:
      - logclean_keep_indices | int >= 60

- name: 'Log Clean | Create logclean script'
  template:
    src: logclean.sh
    dest: '{{ logclean_script_path }}'
    mode: '0755'

- name: 'Log Clean | Create systemd service: {{ logclean_service_name }}'
  template:
    src: logclean.service.j2
    dest: '{{ logclean_service_path }}/{{ logclean_service_name }}.service'
    mode: '0644'

- name: 'Log Clean | Create systemd timer: {{ logclean_service_name }}'
  template:
    src: logclean.timer.j2
    dest: '{{ logclean_service_path }}/{{ logclean_service_name }}.timer'
    mode: '0644'

# Make systemd pick up the unit files written above.
- name: 'Log Clean | Reload systemd'
  systemd:
    daemon_reload: true

# Starting the service runs the cleanup script right away, in addition to
# the scheduled runs triggered by the timer.
- name: 'Log Clean | (Re)start cleaning service: {{ logclean_service_name }}'
  service:
    name: '{{ logclean_service_name }}.service'
    state: started
    enabled: true

- name: 'Log Clean | Enable the service timer: {{ logclean_service_name }}'
  systemd:
    name: '{{ logclean_service_name }}.timer'
    state: started
    enabled: true

View File

@ -0,0 +1,12 @@
# systemd service that runs the logclean script as a one-off job.
[Unit]
Description="{{ logclean_service_desc }}"
Documentation=https://github.com/status-im/infra-nimbus/tree/master/ansible/roles/logclean-job
Requires=network-online.target
After=network-online.target
After={{ logclean_service_after }}

[Service]
User={{ logclean_service_user }}
ExecStart={{ logclean_script_path }}
# oneshot: the unit counts as started only once the script has exited.
Type=oneshot
# Upper bound on how long the script may run.
TimeoutStartSec={{ logclean_service_timeout }}

View File

@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Deletes the oldest ElasticSearch indices matching a name pattern, keeping
# only the configured number of newest ones.
# This file is a template; the {{ '{{ ... }}' }} values are filled in at deploy time.

# Abort on any failed command, unset variable or failed pipeline stage.
set -euo pipefail

# ElasticSearch access
ES_HOST="{{ logclean_es_host | mandatory }}"
ES_PORT="{{ logclean_es_port | mandatory }}"
# Keep only this number of newest indices
INDICES_KEEP="{{ logclean_keep_indices | mandatory }}"
# Index name pattern handed to the _cat/indices API
ES_REGEX="{{ logclean_index_regex }}"

ES_URL="http://${ES_HOST}:${ES_PORT}"

echo "Checking ElasticSearch for indices to clean...."

# Get list of indices: one name per line, sorted by name in ascending order,
# which for date-suffixed daily indices means oldest first.
# --fail turns an HTTP error into a non-zero exit instead of letting the
# error body be mistaken for index names.
INDICES=$(curl -s --fail "${ES_URL}/_cat/indices/${ES_REGEX}?pretty&h=index&s=index")
# Count non-empty lines so that an empty response gives 0 rather than 1.
# grep exits non-zero when the count is 0, hence the "|| true".
INDICES_NUM=$(grep -c . <<< "${INDICES}" || true)

# If there are no more indices than we want to keep, stop
if [[ ${INDICES_NUM} -le ${INDICES_KEEP} ]]; then
    echo "Nothing to remove. (${INDICES_NUM}/${INDICES_KEEP} indices)"
    exit 0
fi

# The list is sorted oldest-first, so the surplus to delete sits at the head;
# the newest INDICES_KEEP entries at the tail are left untouched.
INDICES_TO_DELETE=$(head -n "$((INDICES_NUM - INDICES_KEEP))" <<< "${INDICES}")
echo "${INDICES_TO_DELETE}"

while IFS= read -r INDEX; do
    echo "Deleting: ${INDEX}"
    # A failed delete aborts the job so the unit is reported as failed.
    curl -s --fail -XDELETE "${ES_URL}/${INDEX}"
done <<< "${INDICES_TO_DELETE}"

View File

@ -0,0 +1,6 @@
# systemd timer; with no Unit= set it activates the service of the same name.
[Unit]
After=multi-user.target

[Timer]
OnCalendar={{ logclean_timer_frequency }}
# Run at the next opportunity if a scheduled run was missed while powered off.
Persistent=yes

# Without an [Install] section the timer cannot be enabled and would not be
# started again after a reboot.
[Install]
WantedBy=timers.target