first working version generating graphs
Signed-off-by: Jakub Sokołowski <jakub@status.im>
This commit is contained in:
parent
7a8063839f
commit
3a4f3ffc4a
|
@ -0,0 +1,25 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt; plt.rcdefaults()
|
||||
import matplotlib.colors as mcolors
|
||||
from operator import attrgetter
|
||||
|
||||
class PDGraphPeers():
    """Pandas/Seaborn helpers for plotting per-peer activity data.

    Expects `data` to be a list of dicts with at least 'Peer' and 'Date'
    keys, as produced by ESQueryPeers.get_peers().
    """

    def __init__(self, data):
        # One row per (Date, Peer, Count) record.
        self.df = pd.DataFrame(data)

    def unique_peers_counts(self):
        """Return a Series mapping each peer to the number of distinct
        days (unique 'Date' values) on which it was seen."""
        return self.df.groupby(['Peer'])['Date'].nunique()

    def number_of_days(self, exclude=20):
        """Plot a histogram of days-seen per peer.

        Peers seen on `exclude` days or fewer are dropped so the
        low-activity tail doesn't dominate the plot.

        Returns the matplotlib Figure containing the plot.
        """
        nu_peers = self.unique_peers_counts()
        ex_day = nu_peers[nu_peers > exclude]
        ax = sns.distplot(ex_day, kde=False, hist=True)
        ax.set(
            # BUGFIX: the title used to hard-code "20 days" even when a
            # different `exclude` threshold was passed.
            title='Distribution of number of days per peers excluding'
                  ' {} days'.format(exclude),
            xlabel='# of days',
            ylabel='# of peers'
        )
        return ax.get_figure()
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
from optparse import OptionParser
|
||||
|
||||
from query import ESQueryPeers
|
||||
from graph import PDGraphPeers
|
||||
|
||||
# One-line summary shown at the top of the `--help` output.
HELP_DESCRIPTION='This generates a CSV with buckets of peer_ids for every day.'
# Example invocation appended to the end of the help output (epilog).
HELP_EXAMPLE='Example: ./unique_count.py -i "logstash-2019.11.*" -f peer_id'
|
||||
|
||||
def parse_opts():
    """Parse command-line options for the script.

    Returns:
        (opts, args) tuple from OptionParser.parse_args().

    Exits via parser.error() if the field name is empty.
    """
    parser = OptionParser(description=HELP_DESCRIPTION, epilog=HELP_EXAMPLE)
    parser.add_option('-H', '--host', dest='es_host', default='localhost',
                      help='ElasticSearch host.')
    # type='int' so a port given on the CLI isn't passed on as a string.
    parser.add_option('-P', '--port', dest='es_port', type='int', default=9200,
                      help='ElasticSearch port.')
    parser.add_option('-i', '--index-pattern', default='logstash-*',
                      help='Pattern for matching indices.')
    parser.add_option('-f', '--field', type='str', default='peer_id',
                      help='Name of the field to count.')
    # type='int' for the same reason as --port.
    parser.add_option('-m', '--max-size', type='int', default=10000,
                      help='Max number of counts to find.')
    (opts, args) = parser.parse_args()

    if not opts.field:
        parser.error('No field name specified!')

    return (opts, args)
|
||||
|
||||
def main():
    """Query ElasticSearch for per-index peer counts and plot them."""
    opts, _ = parse_opts()

    esq = ESQueryPeers(opts.es_host, opts.es_port)

    # One record per (index, peer) pair, across every matching index.
    data = []
    for index in esq.get_indices(opts.index_pattern):
        print('Index: {}'.format(index))
        peers = esq.get_peers(index, opts.field, opts.max_size)
        data.extend(peers)

    pdg = PDGraphPeers(data)

    print(pdg.unique_peers_counts())
    figure = pdg.number_of_days()
    figure.savefig("output.png")


if __name__ == '__main__':
    main()
|
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
|
@ -0,0 +1,47 @@
|
|||
import hashlib
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
def remove_prefix(text, prefix):
    """Return `text` with `prefix` stripped from the front, if present;
    otherwise return `text` unchanged."""
    if text.startswith(prefix):
        return text[len(prefix):]
    return text
|
||||
|
||||
def hash_string(text):
    """Return the hex SHA-256 digest of `text` (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(text.encode('utf-8'))
    return digest.hexdigest()
|
||||
|
||||
class ESQueryPeers():
    """Thin ElasticSearch client for extracting per-index peer counts."""

    def __init__(self, host='localhost', port=9200, timeout=1200):
        # Long timeout plus retry: terms aggregations over large
        # logstash indices can be slow.
        self.client = Elasticsearch(
            [{ 'host': host,
               'port': port, }],
            timeout=timeout,
            retry_on_timeout=True
        )
        self.cluster = self.client.info().get('cluster_name')

    def get_indices(self, pattern='logstash-*'):
        """Return the names of all indices matching `pattern`."""
        return self.client.indices.get(index=pattern).keys()

    def get_peers(self, index, field='peer_id', max_query=10000):
        """Aggregate document counts per `field` value in `index`.

        Returns a list of dicts with keys:
            'Date'  - index name with the 'logstash-' prefix removed,
            'Peer'  - SHA-256 hash of the field value (anonymized),
            'Count' - number of documents with that value.
        """
        body = {
            'size': 0, # Don't return actual values
            'aggs': { 'peers': {
                'terms': {
                    'field': field,
                    # BUGFIX: honour the max_query parameter instead of
                    # a hard-coded 10000 bucket limit.
                    'size': max_query,
                },
            }, },
        }
        # Query
        resp = self.client.search(index=index, body=body)
        aggs = resp.get('aggregations')

        # Collect results as list of dicts
        rval = []
        for bucket in aggs['peers']['buckets']:
            rval.append({
                'Date': remove_prefix(index, 'logstash-'),
                'Peer': hash_string(bucket['key']),
                'Count': bucket['doc_count'],
            })

        return rval
|
|
@ -0,0 +1,11 @@
|
|||
cycler==0.10.0
|
||||
kiwisolver==1.2.0
|
||||
matplotlib==3.2.2
|
||||
numpy==1.19.0
|
||||
pandas==1.0.5
|
||||
pyparsing==2.4.7
|
||||
python-dateutil==2.8.1
|
||||
pytz==2020.1
|
||||
scipy==1.5.1
|
||||
seaborn==0.10.1
|
||||
six==1.15.0
|
Loading…
Reference in New Issue