2020-07-14 08:37:18 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
from os import path
|
2020-07-14 10:08:17 +00:00
|
|
|
from datetime import datetime
|
2020-07-14 08:37:18 +00:00
|
|
|
from optparse import OptionParser
|
|
|
|
|
2020-07-14 10:11:31 +00:00
|
|
|
from log import setup_custom_logger
|
2020-07-14 08:37:18 +00:00
|
|
|
from query import ESQueryPeers
|
2020-07-14 09:28:40 +00:00
|
|
|
from postgres import PGDatabase
|
2020-07-14 08:37:18 +00:00
|
|
|
|
|
|
|
# Description text passed to OptionParser(description=...) in parse_opts().
# NOTE(review): the text mentions a CSV, but main() hands the collected peers
# to psg.inject_peers() -- confirm whether this wording is still accurate.
HELP_DESCRIPTION = 'This generates a CSV with buckets of peer_ids for every day.'
|
2020-07-15 17:49:06 +00:00
|
|
|
# Usage example passed to OptionParser(epilog=...) in parse_opts().
# NOTE(review): the example names ./unique_count.py -- confirm that this
# matches the actual name of this script.
HELP_EXAMPLE = 'Example: ./unique_count.py -i "logstash-2019.11.*" -f "peer_id"'
|
2020-07-14 08:37:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def parse_opts():
    """Parse the command line options of this script.

    Options are read from ``sys.argv`` using ``optparse``. The parser exits
    with a usage error when the field name is empty, or when a port or the
    max size is not a valid integer.

    Returns:
        The ``(opts, args)`` tuple produced by ``OptionParser.parse_args``:
        the parsed option values and the leftover positional arguments.
    """
    parser = OptionParser(description=HELP_DESCRIPTION, epilog=HELP_EXAMPLE)

    # ElasticSearch connection settings.
    parser.add_option('-H', '--es-host', default='localhost',
                      help='ElasticSearch host.')
    # Ports are declared as integers so that a value given on the command
    # line has the same type as the default instead of arriving as a string.
    parser.add_option('-P', '--es-port', type='int', default=9200,
                      help='ElasticSearch port.')

    # PostgreSQL connection settings.
    parser.add_option('-d', '--db-host', default='localhost',
                      help='PostgreSQL host.')
    parser.add_option('-b', '--db-port', type='int', default=5432,
                      help='PostgreSQL port.')
    parser.add_option('-u', '--db-user', default='postgres',
                      help='PostgreSQL user.')
    parser.add_option('-p', '--db-pass', default='postgres',
                      help='PostgreSQL password.')
    parser.add_option('-n', '--db-name', default='postgres',
                      help='PostgreSQL database name.')

    # Query settings.
    parser.add_option('-i', '--index-pattern', default='logstash-*',
                      help='Pattern for matching indices.')
    parser.add_option('-f', '--field', default='peer_id',
                      help='Name of the field to count.')
    parser.add_option('-F', '--fleet', default='eth.prod',
                      help='Name of the fleet to query.')
    parser.add_option('-D', '--program', default='docker/statusd-whisper-node',
                      help='Name of the program to query.')
    parser.add_option('-m', '--max-size', type='int', default=100000,
                      help='Max number of counts to find.')

    parser.add_option('-l', '--log-level', default='INFO',
                      help='Level of logging.')

    (opts, args) = parser.parse_args()

    # The field has a default, so this only triggers on an explicitly empty
    # value such as `-f ""`.
    if not opts.field:
        parser.error('No field name specified!')

    return (opts, args)
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Collect peers from ElasticSearch indices and inject them into the database.

    Every index matching ``opts.index_pattern`` is queried through
    ``ESQueryPeers.get_peers``, except indices whose day is already reported
    by ``PGDatabase.get_present_days`` and the index named after the current
    day. The collected results are passed to ``PGDatabase.inject_peers`` in a
    single call.

    Raises:
        SystemExit: with status 0 when no peers were collected, so nothing
            is written to the database.
    """
    opts, _ = parse_opts()

    log = setup_custom_logger('root', opts.log_level.upper())

    esq = ESQueryPeers(
        opts.es_host,
        opts.es_port,
    )
    psg = PGDatabase(
        opts.db_name,
        opts.db_user,
        opts.db_pass,
        opts.db_host,
        opts.db_port,
    )

    # Index names for the days already present in the database. A set keeps
    # the membership test inside the loop constant-time.
    present_indices = {
        'logstash-{}'.format(day.replace('-', '.'))
        for day in psg.get_present_days()
    }
    # Name of the index for the current day, computed once for the whole run.
    # NOTE(review): this uses the local clock -- confirm it matches the
    # timezone used when the indices are named.
    current_index = datetime.now().strftime('logstash-%Y.%m.%d')

    log.info('Querying ES cluster for peers...')
    peers = []
    for index in esq.get_indices(opts.index_pattern):
        # skip already injected indices
        if index in present_indices:
            log.debug('Skipping existing index: %s', index)
            continue
        # skip current day as it's incomplete
        if index == current_index:
            log.debug('Skipping incomplete current day.')
            continue

        log.info('Index: %s', index)
        rval = esq.get_peers(
            index=index,
            field=opts.field,
            fleet=opts.fleet,
            program=opts.program,
            max_query=opts.max_size,
        )
        if len(rval) == 0:
            log.warning('No entries found!')
        log.debug('Found: %s', len(rval))
        peers.extend(rval)

    if not peers:
        log.info('Nothing to insert into database.')
        # Equivalent to the interactive exit(0) helper, without depending on
        # the `site` module having installed it.
        raise SystemExit(0)

    log.info('Injecting peers data into database...')
    psg.inject_peers(peers)
|
2020-07-14 08:37:18 +00:00
|
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|