From f0e4fb08465ac3d7a657fa76b3d83aebd11e66b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Soko=C5=82owski?=
Date: Tue, 26 May 2020 20:50:41 +0200
Subject: [PATCH] es: add utility for removing fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jakub Sokołowski
---
 elasticsearch/delete_field.py | 84 +++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100755 elasticsearch/delete_field.py

diff --git a/elasticsearch/delete_field.py b/elasticsearch/delete_field.py
new file mode 100755
index 0000000..450d406
--- /dev/null
+++ b/elasticsearch/delete_field.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""Remove a field from Elasticsearch documents that contain it."""
+import os
+import json
+from optparse import OptionParser
+from elasticsearch import Elasticsearch
+
+HELP_DESCRIPTION='This is a utility for deleting fields from documents.'
+HELP_EXAMPLE='Example: ./delete_field.py -i 2018-10-01 -f peer_id --delete'
+
+def parse_opts():
+    """Parse command line options.
+
+    Returns the (opts, args) pair from optparse. Exits with a usage
+    error when no field name was given.
+    """
+    parser = OptionParser(description=HELP_DESCRIPTION, epilog=HELP_EXAMPLE)
+    parser.add_option('-H', '--host', dest='es_host', default='localhost',
+                      help='ElasticSearch host.')
+    parser.add_option('-P', '--port', dest='es_port', type='int', default=9200,
+                      help='ElasticSearch port.')
+    parser.add_option('-i', '--index-pattern', default='logstash-*',
+                      help='Pattern for matching indices.')
+    parser.add_option('-f', '--field', type='str',
+                      help='Name of field to remove from documents.')
+    parser.add_option('-d', '--delete', action='store_true',
+                      help='Delete field from matching documents.')
+    (opts, args) = parser.parse_args()
+
+    if not opts.field:
+        parser.error('Field not specified!')
+
+    return (opts, args)
+
+def main():
+    """Report how many documents have the field and optionally remove it."""
+    (opts, args) = parse_opts()
+
+    es = Elasticsearch(
+        [{ 'host': opts.es_host,
+           'port': opts.es_port }],
+        timeout=1200,
+        retry_on_timeout=True
+    )
+
+    print('Cluster: {}'.format(es.info().get('cluster_name')))
+
+    indices = es.indices.get(index=opts.index_pattern).keys()
+
+    query = {'bool':{'must':{'exists':{'field':opts.field}}}}
+
+    for index in indices:
+        # Count with a query-only body: the script belongs to the update
+        # request and must not leak into the next count call.
+        resp = es.count(index=index, body={'query': query})
+        count = resp.get('count')
+        print('{:22} count: {:6}'.format(index, count))
+
+        if not (opts.delete and count > 0):
+            continue
+
+        # Pass the field name as a script parameter so the field given
+        # with -f is the one removed, without splicing it into the source.
+        update = {
+            'query': query,
+            'script': {
+                'source': 'ctx._source.remove(params.field)',
+                'lang': 'painless',
+                'params': {'field': opts.field},
+            },
+        }
+        try:
+            rval = es.update_by_query(index=index, body=update)
+        except Exception as ex:
+            # Not every exception carries an .info payload; report what
+            # is available and move on to the next index.
+            detail = getattr(ex, 'info', str(ex))
+            print(json.dumps(detail, indent=2, default=str))
+            continue
+        print('{:22} Updated: {:10} Failed: {}'.format(
+            index, rval['updated'], len(rval.get('failures', []))))
+
+if __name__ == '__main__':
+    main()