allow specifying time bound for resource listing operations, increase aggregation bucket count

This commit is contained in:
gmega 2023-11-14 20:35:31 -03:00
parent 1dc45c8dc7
commit 08f42e9cbb
No known key found for this signature in database
GPG Key ID: FFD8DAF00660270F
11 changed files with 576 additions and 481 deletions

View File

@ -1,7 +1,9 @@
import datetime
import math
import os
import textwrap
from argparse import ArgumentParser
from datetime import timedelta
from enum import Enum
from typing import List, Iterable
@ -77,6 +79,10 @@ def main():
subparsers = parser.add_subparsers(title='Command', required=True)
get = subparsers.add_parser('get', help='Display existing resources')
get.add_argument('--from', type=tsparser.parse,
help='Show resources present in log messages starting at the given date '
'(MM-DD-YYYY, or MM-DD-YYYY HH:MM:SS.mmmmmm). Defaults to 7 days ago.',
default=(datetime.datetime.today() - timedelta(days=7)).date())
get.set_defaults(main=get_object)
get_subparsers = get.add_subparsers(title='Resource type', dest='resource_type', required=True)

View File

@ -1,10 +1,8 @@
import abc
import re
from abc import ABC
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional, TypeVar, Generic, Iterable, Iterator
from typing import TypeVar, Generic, Iterator
TLocation = TypeVar('TLocation')

View File

@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from datetime import datetime
import logging
from typing import Optional, Iterator
from typing import Optional, Iterator, Dict, Any
from elasticsearch import Elasticsearch
@ -23,11 +23,15 @@ class Pod:
indices: tuple[str, ...]
# Number of buckets requested from the terms aggregations built by this module;
# it is passed as their `size`, so listings are not cut off at the (much smaller)
# bucket count Elasticsearch applies when no size is given.
MAX_AGGREGATION_BUCKETS = 1000
class ElasticSearchLogRepo:
def __init__(
self,
client: Optional[Elasticsearch] = None,
indices: str = 'continuous-tests-pods-*',
since: Optional[datetime] = None,
):
if client is None:
logger.warning('No client provided, defaulting to localhost')
@ -35,20 +39,21 @@ class ElasticSearchLogRepo:
self.client = client
self.indices = indices
self.since = since
def namespaces(self, prefix: Optional[str] = None) -> Iterator[Namespace]:
query = {
query = self._time_limited({
'size': 0,
'aggs': {
'distinct_namespaces': {
'terms': {'field': 'pod_namespace.keyword'},
'terms': {'field': 'pod_namespace.keyword', 'size': MAX_AGGREGATION_BUCKETS},
'aggs': {
'indices': {'terms': {'field': '_index'}},
'runid': {'terms': {'field': 'pod_labels.runid.keyword'}},
}
}
}
}
})
if prefix is not None:
query['aggs']['distinct_namespaces']['terms']['include'] = f'{prefix}.*' # type: ignore
@ -63,19 +68,19 @@ class ElasticSearchLogRepo:
)
def pods(self, prefix: Optional[str] = None, run_id: Optional[str] = None):
query = {
query = self._time_limited({
'size': 0,
'aggs': {
'distinct_pods': {
'terms': {'field': 'pod_name.keyword'},
'terms': {'field': 'pod_name.keyword', 'size': MAX_AGGREGATION_BUCKETS},
'aggs': {
'indices': {'terms': {'field': '_index'}},
'namespace': {'terms': {'field': 'pod_namespace.keyword'}},
'runid': {'terms': {'field': 'pod_labels.runid.keyword'}},
'runid': {'terms': {'field': 'pod_labels.runid.keyword', 'size': MAX_AGGREGATION_BUCKETS}},
}
}
}
}
})
if prefix is not None:
query['aggs']['distinct_pods']['terms']['include'] = f'{prefix}.*' # type: ignore
@ -98,3 +103,13 @@ class ElasticSearchLogRepo:
run_id=pod['runid']['buckets'][0]['key'],
indices=tuple(sorted(index['key'] for index in pod['indices']['buckets']))
)
def _time_limited(self, query: Dict[str, Any]) -> Dict[str, Any]:
    """Restrict a search body to documents logged at or after ``self.since``.

    When ``self.since`` is ``None`` the body is returned untouched. Otherwise
    a ``range`` filter on ``@timestamp`` (``gte`` the ISO-8601 rendering of
    ``self.since``) is installed under ``query['query']``.

    If the body already carries a ``query`` clause, that clause is preserved
    as a ``must`` condition alongside the time filter rather than being
    overwritten, so both constraints apply.

    :param query: search request body; it is modified in place.
    :return: the very same ``query`` object, for call chaining.
    """
    if self.since is None:
        return query

    time_filter = {'range': {'@timestamp': {'gte': self.since.isoformat()}}}
    bounded: Dict[str, Any] = {'bool': {'filter': [time_filter]}}

    existing = query.get('query')
    if existing is not None:
        # Keep whatever was already being asked for; `must` retains its
        # scoring behaviour while the time bound stays a pure filter.
        bounded['bool']['must'] = [existing]

    query['query'] = bounded
    return query

View File

@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any, Iterator, Set
from typing import Optional, Dict, Any, Iterator, Set
from elasticsearch import Elasticsearch

View File

@ -0,0 +1,58 @@
interactions:
- request:
body: '{"aggs":{"distinct_namespaces":{"terms":{"field":"pod_namespace.keyword","size":1000,"include":"codex-continuous-tests-profiling.*"},"aggs":{"indices":{"terms":{"field":"_index"}},"runid":{"terms":{"field":"pod_labels.runid.keyword"}}}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"gte":"2023-11-14T18:00:00+00:00"}}}]}},"size":0}'
headers:
accept:
- application/vnd.elasticsearch+json; compatible-with=8
connection:
- keep-alive
content-type:
- application/vnd.elasticsearch+json; compatible-with=8
user-agent:
- elasticsearch-py/8.10.1 (Python/3.11.5; elastic-transport/8.10.0)
x-elastic-client-meta:
- es=8.10.1,py=3.11.5,t=8.10.0,ur=2.0.7
method: POST
uri: http://localhost:9200/continuous-tests-pods-*/_search
response:
body:
string: '{"took":2,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_namespaces":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[]}}}'
headers:
Transfer-Encoding:
- chunked
X-elastic-product:
- Elasticsearch
content-type:
- application/vnd.elasticsearch+json;compatible-with=8
status:
code: 200
message: OK
- request:
body: '{"aggs":{"distinct_namespaces":{"terms":{"field":"pod_namespace.keyword","size":1000,"include":"codex-continuous-tests-profiling.*"},"aggs":{"indices":{"terms":{"field":"_index"}},"runid":{"terms":{"field":"pod_labels.runid.keyword"}}}}},"query":{"bool":{"filter":[{"range":{"@timestamp":{"gte":"2023-11-07T18:00:00+00:00"}}}]}},"size":0}'
headers:
accept:
- application/vnd.elasticsearch+json; compatible-with=8
connection:
- keep-alive
content-type:
- application/vnd.elasticsearch+json; compatible-with=8
user-agent:
- elasticsearch-py/8.10.1 (Python/3.11.5; elastic-transport/8.10.0)
x-elastic-client-meta:
- es=8.10.1,py=3.11.5,t=8.10.0,ur=2.0.7
method: POST
uri: http://localhost:9200/continuous-tests-pods-*/_search
response:
body:
string: '{"took":2,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_namespaces":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-0","doc_count":32272649,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":23279858},{"key":"continuous-tests-pods-2023.11.09","doc_count":8405729},{"key":"continuous-tests-pods-2023.11.07","doc_count":573187},{"key":"continuous-tests-pods-2023.11.14","doc_count":13875}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-085853","doc_count":31604533},{"key":"20231107-074743","doc_count":517515},{"key":"20231109-043100","doc_count":77259},{"key":"20231107-065930","doc_count":50350},{"key":"20231114-051742","doc_count":7115},{"key":"20231107-064223","doc_count":5322},{"key":"20231114-045924","doc_count":3995},{"key":"20231109-055106","doc_count":3795},{"key":"20231114-051016","doc_count":2765}]}},{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":22615739,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":22415082},{"key":"continuous-tests-pods-2023.11.09","doc_count":200657}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":22615739}]}}]}}}'
headers:
Transfer-Encoding:
- chunked
X-elastic-product:
- Elasticsearch
content-type:
- application/vnd.elasticsearch+json;compatible-with=8
status:
code: 200
message: OK
version: 1

View File

@ -1,6 +1,6 @@
interactions:
- request:
body: '{"aggs":{"distinct_namespaces":{"terms":{"field":"pod_namespace.keyword","include":"codex-continuous-tests-profiling.*"},"aggs":{"indices":{"terms":{"field":"_index"}},"runid":{"terms":{"field":"pod_labels.runid.keyword"}}}}},"size":0}'
body: '{"aggs":{"distinct_namespaces":{"terms":{"field":"pod_namespace.keyword","size":1000,"include":"codex-continuous-tests-profiling.*"},"aggs":{"indices":{"terms":{"field":"_index"}},"runid":{"terms":{"field":"pod_labels.runid.keyword"}}}}},"size":0}'
headers:
accept:
- application/vnd.elasticsearch+json; compatible-with=8
@ -16,7 +16,7 @@ interactions:
uri: http://localhost:9200/continuous-tests-pods-*/_search
response:
body:
string: '{"took":24,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_namespaces":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-0","doc_count":32258774,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":23279858},{"key":"continuous-tests-pods-2023.11.09","doc_count":8405729},{"key":"continuous-tests-pods-2023.11.07","doc_count":573187}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-085853","doc_count":31604533},{"key":"20231107-074743","doc_count":517515},{"key":"20231109-043100","doc_count":77259},{"key":"20231107-065930","doc_count":50350},{"key":"20231107-064223","doc_count":5322},{"key":"20231109-055106","doc_count":3795}]}},{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":22615739,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":22415082},{"key":"continuous-tests-pods-2023.11.09","doc_count":200657}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":22615739}]}}]}}}'
string: '{"took":5327,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_namespaces":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-0","doc_count":32272649,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":23279858},{"key":"continuous-tests-pods-2023.11.09","doc_count":8405729},{"key":"continuous-tests-pods-2023.11.07","doc_count":573187},{"key":"continuous-tests-pods-2023.11.14","doc_count":13875}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-085853","doc_count":31604533},{"key":"20231107-074743","doc_count":517515},{"key":"20231109-043100","doc_count":77259},{"key":"20231107-065930","doc_count":50350},{"key":"20231114-051742","doc_count":7115},{"key":"20231107-064223","doc_count":5322},{"key":"20231114-045924","doc_count":3995},{"key":"20231109-055106","doc_count":3795},{"key":"20231114-051016","doc_count":2765}]}},{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":22615739,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":22415082},{"key":"continuous-tests-pods-2023.11.09","doc_count":200657}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":22615739}]}}]}}}'
headers:
Transfer-Encoding:
- chunked

View File

@ -1,6 +1,6 @@
interactions:
- request:
body: '{"aggs":{"distinct_pods":{"terms":{"field":"pod_name.keyword"},"aggs":{"indices":{"terms":{"field":"_index"}},"namespace":{"terms":{"field":"pod_namespace.keyword"}},"runid":{"terms":{"field":"pod_labels.runid.keyword"}}}}},"query":{"bool":{"filter":[{"term":{"pod_labels.runid.keyword":"20231109-101554"}}]}},"size":0}'
body: '{"aggs":{"distinct_pods":{"terms":{"field":"pod_name.keyword","size":1000},"aggs":{"indices":{"terms":{"field":"_index"}},"namespace":{"terms":{"field":"pod_namespace.keyword"}},"runid":{"terms":{"field":"pod_labels.runid.keyword","size":1000}}}}},"query":{"bool":{"filter":[{"term":{"pod_labels.runid.keyword":"20231109-101554"}}]}},"size":0}'
headers:
accept:
- application/vnd.elasticsearch+json; compatible-with=8
@ -16,7 +16,7 @@ interactions:
uri: http://localhost:9200/continuous-tests-pods-*/_search
response:
body:
string: '{"took":17,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_pods":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"bootstrap-2-58b69484bc-88msf","doc_count":11145047,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":11060058},{"key":"continuous-tests-pods-2023.11.09","doc_count":84989}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":11145047}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":11145047}]}},{"key":"codex1-3-b558568cf-tvcsc","doc_count":11138278,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":11053946},{"key":"continuous-tests-pods-2023.11.09","doc_count":84332}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":11138278}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":11138278}]}},{"key":"geth-0-7d8bc9dd5b-8wx95","doc_count":332341,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":301052},{"key":"continuous-tests-pods-2023.11.09","doc_count":31289}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":332341}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":332341}]}},{"key":"ctnr4-d8f8d6d8-rtqrp","doc_count":60,"indices":{"
doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.09","doc_count":34},{"key":"continuous-tests-pods-2023.11.10","doc_count":26}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":60}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":60}]}},{"key":"codex-contracts-1-b98d98877-bqd5x","doc_count":13,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.09","doc_count":13}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":13}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":13}]}}]}}}'
string: '{"took":6073,"timed_out":false,"_shards":{"total":14,"successful":14,"skipped":0,"failed":0},"hits":{"total":{"value":10000,"relation":"gte"},"max_score":null,"hits":[]},"aggregations":{"distinct_pods":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"bootstrap-2-58b69484bc-88msf","doc_count":11145047,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":11060058},{"key":"continuous-tests-pods-2023.11.09","doc_count":84989}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":11145047}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":11145047}]}},{"key":"codex1-3-b558568cf-tvcsc","doc_count":11138278,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":11053946},{"key":"continuous-tests-pods-2023.11.09","doc_count":84332}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":11138278}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":11138278}]}},{"key":"geth-0-7d8bc9dd5b-8wx95","doc_count":332341,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.10","doc_count":301052},{"key":"continuous-tests-pods-2023.11.09","doc_count":31289}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":332341}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":332341}]}},{"key":"ctnr4-d8f8d6d8-rtqrp","doc_count":60,"indices":
{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.09","doc_count":34},{"key":"continuous-tests-pods-2023.11.10","doc_count":26}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":60}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":60}]}},{"key":"codex-contracts-1-b98d98877-bqd5x","doc_count":13,"indices":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"continuous-tests-pods-2023.11.09","doc_count":13}]},"namespace":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"codex-continuous-tests-profiling-two-client-tests-sched-0","doc_count":13}]},"runid":{"doc_count_error_upper_bound":0,"sum_other_doc_count":0,"buckets":[{"key":"20231109-101554","doc_count":13}]}}]}}}'
headers:
Transfer-Encoding:
- chunked

View File

@ -1,11 +1,12 @@
import pytest
from dateutil import parser
from logtools.log.sources.input.elastic_search.elastic_search_log_repo import ElasticSearchLogRepo, Namespace, Pod
# XXX these are not good quality tests as they are overly complex and either tightly coupled to specific data or very
# weak in terms of what they assert. Ideally we should build simpler fixtures and test smaller bits at a time, but
# that requires a lot of setup, so we go with this.
# weak in terms of what they assert. They will be a pain to maintain. Ideally we should build simpler fixtures and
# test smaller bits at a time, but that requires a lot of setup, so for now we go with this.
@pytest.mark.vcr
def test_should_retrieve_existing_namespaces():
@ -22,11 +23,15 @@ def test_should_retrieve_existing_namespaces():
'20231109-043100',
'20231109-055106',
'20231109-085853',
'20231114-045924',
'20231114-051016',
'20231114-051742',
),
indices=(
'continuous-tests-pods-2023.11.07',
'continuous-tests-pods-2023.11.09',
'continuous-tests-pods-2023.11.10',
'continuous-tests-pods-2023.11.14',
),
),
Namespace(
@ -60,3 +65,16 @@ def test_should_retrieve_existing_pods_for_namespace():
'continuous-tests-pods-2023.11.10',
)
) in pods
@pytest.mark.vcr
def test_should_respect_time_horizon_for_retrieving_resources():
    """Namespace listing must only report resources seen since the repo's time bound."""
    prefix = 'codex-continuous-tests-profiling'

    # With this horizon the recorded response holds no namespace buckets at all.
    late_repo = ElasticSearchLogRepo(since=parser.parse('2023-11-14T18:00:00.000Z'))
    assert not list(late_repo.namespaces(prefix))

    # A week earlier, the recorded response holds both profiling namespaces.
    early_repo = ElasticSearchLogRepo(since=parser.parse('2023-11-07T18:00:00.000Z'))
    found = list(early_repo.namespaces(prefix))
    assert len(found) == 2

View File

@ -1,7 +1,7 @@
from pathlib import Path
from logtools.log.sources.parse.chronicles_raw_source import ChroniclesRawSource
from logtools.log.sources.input.file_log_source import FileLogSource
from logtools.log.sources.parse.chronicles_raw_source import ChroniclesRawSource
SAMPLE_LOG = Path(__file__).parent / 'sample.log'

View File

@ -3,8 +3,8 @@ from datetime import datetime
import pytest
import pytz
from logtools.log.sources.parse.chronicles_raw_source import ChroniclesRawSource, ChroniclesLogLine, LogLevel
from logtools.log.sources.input.string_log_source import StringLogSource
from logtools.log.sources.parse.chronicles_raw_source import ChroniclesRawSource, ChroniclesLogLine, LogLevel
def test_should_parse_raw_chronicles_logs():