Merge pull request #3 from vacp2p/dump_data

Dump Thanos metrics to CSV
Alberto Soutullo 2024-03-01 12:04:08 +01:00 committed by GitHub
commit b79d4c8300
18 changed files with 695 additions and 24 deletions


@@ -1,21 +1,20 @@
 # Python Imports
 import src.logger.logger
 from kubernetes import client, config

 # Project Imports
 from src.metrics.scrapper import Scrapper


 def main():
     config.load_kube_config("your_kubeconfig.yaml")
     url = "your_url"
-    namespace = "'zerotesting'"
-    metrics = ["container_network_receive_bytes_total", "container_network_sent_bytes_total"]
+    scrape_config = "scrape.yaml"
-    v1 = client.CoreV1Api()
-    scrapper = Scrapper(url, namespace, metrics)
-    scrapper.make_queries()
+    scrapper = Scrapper(url, scrape_config, "test/")
+    scrapper.query_and_dump_metrics()


 if __name__ == '__main__':

Binary file not shown.

13  scrape.yaml Normal file

@@ -0,0 +1,13 @@
scrape_config:
  $__rate_interval: "60s"
  step: "60s"
  until_hours_ago: 1
metrics_to_scrape:
  - "libp2p_peers": "instance"
  - "libp2p_open_streams": "instance-type-dir"
  - "rate(libp2p_network_bytes_total{direction='in'}[$__rate_interval])": "instance-direction"
  - "rate(libp2p_network_bytes_total{direction='out'}[$__rate_interval])": "instance-direction"
  - "rate(container_network_receive_bytes_total{namespace='zerotesting'}[$__rate_interval])": "pod-node"
  - "rate(container_network_transmit_bytes_total{namespace='zerotesting'}[$__rate_interval])": "pod-node"
  - "sum by(job) (libp2p_gossipsub_low_peers_topics)": "job"
  - "sum by(job) (libp2p_gossipsub_healthy_peers_topics)": "job"


@@ -1,8 +1,8 @@
 import logging.config
 import yaml

-with open('src/logging/logger_config.yaml', 'r') as f:
+with open('src/logger/logger_config.yaml', 'r') as f:
     config = yaml.safe_load(f.read())

 logging.config.dictConfig(config)
-logger = logging.getLogger(__name__)
+log = logging.getLogger(__name__)


@@ -2,12 +2,17 @@
 import datetime


+# Having now in an external function allows us to mock it in the tests
+def _get_datetime_now() -> datetime.datetime:
+    return datetime.datetime.now()


 def create_promql(address: str, query: str, hours_passed: int, step: int) -> str:
     promql = address + "query_range?query=" + query

     start = datetime.datetime.timestamp(
-        datetime.datetime.now() - datetime.timedelta(hours=hours_passed))
-    now = datetime.datetime.timestamp(datetime.datetime.now())
+        _get_datetime_now() - datetime.timedelta(hours=hours_passed))
+    now = datetime.datetime.timestamp(_get_datetime_now())

     promql = (promql +
               "&start=" + str(start) +


@@ -1,32 +1,122 @@
 # Python Imports
-import requests
-from typing import List
 import logging
+import pandas as pd
+from itertools import chain
+from typing import List, Dict
+from pathlib import Path

 # Project Imports
-import src.logging.logger
 from src.metrics import scrape_utils
+from result import Ok, Err, Result
+from src.utils.file_utils import read_yaml_file
+from src.utils.queries import get_query_data

 logger = logging.getLogger(__name__)


 class Scrapper:
-    def __init__(self, url: str, namespace: str, metrics: List):
+    def __init__(self, url: str, query_config_file: str, out_folder: str):
         self._url = url
-        self._namespace = namespace
-        self._metrics = metrics
+        self._query_config = None
+        self._query_config_file = query_config_file
+        self._out_folder = out_folder
+        self._set_query_config()

-        # TODO make interval match value in cluster
-        self._template = "irate($metric{namespace=$namespace}[3m])"
-
-    def make_queries(self):
-        for metric in self._metrics:
-            query = self._template.replace("$metric", metric)
-            query = query.replace("$namespace", self._namespace)
-            promql = scrape_utils.create_promql(self._url, query, 1, 60)
-            logger.info(f"Promql: {promql}")
-            response = requests.get(promql)
-            logger.info(f"Response: {response.status_code}")
+    def query_and_dump_metrics(self):
+        for metric_dict_item in self._query_config['metrics_to_scrape']:
+            metric, column_name = next(iter(metric_dict_item.items()))
+            logger.info(f'Querying {metric}')
+            promql = self._create_query(metric, self._query_config['scrape_config'])
+            match get_query_data(promql):
+                case Ok(data):
+                    logger.info(f'Successfully extracted {metric} data from response')
+                case Err(err):
+                    logger.info(err)
+                    continue
+            logger.info(f'Dumping {metric} data to .csv')
+            self._dump_data(metric, column_name, data)
+
+    def _set_query_config(self):
+        self._query_config = read_yaml_file(self._query_config_file)
+
+    def _create_query(self, metric: str, scrape_config: Dict) -> str:
+        if '__rate_interval' in metric:
+            metric = metric.replace('$__rate_interval', scrape_config['$__rate_interval'])
+        promql = scrape_utils.create_promql(self._url, metric,
+                                            scrape_config['until_hours_ago'],
+                                            scrape_config['step'])
+        return promql
+
+    def _dump_data(self, metric: str, column_name: str, data: Dict):
+        result = self._prepare_path(metric)
+        if result.is_err():
+            logger.error(f'{result.err_value}')
+            exit(1)
+        df = self._create_dataframe_from_data(data, column_name)
+        df = self._sort_dataframe(df)
+        df.to_csv(result.ok_value)
+        logger.info(f'{metric} data dumped')
+
+    def _prepare_path(self, metric: str) -> Result[Path, str]:
+        output_file = f'{metric}.csv'
+        output_dir = Path(self._out_folder)
+        try:
+            output_dir.mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            return Err(f'Error creating {output_dir}. {e}')
+        return Ok(output_dir / output_file)
+
+    def _create_dataframe_from_data(self, data: Dict, column_name: str) -> pd.DataFrame:
+        final_df = pd.DataFrame()
+        for pod_result_dict in data['result']:
+            column_name_items = column_name.split('-')
+            metric_result_info = pod_result_dict['metric']
+            result_string = '_'.join(metric_result_info[key] for key in column_name_items)
+            values = pod_result_dict['values']
+            pod_df = self._create_pod_df(result_string, values)
+            final_df = pd.merge(final_df, pod_df, how='outer', left_index=True, right_index=True)
+        return final_df
+
+    def _sort_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        columns = self._order(df.columns.tolist())
+        df = df[columns]
+        return df
+
+    def _create_pod_df(self, column_name: str, values: List) -> pd.DataFrame:
+        pod_df = pd.DataFrame(values, columns=['Unix Timestamp', column_name])
+        pod_df['Unix Timestamp'] = pd.to_datetime(pod_df['Unix Timestamp'], unit='s')
+        pod_df.set_index('Unix Timestamp', inplace=True)
+        return pod_df
+
+    # TODO this depends on pods name assigned in deployment
+    def _order(self, column_names: List) -> List:
+        def get_default_format_id(val):
+            return int(val.split('-')[1].split('_')[0])
+
+        nodes = []
+        bootstrap = []
+        others = []
+        for column in column_names:
+            if column.startswith('nodes'):
+                nodes.append(column)
+            elif column.startswith('bootstrap'):
+                bootstrap.append(column)
+            else:
+                others.append(column)
+        nodes.sort(key=get_default_format_id)
+        bootstrap.sort(key=get_default_format_id)
+        return list(chain(others, bootstrap, nodes))
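
Note: the column spec from scrape.yaml drives CSV column naming in _create_dataframe_from_data: the spec is split on '-' and the corresponding label values of each series are joined with '_'. A sketch, with label values taken from the tests below:

    labels = {'instance': 'nodes-1', 'direction': 'out'}  # one series' 'metric' dict
    spec = 'instance-direction'                           # column spec from scrape.yaml
    print('_'.join(labels[key] for key in spec.split('-')))  # -> nodes-1_out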


@@ -0,0 +1,6 @@
scrape_config:
  $__rate_interval: "60s"
  step: "60s"
  until_hours_ago: 1
metrics_to_scrape:
  - "metric1": "instance"


@@ -0,0 +1,7 @@
scrape_config:
  $__rate_interval: "60s"
  step: "60s"
  until_hours_ago: 1
metrics_to_scrape:
  - "metric1": "instance"
  - "metric2[$__rate_interval])": "instance-direction"


@@ -0,0 +1,27 @@
# Python Imports
import unittest
import datetime
from unittest.mock import patch, MagicMock

# Project Imports
from src.metrics.scrape_utils import create_promql


class TestScrapeUtils(unittest.TestCase):

    @patch('src.metrics.scrape_utils._get_datetime_now')
    def test_create_promql(self, mock_datetime_now: MagicMock):
        address = "0.0.0.0:9090/api/"
        query = "bandwidth"
        hours_passed = 1
        step = 60
        return_value_first = datetime.datetime(2024, 2, 22, 11, 0, 0)
        return_value_second = datetime.datetime(2024, 2, 22, 12, 0, 0)
        mock_datetime_now.side_effect = [return_value_first, return_value_second]

        result = create_promql(address, query, hours_passed, step)

        expected_result = ("0.0.0.0:9090/api/query_range?query=bandwidth&start=1708592400.0&end"
                           "=1708599600.0&step=60")

        self.assertEqual(expected_result, result)


@@ -0,0 +1,453 @@
# Python Imports
import datetime
import os
import unittest
import logging
from pathlib import Path
from unittest.mock import patch, MagicMock
from result import Ok, Err
import pandas as pd

# Project Imports
from src.metrics import scrapper

logger = logging.getLogger(__name__)


@patch('src.metrics.scrapper.get_query_data')
class TestScrapper(unittest.TestCase):

    def setUp(self):
        self.current_directory = os.path.dirname(os.path.abspath(__file__))

    @classmethod
    def tearDownClass(cls):
        os.rmdir('test_results')

    def test_query_and_dump_metrics_single(self, mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]}]}
        mock_get_query_data.side_effect = [Ok(data)]

        test_scrapper.query_and_dump_metrics()

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5
        }
        expected_df = pd.DataFrame(expected_data)

        result = pd.read_csv('test_results/metric1.csv')
        # Convert data type since it is lost when reading from a file
        result['Unix Timestamp'] = pd.to_datetime(result['Unix Timestamp'])

        self.assertTrue(result.equals(expected_df))
        os.remove('test_results/metric1.csv')

    def test_query_and_dump_metrics_multiple_column(self, mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]},
                           {'metric': {'instance': 'nodes-2'}, 'values': [[1, 6], [2, 6], [3, 6],
                                                                          [4, 6], [5, 6]]}
                           ]}
        mock_get_query_data.side_effect = [Ok(data)]

        test_scrapper.query_and_dump_metrics()

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5,
            'nodes-2': [6] * 5
        }
        expected_df = pd.DataFrame(expected_data)

        result = pd.read_csv('test_results/metric1.csv')
        # Convert data type since it is lost when reading from a file
        result['Unix Timestamp'] = pd.to_datetime(result['Unix Timestamp'])

        self.assertTrue(result.equals(expected_df))
        os.remove('test_results/metric1.csv')

    def test_query_and_dump_metrics_multiple_column_unordered(self, mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        data = {'result': [{'metric': {'instance': 'nodes-2'}, 'values': [[1, 6], [2, 6], [3, 6],
                                                                          [4, 6], [5, 6]]},
                           {'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]}
                           ]}
        mock_get_query_data.side_effect = [Ok(data)]

        test_scrapper.query_and_dump_metrics()

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5,
            'nodes-2': [6] * 5
        }
        expected_df = pd.DataFrame(expected_data)

        result = pd.read_csv('test_results/metric1.csv')
        # Convert data type since it is lost when reading from a file
        result['Unix Timestamp'] = pd.to_datetime(result['Unix Timestamp'])

        self.assertTrue(result.equals(expected_df))
        os.remove('test_results/metric1.csv')

    def test_query_and_dump_metrics_multiple_data(self, mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        data1 = {'result': [{'metric': {'instance': 'nodes-2'}, 'values': [[1, 6], [2, 6], [3, 6],
                                                                           [4, 6], [5, 6]]},
                            {'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                           [4, 5], [5, 5]]}
                            ]}
        data2 = {'result': [
            {'metric': {'instance': 'nodes-2', 'direction': 'in'},
             'values': [[1, 6], [2, 6], [3, 6],
                        [4, 6], [5, 6]]},
            {'metric': {'instance': 'nodes-1', 'direction': 'out'},
             'values': [[1, 5], [2, 5], [3, 5],
                        [4, 5], [5, 5]]}
        ]}
        mock_get_query_data.side_effect = [Ok(data1), Ok(data2)]

        test_scrapper.query_and_dump_metrics()

        expected_data_1 = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5,
            'nodes-2': [6] * 5
        }
        expected_df1 = pd.DataFrame(expected_data_1)

        expected_data_2 = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1_out': [5] * 5,
            'nodes-2_in': [6] * 5
        }
        expected_df2 = pd.DataFrame(expected_data_2)

        result1 = pd.read_csv('test_results/metric1.csv')
        # Convert data type since it is lost when reading from a file
        result1['Unix Timestamp'] = pd.to_datetime(result1['Unix Timestamp'])
        self.assertTrue(result1.equals(expected_df1))

        result2 = pd.read_csv('test_results/metric2[$__rate_interval]).csv')
        # Convert data type since it is lost when reading from a file
        result2['Unix Timestamp'] = pd.to_datetime(result2['Unix Timestamp'])
        self.assertTrue(result2.equals(expected_df2))

        os.remove('test_results/metric1.csv')
        os.remove('test_results/metric2[$__rate_interval]).csv')

    @patch('src.metrics.scrapper.Scrapper._dump_data')
    def test_query_and_dump_metrics_multiple_fail(self, mock_dump: MagicMock, mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        file_path = os.path.join(self.current_directory, 'test_scrape.yaml')
        err1 = "Err1"
        err2 = "Err2"
        mock_get_query_data.side_effect = [Err(err1), Err(err2)]

        test_scrapper.query_and_dump_metrics()

        self.assertEqual(0, mock_dump.call_count)

    def test__set_query_config(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")

        test_scrapper._set_query_config()

        expected_config = {'scrape_config': {'until_hours_ago': 1, 'step': "60s",
                                             '$__rate_interval': '60s'},
                           'metrics_to_scrape': [{'metric1': 'instance'}]}

        self.assertEqual(expected_config, test_scrapper._query_config)

    @patch('src.metrics.scrape_utils._get_datetime_now')
    def test__create_query(self, mock_datetime_now: MagicMock, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        metric = "bandwidth"
        scrape_config = {'until_hours_ago': 1, 'step': "60s", '$__rate_interval': '60s'}
        return_value_first = datetime.datetime(2024, 2, 22, 11, 0, 0)
        return_value_second = datetime.datetime(2024, 2, 22, 12, 0, 0)
        mock_datetime_now.side_effect = [return_value_first, return_value_second]

        result = test_scrapper._create_query(metric, scrape_config)

        expected_result = ('http://myurl:9090/api/v1/query_range?query=bandwidth&start=1708592400'
                           '.0&end=1708599600.0&step=60s')

        self.assertEqual(expected_result, result)

    @patch('src.metrics.scrape_utils._get_datetime_now')
    def test__create_query_with_rate(self, mock_datetime_now: MagicMock, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        metric = "bandwidth[$__rate_interval]"
        scrape_config = {'until_hours_ago': 1, 'step': "60s", '$__rate_interval': '60s'}
        return_value_first = datetime.datetime(2024, 2, 22, 11, 0, 0)
        return_value_second = datetime.datetime(2024, 2, 22, 12, 0, 0)
        mock_datetime_now.side_effect = [return_value_first, return_value_second]

        result = test_scrapper._create_query(metric, scrape_config)

        expected_result = (
            'http://myurl:9090/api/v1/query_range?query=bandwidth[60s]&start=1708592400'
            '.0&end=1708599600.0&step=60s')

        self.assertEqual(expected_result, result)

    def test__dump_data(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("http://myurl:9090/api/v1/",
                                          file_path,
                                          "test_results/")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]}]}

        test_scrapper._dump_data('metric1', 'instance', data)

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5
        }
        expected_df = pd.DataFrame(expected_data)

        result = pd.read_csv('test_results/metric1.csv')
        # Convert data type since it is lost when reading from a file
        result['Unix Timestamp'] = pd.to_datetime(result['Unix Timestamp'])

        self.assertTrue(result.equals(expected_df))
        os.remove('test_results/metric1.csv')

    @patch('src.metrics.scrapper.Scrapper._prepare_path')
    def test__dump_data_err(self, mock_prepare_path: MagicMock, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "/")
        mock_prepare_path.return_value = Err("Error")
        data = {}

        with self.assertRaises(SystemExit) as cm:
            test_scrapper._dump_data('', '', data)

        self.assertEqual(cm.exception.code, 1)

    def test__prepare_path(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "test_path/")

        result = test_scrapper._prepare_path('metric1')

        self.assertEqual(Path('test_path/metric1.csv'), result.ok_value)
        os.rmdir('test_path/')

    def test__prepare_path_multiple(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "test_path_1/test_path_2")

        result = test_scrapper._prepare_path('metric1')

        self.assertEqual(Path('test_path_1/test_path_2/metric1.csv'), result.ok_value)
        os.rmdir('test_path_1/test_path_2/')
        os.rmdir('test_path_1')

    @patch('src.metrics.scrapper.Path.mkdir')
    def test__prepare_path_err(self, mock_mkdir: MagicMock, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "test_path_1/test_path_2")
        mock_mkdir.side_effect = OSError("Error")

        result = test_scrapper._prepare_path('metric1')

        self.assertIsInstance(result, Err)

    def test__create_dataframe_from_data(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]}]}

        result = test_scrapper._create_dataframe_from_data(data, 'instance')

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5
        }
        expected_df = pd.DataFrame(expected_data)
        expected_df.set_index('Unix Timestamp', inplace=True)

        self.assertTrue(result.equals(expected_df))

    def test__create_dataframe_from_data_multiple(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]},
                           {'metric': {'instance': 'nodes-2'}, 'values': [[1, 6], [2, 6], [3, 6],
                                                                          [4, 6], [5, 6]]}
                           ]}

        result = test_scrapper._create_dataframe_from_data(data, 'instance')

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5,
            'nodes-2': [6] * 5
        }
        expected_df = pd.DataFrame(expected_data)
        expected_df.set_index('Unix Timestamp', inplace=True)

        self.assertTrue(result.equals(expected_df))

    def test__create_dataframe_from_data_not_matching_times(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        data = {'result': [{'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [3, 5], [5, 5]]},
                           {'metric': {'instance': 'nodes-2'}, 'values': [[1, 6], [2, 6], [4, 6]]}
                           ]}

        result = test_scrapper._create_dataframe_from_data(data, 'instance')

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5, None, 5, None, 5],
            'nodes-2': [6, 6, None, 6, None]
        }
        expected_df = pd.DataFrame(expected_data)
        expected_df.set_index('Unix Timestamp', inplace=True)

        self.assertTrue(result.equals(expected_df))

    def test__sort_dataframe(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        data = {'result': [{'metric': {'instance': 'nodes-4'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]},
                           {'metric': {'instance': 'nodes-1'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]},
                           {'metric': {'instance': 'nodes-3'}, 'values': [[1, 5], [2, 5], [3, 5],
                                                                          [4, 5], [5, 5]]}
                           ]}
        df = test_scrapper._create_dataframe_from_data(data, 'instance')

        result = test_scrapper._sort_dataframe(df)

        expected_columns = ['nodes-1', 'nodes-3', 'nodes-4']
        self.assertEqual(expected_columns, result.columns.tolist())

    def test__create_pod_df(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        values = [[1, 5], [2, 5], [3, 5], [4, 5], [5, 5]]

        result = test_scrapper._create_pod_df('nodes-1', values)

        expected_data = {
            'Unix Timestamp': pd.to_datetime(
                ['1970-01-01 00:00:01', '1970-01-01 00:00:02', '1970-01-01 00:00:03',
                 '1970-01-01 00:00:04', '1970-01-01 00:00:05']),
            'nodes-1': [5] * 5
        }
        expected_df = pd.DataFrame(expected_data)
        expected_df.set_index('Unix Timestamp', inplace=True)

        self.assertTrue(result.equals(expected_df))

    def test__order(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        columns = ['nodes-4', 'nodes-1', 'nodes-3']

        result = test_scrapper._order(columns)

        expected_columns = ['nodes-1', 'nodes-3', 'nodes-4']
        self.assertEqual(expected_columns, result)

    def test__order_bootstrap(self, _mock_get_query_data: MagicMock):
        file_path = os.path.join(self.current_directory, 'single_test_scrape.yaml')
        test_scrapper = scrapper.Scrapper("", file_path, "")
        columns = ['nodes-4', 'nodes-1', 'nodes-3', 'bootstrap-2']

        result = test_scrapper._order(columns)

        expected_columns = ['bootstrap-2', 'nodes-1', 'nodes-3', 'nodes-4']
        self.assertEqual(expected_columns, result)

0  src/utils/__init__.py Normal file

14  src/utils/file_utils.py Normal file

@@ -0,0 +1,14 @@
# Python Imports
import yaml
from pathlib import Path

# Project Imports


def read_yaml_file(file_path: str):
    path = Path(file_path)

    with open(path, 'r') as file:
        data = yaml.safe_load(file)

    return data
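
Note: read_yaml_file is what Scrapper._set_query_config uses to load the scrape config. Loading the one-metric test fixture would return a dict like this (taken from expected_config in the test suite above):

    config = read_yaml_file('single_test_scrape.yaml')
    # {'scrape_config': {'$__rate_interval': '60s', 'step': '60s', 'until_hours_ago': 1},
    #  'metrics_to_scrape': [{'metric1': 'instance'}]}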

22  src/utils/queries.py Normal file

@@ -0,0 +1,22 @@
# Python Imports
import logging
import requests
from typing import Dict
from result import Result, Err, Ok

logger = logging.getLogger(__name__)


def get_query_data(request: str) -> Result[Dict, str]:
    try:
        response = requests.get(request, timeout=30)
    except requests.exceptions.Timeout:
        return Err(f'Timeout error.')

    if response.ok:
        logger.info(f'Response: {response.status_code}')
        data = response.json()['data']
        return Ok(data)

    return Err(f'Error in query. Status code {response.status_code}. {response.content}')
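
Note: callers unwrap the Result with structural pattern matching, as Scrapper.query_and_dump_metrics does. A sketch with a hypothetical URL:

    from result import Ok, Err
    from src.utils.queries import get_query_data

    match get_query_data('http://myurl:9090/api/v1/query_range?query=up'):  # URL is illustrative
        case Ok(data):
            print(data['result'])  # list of {'metric': {...}, 'values': [[ts, value], ...]}
        case Err(error):
            print(error)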


@@ -0,0 +1,35 @@
# Python Imports
import unittest
import requests.exceptions
from unittest.mock import patch, MagicMock

# Project Imports
from src.utils.queries import get_query_data


@patch('src.utils.queries.requests.get')
class TestQueries(unittest.TestCase):

    def test_get_query_data_correct(self, mock_requests_get: MagicMock):
        mock_requests_get.return_value.ok = True
        mock_requests_get.return_value.json.return_value = {'data': 'foo'}

        result = get_query_data('https://foo/bar/1')

        self.assertEqual(result.ok_value, 'foo')

    def test_get_query_data_timeout(self, mock_requests_get: MagicMock):
        mock_requests_get.side_effect = requests.exceptions.Timeout

        result = get_query_data('https://foo/bar/1')

        self.assertEqual(result.err_value, 'Timeout error.')

    def test_get_query_data_error(self, mock_requests_get: MagicMock):
        mock_requests_get.return_value.ok = False
        mock_requests_get.return_value.status_code = 404
        mock_requests_get.return_value.content = 'bar'

        result = get_query_data('https://foo/bar/1')

        self.assertEqual(result.err_value, 'Error in query. Status code 404. bar')