From eee6c8cf2766602a4165b84e08f4ac9ace224f9a Mon Sep 17 00:00:00 2001 From: Alberto Soutullo Date: Tue, 1 Oct 2024 11:57:29 +0200 Subject: [PATCH] Add more parameters to yaml configuration --- scrape.yaml | 20 ++++++++++++-------- src/data/data_file_handler.py | 6 +++++- src/plotting/plotter.py | 7 ++++--- src/utils/file_utils.py | 10 +++++++--- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/scrape.yaml b/scrape.yaml index 6bf41c7..db0bb64 100644 --- a/scrape.yaml +++ b/scrape.yaml @@ -56,16 +56,20 @@ metrics_to_scrape: # extract_field: "pod-node" # folder_name: "nim-gc-memory/" plotting: - "bandwidth": - "ignore": ["bootstrap", "midstrap"] - "data_points": 15 + "bandwidth-0-33-3K": + "ignore_columns": ["bootstrap", "midstrap"] + "data_points": 25 "folder": - "test/nwaku0.26-f/" "data": - # - "libp2p-in" - # - "libp2p-out" - - "asd" - - "asd2" - "xlabel_name": "NºNodes-MsgRate" + - "libp2p-in" + - "libp2p-out" + "include_files": + - "3K-1mgs-s-1KB" + - "3K-1mgs-5s-1KB" + - "3K-1mgs-10s-1KB" + "xlabel_name": "Simulation" "ylabel_name": "KBytes/s" + "show_min_max": false "scale-x": 1000 + "fig_size": [20, 20] diff --git a/src/data/data_file_handler.py b/src/data/data_file_handler.py index 0b9611c..bb7551e 100644 --- a/src/data/data_file_handler.py +++ b/src/data/data_file_handler.py @@ -14,11 +14,15 @@ logger = logging.getLogger(__name__) class DataFileHandler(DataHandler): + def __init__(self, ignore_columns: Optional[List] = None, include_files: Optional[List] = None): + super().__init__(ignore_columns) + self._include_files = include_files + def concat_dataframes_from_folders_as_mean(self, folders: List, points: int): for folder in folders: folder_path = Path(folder) folder_df = pd.DataFrame() - match file_utils.get_files_from_folder_path(folder_path): + match file_utils.get_files_from_folder_path(folder_path, self._include_files): case Ok(data_files_names): folder_df = self._concat_files_as_mean(folder_df, data_files_names, folder_path, points) 
diff --git a/src/plotting/plotter.py b/src/plotting/plotter.py index 4edc865..00359bc 100644 --- a/src/plotting/plotter.py +++ b/src/plotting/plotter.py @@ -28,7 +28,7 @@ class Plotter: def _create_plot(self, plot_name: str, plot_specs: Dict): fig, axs = plt.subplots(nrows=1, ncols=len(plot_specs['data']), sharey='row', - figsize=(15, 15)) + figsize=plot_specs['fig_size']) subplot_paths_group = self._create_subplot_paths_group(plot_specs) self._insert_data_in_axs(subplot_paths_group, axs, plot_specs) @@ -36,7 +36,8 @@ class Plotter: def _insert_data_in_axs(self, subplot_paths_group: List, axs: np.ndarray, plot_specs: Dict): for i, subplot_path_group in enumerate(subplot_paths_group): - file_data_handler = DataFileHandler(plot_specs['ignore']) + include_files = plot_specs.get("include_files") + file_data_handler = DataFileHandler(plot_specs['ignore_columns'], include_files) file_data_handler.concat_dataframes_from_folders_as_mean(subplot_path_group, plot_specs['data_points']) subplot_df = file_data_handler.dataframe @@ -54,7 +55,7 @@ class Plotter: subplot_title = plot_specs['data'][index] axs = axs if type(axs) is not np.ndarray else axs[index] box_plot = sns.boxplot(data=df, x="variable", y="value", hue="class", ax=axs, - showfliers=False) + showfliers=True) # Apply the custom formatter to the x-axis ticks formatter = ticker.FuncFormatter(lambda x, pos: '{:.0f}'.format(x / plot_specs['scale-x'])) diff --git a/src/utils/file_utils.py b/src/utils/file_utils.py index fc40c04..b88515c 100644 --- a/src/utils/file_utils.py +++ b/src/utils/file_utils.py @@ -3,7 +3,7 @@ import pandas as pd import yaml import logging from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Optional from result import Result, Err, Ok from src.utils import path_utils @@ -22,14 +22,18 @@ def read_yaml_file(file_path: str) -> Dict: return data -def get_files_from_folder_path(path: Path, extension: str = '*') -> Result[List[str], str]: +def 
get_files_from_folder_path(path: Path, include_files: Optional[List[str]] = None, extension: str = '*') \ + -> Result[List[str], str]: if not path.exists(): return Err(f"{path} does not exist.") if not extension.startswith('*'): extension = '*.' + extension - files = [p.name for p in path.glob(extension) if p.is_file()] + files = [ + p.name for p in path.glob(extension) + if p.is_file() and (include_files is None or p.name in include_files) + ] logger.debug(f"Found {len(files)} files in {path}") logger.debug(f"Files are: {files}")