das-research/DHT/plots.py
2023-10-04 10:13:47 +02:00

266 lines
9.2 KiB
Python

import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display
# Tag Identifiers
RETRIEVAL = "retrieval"
LOOKUP = "lookup"
NETWORK = "network"
NN = "nn"
RN = "rn"
SAMPL = "sampl"
FER = "fer"
SER = "ser"
CDR = "cdr"
FDR = "fdr"
SDR = "sdr"
K = "k"
A = "a"
B = "b"
Y = "y"
STEPS = "steps"
# --
OPERATION = "operation"
NUMBER_NODES = "number_nodes"
RETRIEVAL_NODES = "retrieval_nodes"
CONCURRENT_SAMPLES = "concurrent_samples"
FAST_ERROR_RATE = "fast_error_rate"
SLOW_ERROR_RATE = "slow_error_rate"
CONNECTION_DELAYS = "connection_delays"
FAST_ERROR_DELAYS = "fast_error_delays"
SLOW_ERROR_DELAYS = "slow_error_delays"
K_PARAMETER = "k_replication"
ALPHA = "alpha"
BETA = "beta"
GAMMA = "overhead"
STEPS_TO_STOP = "steps_to_stop"
# Utils
tag_example = "retrieval_lookup_nn12000_rn1_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y1.0_steps3"
def tag_parser(tag: str):
params = {
OPERATION: "",
NUMBER_NODES: "",
RETRIEVAL_NODES: "",
CONCURRENT_SAMPLES: "",
FAST_ERROR_RATE: "",
SLOW_ERROR_RATE: "",
CONNECTION_DELAYS: "",
FAST_ERROR_DELAYS: "",
SLOW_ERROR_DELAYS: "",
K_PARAMETER: "",
ALPHA: "",
BETA: "",
GAMMA: "",
STEPS_TO_STOP: "",
}
# split the tag into - type & parameters
raw_params = tag.split("_")
for param in raw_params:
if NN in param:
params[NUMBER_NODES] = param.replace(NN, "")
elif RN in param:
params[RETRIEVAL_NODES] = param.replace(RN, "")
elif SAMPL in param:
params[CONCURRENT_SAMPLES] = param.replace(SAMPL, "")
elif FER in param:
params[FAST_ERROR_RATE] = param.replace(FER, "")
elif SER in param:
params[SLOW_ERROR_RATE] = param.replace(SER, "")
elif CDR in param:
params[CONNECTION_DELAYS] = param.replace(CDR, "")
elif FDR in param:
params[FAST_ERROR_DELAYS] = param.replace(FDR, "")
elif SDR in param:
params[SLOW_ERROR_DELAYS] = param.replace(SDR, "")
elif K in param and param != "lookup":
params[K_PARAMETER] = param.replace(K, "")
elif A in param:
params[ALPHA] = param.replace(A, "")
elif B in param:
params[BETA] = param.replace(B, "")
elif Y in param:
params[GAMMA] = param.replace(Y, "")
elif STEPS in param:
params[STEPS_TO_STOP] = param.replace(STEPS, "")
else:
if params[OPERATION] == "":
params[OPERATION] = param
else:
params[OPERATION] += f"_{param}"
return params
def compose_legend(params, labels):
legend = ""
for label in labels:
if legend == "":
legend = f"{label}={params[label]}"
else:
legend += f" {label}={params[label]}"
return legend
def make_folder(folder, reason):
try:
os.mkdir(folder)
print(f"created folder {folder} for {reason}")
except FileExistsError:
print(f"folder {folder} was already created")
except Exception as e:
print(e)
# --- Single Metrics ---
class SingleMetrics:
metrics = {
"lookup_aggregated_delay": {
"title_tag": "delay",
"xlabel_tag": "delay (ms)",
"ylabel_tag": "",
},
"finished_connection_attempts": {
"title_tag": "hops",
"xlabel_tag": "hops",
"ylabel_tag": "",
},
"accuracy": {
"title_tag": "accuracy",
"xlabel_tag": "accuracy",
"ylabel_tag": "",
},
}
def __init__(self, file, output_image_folder, operation, metrics: dict = dict()):
self.file = file
self.df = pd.read_csv(file)
self.label = file.split("/")[-1].replace(".csv", "")
self.targetFolder = output_image_folder+"/"+self.label
self.operation = operation
# add metrics to pre-existing ones
self.metrics.update(metrics)
# Make sure there is a valid folder for the imgaes
make_folder(self.targetFolder, f"for keeping the lookup related images about {self.label}\n")
print(f"plotting {self.label}, saving figures at {self.targetFolder}\n")
# display the lookup wallclock cdf
# display the aggregated delay cdf
for metric_name, metric_opts in self.metrics.items():
self.plot_cdf(metric_name, metric_opts)
self.plot_pdf(metric_name, metric_opts)
def plot_cdf(self, column_name, column_opts):
df = self.df.sort_values(column_name)
# CDF
sns.set()
g = sns.lineplot(data=df, x=column_name, y=np.linspace(0, 1, len(df)), color='red', ci=None)
g.set(title=f"Simulated {self.operation} {column_name} CDF ({self.label})",
xlabel=f"Simulated {column_opts['xlabel_tag']}", ylabel=f"{self.operation} {column_opts['ylabel_tag']}")
fig = g.get_figure()
fig.savefig(self.targetFolder+f"/{self.operation.lower()}_{column_name}_cdf.png")
plt.show()
def plot_pdf(self, column_name, column_opts):
df = self.df.sort_values(column_name)
# Histogram
bins = 8
sns.set()
g = sns.histplot(x=df[column_name], bins=bins)
g.set(title=f"Simulated lookup {column_name} PDF ({self.label})",
xlabel=f"Simulated {column_opts['xlabel_tag']}", ylabel=f"Lookups {column_opts['ylabel_tag']}")
fig = g.get_figure()
fig.savefig(self.targetFolder + f"/lookup_{column_name}_pdf.png")
plt.show()
# --- Multiple Aggregators ---
class CombinedMetrics:
metrics = {
"lookup_aggregated_delay": {
"title_tag": "delay",
"xlabel_tag": "delay (ms)",
"ylabel_tag": "",
},
"finished_connection_attempts": {
"title_tag": "hops",
"xlabel_tag": "hops",
"ylabel_tag": "",
},
"accuracy": {
"title_tag": "accuracy",
"xlabel_tag": "accuracy",
"ylabel_tag": "",
},
}
def __init__(self, files, aggregator, filters, operation, output_image_folder, metrics, legend):
self.files = files
self.dfs = []
self.tags = []
self.params = []
self.tag = aggregator
self.filters = filters
self.operation = operation
# add metrics to pre-existing ones
self.metrics.update(metrics)
for file in files:
if any(filter not in file for filter in filters):
continue
self.dfs.append(pd.read_csv(file))
raw_tag = file.split("/")[-1].replace(".csv", "")
params = tag_parser(raw_tag)
tag = compose_legend(params, legend)
self.params.append(params)
self.tags.append(tag)
self.udf = self.unify_dfs(self.dfs) # unified dataframe
self.targetFolder = output_image_folder+f"/{self.operation.lower}_comparison_{aggregator}"
make_folder(self.targetFolder, f"for keeping the {self.operation} related images about {self.tag}\n")
print(f"plotting by {aggregator}, saving figures at {self.targetFolder}\n")
# --- plotting sequence ---
for metrics_name, metrics_opts in self.metrics.items():
self.plot_cdfs_by(aggregator, metrics_name, metrics_opts)
self.plot_pdfs_by(aggregator, metrics_name, metrics_opts)
def unify_dfs(self, dfs):
return pd.concat(dfs)
def plot_cdfs_by(self, aggregator_tag, column_name, column_opts):
# CDF
sns.set()
palette = sns.color_palette(n_colors=len(self.dfs))
for i, df in enumerate(self.dfs):
df = df.sort_values(column_name)
g = sns.lineplot(data=df, x=column_name, y=np.linspace(0, 1, len(df)), label=self.tags[i],
ci=None, color=palette[i])
g.set(title=f"Simulated {self.operation} {column_opts['title_tag']} CDF (by {aggregator_tag})",
xlabel=f"Simulated {column_opts['xlabel_tag']}",
ylabel=f"{self.operation} {column_opts['ylabel_tag']} CDF")
plt.legend(loc='lower center', ncols=1, bbox_to_anchor=(0.5, -0.2+(-0.065*len(self.dfs))))
fig = g.get_figure()
fig.savefig(self.targetFolder+f"/simulated_{self.operation.lower()}_{column_name}_cdf.png")
plt.show()
def plot_pdfs_by(self, aggregator_tag, column_name, column_opts):
# Histogram
sns.set()
by_aggregator = self.udf.groupby([column_name, aggregator_tag]).count()
df = by_aggregator.reset_index()
g = sns.histplot(data=df, x=df[column_name])
"""
g = sns.barplot(data=df, x=df[column_name], y="Unnamed: 0", hue=aggregator_tag, width=1.2)
"""
g.set(title=f"Simulated {self.operation} {column_opts['title_tag']} PDF (by {aggregator_tag})",
xlabel=f"Simulated {column_opts['xlabel_tag']}",
ylabel=f"{self.operation} {column_opts['ylabel_tag']}")
plt.legend(loc='lower center', ncols=1, bbox_to_anchor=(0.5, -0.2+(-0.065*len(self.dfs))))
fig = g.get_figure()
fig.savefig(self.targetFolder+f"/simulated_{self.operation.lower()}_{column_name}_hist.png")
plt.show()