# Mirror of https://github.com/logos-storage/das-research.git
# Synced 2026-01-15 19:43:07 +00:00 (266 lines, 9.2 KiB, Python)
import os
|
|
import numpy as np
|
|
import pandas as pd
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
from IPython.display import display
|
|
|
|
# Tag Identifiers
# Words and value prefixes that appear in the "_"-separated tag of a result
# file (see tag_example below).
RETRIEVAL = "retrieval"
LOOKUP = "lookup"
NETWORK = "network"
NN = "nn"
RN = "rn"
SAMPL = "sampl"
FER = "fer"
SER = "ser"
CDR = "cdr"
FDR = "fdr"
SDR = "sdr"
K = "k"
A = "a"
B = "b"
Y = "y"
STEPS = "steps"
# --
# Keys of the dictionary returned by tag_parser
OPERATION = "operation"
NUMBER_NODES = "number_nodes"
RETRIEVAL_NODES = "retrieval_nodes"
CONCURRENT_SAMPLES = "concurrent_samples"
FAST_ERROR_RATE = "fast_error_rate"
SLOW_ERROR_RATE = "slow_error_rate"
CONNECTION_DELAYS = "connection_delays"
FAST_ERROR_DELAYS = "fast_error_delays"
SLOW_ERROR_DELAYS = "slow_error_delays"
K_PARAMETER = "k_replication"
ALPHA = "alpha"
BETA = "beta"
GAMMA = "overhead"
STEPS_TO_STOP = "steps_to_stop"


# Utils
tag_example = "retrieval_lookup_nn12000_rn1_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y1.0_steps3"

# (tag prefix, key in the parsed dictionary), in the order the tag is written
_TAG_PREFIXES = (
    (NN, NUMBER_NODES),
    (RN, RETRIEVAL_NODES),
    (SAMPL, CONCURRENT_SAMPLES),
    (FER, FAST_ERROR_RATE),
    (SER, SLOW_ERROR_RATE),
    (CDR, CONNECTION_DELAYS),
    (FDR, FAST_ERROR_DELAYS),
    (SDR, SLOW_ERROR_DELAYS),
    (K, K_PARAMETER),
    (A, ALPHA),
    (B, BETA),
    (Y, GAMMA),
    (STEPS, STEPS_TO_STOP),
)


def tag_parser(tag: str):
    """Split a result-file tag into its operation name and parameters.

    The tag is split on "_". A word is treated as a parameter only when it
    starts with one of the known prefixes AND the remainder starts with a
    digit (e.g. "nn12000", "cdr50-75", "y1.0"). Every other word is part of
    the operation name; those words are re-joined with "_".

    Matching on the prefix (instead of "prefix appears anywhere in the word")
    keeps operation words such as "retrieval" or "network" from being
    mistaken for the one-letter "a" / "k" parameters.

    Args:
        tag: file name without folder and extension, e.g. ``tag_example``.

    Returns:
        dict keyed by OPERATION, NUMBER_NODES, ..., STEPS_TO_STOP. All values
        are strings; parameters missing from the tag stay as "".

    Example:
        >>> tag_parser("retrieval_lookup_nn12000_k20")["operation"]
        'retrieval_lookup'
    """
    params = {OPERATION: ""}
    params.update({key: "" for _, key in _TAG_PREFIXES})

    for param in tag.split("_"):
        for prefix, key in _TAG_PREFIXES:
            value = param[len(prefix):]
            if param.startswith(prefix) and value[:1].isdigit():
                params[key] = value
                break
        else:
            # not a "<prefix><value>" word -> it belongs to the operation name
            if params[OPERATION] == "":
                params[OPERATION] = param
            else:
                params[OPERATION] += f"_{param}"
    return params
|
|
|
|
def compose_legend(params, labels):
    """Build a legend entry of the form "label1=value1 label2=value2".

    Args:
        params: mapping from label to value (e.g. the result of tag_parser).
        labels: iterable of keys of ``params`` to show, in display order.

    Returns:
        The "label=value" pairs joined by single spaces; "" when ``labels``
        is empty.

    Raises:
        KeyError: if a label is not present in ``params``.
    """
    return " ".join(f"{label}={params[label]}" for label in labels)
|
|
|
|
def make_folder(folder, reason):
    """Create ``folder`` (and any missing parent folders), reporting on stdout.

    The call is best-effort: filesystem errors are printed, not raised, so the
    caller keeps running.

    Args:
        folder: path of the directory to create.
        reason: free text appended to the "created folder" message.
    """
    try:
        # makedirs instead of mkdir: nested output folders are created in one
        # go instead of failing because the parent does not exist yet
        os.makedirs(folder)
        print(f"created folder {folder} for {reason}")
    except FileExistsError:
        print(f"folder {folder} was already created")
    except OSError as e:
        print(e)
|
|
|
|
|
|
# --- Single Metrics ---
|
|
class SingleMetrics:
    """Plot the CDF and the histogram of every metric of one result CSV.

    Creating an instance reads the CSV, creates the output folder and
    immediately draws, saves and shows all the plots.
    """

    # Metrics plotted by default: CSV column name -> plot text options.
    metrics = {
        "lookup_aggregated_delay": {
            "title_tag": "delay",
            "xlabel_tag": "delay (ms)",
            "ylabel_tag": "",
        },
        "finished_connection_attempts": {
            "title_tag": "hops",
            "xlabel_tag": "hops",
            "ylabel_tag": "",
        },
        "accuracy": {
            "title_tag": "accuracy",
            "xlabel_tag": "accuracy",
            "ylabel_tag": "",
        },
    }

    def __init__(self, file, output_image_folder, operation, metrics: dict = None):
        """Load ``file`` and plot every configured metric.

        Args:
            file: path of the CSV file; its base name (without ".csv") is used
                as the label of the plots and as the output sub-folder name.
            output_image_folder: folder under which the sub-folder is created.
            operation: operation name shown in titles and used (lower-cased)
                in the image file names.
            metrics: optional extra metrics, same layout as the class-level
                ``metrics``; entries with an existing name replace the default.
        """
        self.file = file
        self.df = pd.read_csv(file)
        self.label = file.split("/")[-1].replace(".csv", "")
        self.targetFolder = output_image_folder+"/"+self.label
        self.operation = operation
        # per-instance copy: the class-level defaults must not accumulate the
        # extra metrics of previously created instances
        self.metrics = self._merged_metrics(metrics)
        # Make sure there is a valid folder for the images
        make_folder(self.targetFolder, f"for keeping the lookup related images about {self.label}\n")
        print(f"plotting {self.label}, saving figures at {self.targetFolder}\n")

        # display the CDF and the histogram of every metric
        for metric_name, metric_opts in self.metrics.items():
            self.plot_cdf(metric_name, metric_opts)
            self.plot_pdf(metric_name, metric_opts)

    @classmethod
    def _merged_metrics(cls, extra):
        """Return a new dict with the default metrics plus ``extra`` (may be None)."""
        merged = dict(cls.metrics)
        merged.update(extra or {})
        return merged

    def plot_cdf(self, column_name, column_opts):
        """Draw, save and show the empirical CDF of ``column_name``.

        Args:
            column_name: column of the CSV to plot.
            column_opts: dict providing "xlabel_tag" and "ylabel_tag".
        """
        df = self.df.sort_values(column_name)
        # CDF: sorted values on x against evenly spaced fractions in [0, 1]
        sns.set()
        # NOTE(review): newer seaborn releases deprecate `ci` in favour of
        # `errorbar` - confirm the installed version before switching
        g = sns.lineplot(data=df, x=column_name, y=np.linspace(0, 1, len(df)), color='red', ci=None)
        g.set(title=f"Simulated {self.operation} {column_name} CDF ({self.label})",
              xlabel=f"Simulated {column_opts['xlabel_tag']}", ylabel=f"{self.operation} {column_opts['ylabel_tag']}")
        fig = g.get_figure()
        fig.savefig(self.targetFolder+f"/{self.operation.lower()}_{column_name}_cdf.png")
        plt.show()
        # close explicitly so the next plot does not draw on top of this one
        # when the backend keeps the figure open after show()
        plt.close(fig)

    def plot_pdf(self, column_name, column_opts):
        """Draw, save and show an 8-bin histogram of ``column_name``.

        Args:
            column_name: column of the CSV to plot.
            column_opts: dict providing "xlabel_tag" and "ylabel_tag".
        """
        df = self.df.sort_values(column_name)
        # Histogram
        bins = 8
        sns.set()
        g = sns.histplot(x=df[column_name], bins=bins)
        # title / file name follow self.operation, like plot_cdf does
        g.set(title=f"Simulated {self.operation} {column_name} PDF ({self.label})",
              xlabel=f"Simulated {column_opts['xlabel_tag']}", ylabel=f"{self.operation} {column_opts['ylabel_tag']}")
        fig = g.get_figure()
        fig.savefig(self.targetFolder + f"/{self.operation.lower()}_{column_name}_pdf.png")
        plt.show()
        plt.close(fig)
|
|
|
|
|
|
# --- Multiple Aggregators ---
|
|
class CombinedMetrics:
    """Compare the metrics of several result CSVs in shared plots.

    Creating an instance loads every file whose path contains all the
    ``filters``, creates the output folder and immediately draws, saves and
    shows one CDF plot and one histogram per metric.
    """

    # Metrics plotted by default: CSV column name -> plot text options.
    metrics = {
        "lookup_aggregated_delay": {
            "title_tag": "delay",
            "xlabel_tag": "delay (ms)",
            "ylabel_tag": "",
        },
        "finished_connection_attempts": {
            "title_tag": "hops",
            "xlabel_tag": "hops",
            "ylabel_tag": "",
        },
        "accuracy": {
            "title_tag": "accuracy",
            "xlabel_tag": "accuracy",
            "ylabel_tag": "",
        },
    }

    def __init__(self, files, aggregator, filters, operation, output_image_folder, metrics, legend):
        """Load the matching files and plot every configured metric.

        Args:
            files: iterable of CSV paths; the base name without ".csv" is
                parsed with tag_parser.
            aggregator: name used in titles and in the output folder name; it
                is also used as a group-by column in plot_pdfs_by.
            filters: substrings that must ALL appear in a path for the file to
                be loaded.
            operation: operation name shown in titles and used (lower-cased)
                in folder and file names.
            output_image_folder: folder under which the comparison folder is
                created.
            metrics: extra metrics, same layout as the class-level
                ``metrics``; entries with an existing name replace the default.
            legend: keys of the parsed tag shown in each curve's legend entry.

        Raises:
            ValueError: if no file matches the filters.
        """
        self.files = files
        self.dfs = []
        self.tags = []
        self.params = []
        self.tag = aggregator
        self.filters = filters
        self.operation = operation
        # per-instance copy: the class-level defaults must not accumulate the
        # extra metrics of previously created instances
        self.metrics = self._merged_metrics(metrics)
        for file in files:
            # skip files that miss at least one of the required substrings
            if any(required not in file for required in filters):
                continue

            self.dfs.append(pd.read_csv(file))
            raw_tag = file.split("/")[-1].replace(".csv", "")
            params = tag_parser(raw_tag)
            tag = compose_legend(params, legend)
            self.params.append(params)
            self.tags.append(tag)

        self.udf = self.unify_dfs(self.dfs)  # unified dataframe

        # .lower() must be called: formatting the bare method would put its
        # repr ("<built-in method lower ...>") into the folder name
        self.targetFolder = output_image_folder+f"/{self.operation.lower()}_comparison_{aggregator}"
        make_folder(self.targetFolder, f"for keeping the {self.operation} related images about {self.tag}\n")
        print(f"plotting by {aggregator}, saving figures at {self.targetFolder}\n")

        # --- plotting sequence ---
        for metrics_name, metrics_opts in self.metrics.items():
            self.plot_cdfs_by(aggregator, metrics_name, metrics_opts)
            self.plot_pdfs_by(aggregator, metrics_name, metrics_opts)

    @classmethod
    def _merged_metrics(cls, extra):
        """Return a new dict with the default metrics plus ``extra`` (may be None)."""
        merged = dict(cls.metrics)
        merged.update(extra or {})
        return merged

    def unify_dfs(self, dfs):
        """Concatenate ``dfs`` row-wise into a single dataframe.

        Raises:
            ValueError: if ``dfs`` is empty.
        """
        if not dfs:
            raise ValueError("no dataframes to unify (did any file match the filters?)")
        return pd.concat(dfs)

    def plot_cdfs_by(self, aggregator_tag, column_name, column_opts):
        """Draw, save and show the CDF of ``column_name`` for every loaded file.

        Args:
            aggregator_tag: name shown in the plot title.
            column_name: column of the CSVs to plot.
            column_opts: dict providing "title_tag", "xlabel_tag" and
                "ylabel_tag".
        """
        # CDF
        sns.set()
        palette = sns.color_palette(n_colors=len(self.dfs))
        for i, df in enumerate(self.dfs):
            df = df.sort_values(column_name)
            # NOTE(review): newer seaborn releases deprecate `ci` in favour of
            # `errorbar` - confirm the installed version before switching
            g = sns.lineplot(data=df, x=column_name, y=np.linspace(0, 1, len(df)), label=self.tags[i],
                             ci=None, color=palette[i])
        g.set(title=f"Simulated {self.operation} {column_opts['title_tag']} CDF (by {aggregator_tag})",
              xlabel=f"Simulated {column_opts['xlabel_tag']}",
              ylabel=f"{self.operation} {column_opts['ylabel_tag']} CDF")
        # legend below the axes, pushed further down the more curves there are
        plt.legend(loc='lower center', ncols=1, bbox_to_anchor=(0.5, -0.2+(-0.065*len(self.dfs))))
        fig = g.get_figure()
        fig.savefig(self.targetFolder+f"/simulated_{self.operation.lower()}_{column_name}_cdf.png")
        plt.show()
        # close explicitly so the next plot does not draw on top of this one
        # when the backend keeps the figure open after show()
        plt.close(fig)

    def plot_pdfs_by(self, aggregator_tag, column_name, column_opts):
        """Draw, save and show a histogram of ``column_name`` over all files.

        The unified dataframe is grouped by (``column_name``,
        ``aggregator_tag``), so ``aggregator_tag`` must be a column of it.

        Args:
            aggregator_tag: group-by column, also shown in the plot title.
            column_name: column of the CSVs to plot.
            column_opts: dict providing "title_tag", "xlabel_tag" and
                "ylabel_tag".
        """
        # Histogram
        sns.set()
        by_aggregator = self.udf.groupby([column_name, aggregator_tag]).count()
        df = by_aggregator.reset_index()
        g = sns.histplot(data=df, x=df[column_name])
        # NOTE: a per-aggregator barplot (y="Unnamed: 0", hue=aggregator_tag,
        # width=1.2) was tried here before and left disabled
        g.set(title=f"Simulated {self.operation} {column_opts['title_tag']} PDF (by {aggregator_tag})",
              xlabel=f"Simulated {column_opts['xlabel_tag']}",
              ylabel=f"{self.operation} {column_opts['ylabel_tag']}")
        plt.legend(loc='lower center', ncols=1, bbox_to_anchor=(0.5, -0.2+(-0.065*len(self.dfs))))
        fig = g.get_figure()
        fig.savefig(self.targetFolder+f"/simulated_{self.operation.lower()}_{column_name}_hist.png")
        plt.show()
        plt.close(fig)
|