280 lines
10 KiB
Python
Raw Normal View History

2023-08-21 22:17:38 +05:30
import os
2023-08-22 20:12:00 +05:30
import sys
import math
2023-08-16 12:42:41 +05:30
import typer
2023-10-26 15:16:43 +05:30
import glob
2023-08-21 22:17:38 +05:30
import json
import pandas as pd
import numpy as np
2023-08-16 12:42:41 +05:30
import logging as log
from pathlib import Path
2023-08-21 22:17:38 +05:30
import matplotlib.pyplot as plt
2023-08-22 22:46:49 +05:30
from os import walk
2023-08-23 00:18:51 +05:30
from collections import defaultdict
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# read a json and return the dict
2023-08-21 22:17:38 +05:30
def read_json(fname):
    """Load the JSON document stored in ``fname`` and return the decoded object.

    The file is opened explicitly as UTF-8 so that the result does not depend
    on the platform's locale encoding.

    Args:
        fname: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file (a dict for the
        simulation configs used in this script).
    """
    with open(fname, encoding="utf-8") as f:
        return json.load(f)
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# write the json from a dict
def write_json(dic, fname):
    """Dump ``dic`` to ``fname`` as indented, key-sorted JSON.

    Both keys and values are passed through ``str`` first, so the file
    contains string-to-string pairs only (nested values are stored as their
    ``str`` representation, not as JSON structures).

    Args:
        dic: mapping to serialise.
        fname: destination path; overwritten if it exists.
    """
    stringified = {}
    for key, value in dic.items():
        stringified[str(key)] = str(value)
    with open(fname, 'w') as out:
        out.write(json.dumps(stringified, indent=4, sort_keys=True))
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# read a serialised python dict
2023-08-24 18:46:18 +05:30
def read_dict(fname):
    """Read back a Python object previously dumped as text to ``fname``.

    The whole file is evaluated as a Python expression and the resulting
    object is returned.

    Args:
        fname: path of the text file to read.

    Returns:
        The object the file's text evaluates to.
    """
    with open(fname, 'r') as src:
        text = src.read()
    # NOTE(review): eval() executes arbitrary code found in the file; only
    # use this on files this script wrote itself, never on untrusted input.
    return eval(text)
2023-12-12 18:28:24 +05:30
# write a serialised python dict
2023-08-24 18:46:18 +05:30
def write_dict(dic, fname):
    """Write ``str(dic)`` to ``fname``, replacing any existing content.

    Args:
        dic: object to dump (its ``str`` form is what gets stored).
        fname: destination path.

    Returns:
        The number of characters written, as reported by ``file.write``.
    """
    text = str(dic)
    with open(fname, 'w') as sink:
        written = sink.write(text)
    return written
2023-12-12 18:28:24 +05:30
# read the output csv and return a pandas dataframe
2023-08-21 22:17:38 +05:30
def read_csv(fname):
    """Parse the CSV file ``fname`` into a pandas DataFrame.

    The first non-comment line is used as the header, everything after a
    ``#`` is treated as a comment, and blanks following a delimiter are
    dropped.

    Args:
        fname: path of the CSV file.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(fname, header=0, comment='#', skipinitialspace=True)
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# write pandas dataframe as csv
2023-08-21 22:17:38 +05:30
def write_csv(df, fname):
    """Store the DataFrame ``df`` as CSV at ``fname``.

    pandas' default ``to_csv`` settings are used.

    Args:
        df: the ``pandas.DataFrame`` to save.
        fname: destination path.
    """
    df.to_csv(path_or_buf=fname)
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# compute the steps it took to compute the views, and tag the tree depth
2023-08-24 20:46:45 +05:30
def _as_builtin(value):
    """Return numpy scalars as plain Python numbers; pass anything else through."""
    return value.item() if hasattr(value, "item") else value


def compute_view_finalisation_times(df, conf, oprefix, simtype, tag="tag", plot=False):
    """Compute, per view, how many steps passed until the view was finalised.

    For a view ``v`` the finalisation time is the ``step_id`` of the
    quorum-th row that reports view ``v + 2`` minus the ``step_id`` of the
    first row that reports view ``v``, where the quorum is
    ``floor(node_count * 2/3) + 1``.  The last two distinct views in ``df``
    are never used as a start view because their end view cannot be present.

    Args:
        df: DataFrame with at least the columns ``current_view`` and
            ``step_id``.
        conf: simulation config dict; ``node_count`` and
            ``stream_settings.path`` are always read,
            ``overlay_settings.branch_depth`` only when ``simtype`` is not
            ``"tree"``.
        oprefix: prefix of the PDF written when ``plot`` is true.
        simtype: ``"tree"`` or anything else (treated as the branch overlay);
            only affects the informational log line.
        tag: label appended to the plot title.
        plot: when true, also plot the result and save it to
            ``{oprefix}-view-finalisation-times.pdf``.

    Returns:
        dict mapping start view -> number of steps, with plain Python
        numbers as keys and values.  Views whose end view has fewer than
        quorum rows in ``df`` are left out (a warning is logged).
    """
    stream_path = conf["stream_settings"]["path"]
    if simtype == "tree":
        num_nodes = conf["node_count"]
        log.info(f'num nodes: tree: {num_nodes} - - {stream_path}')
    else:
        branch_depth = conf["overlay_settings"]["branch_depth"]
        committee_size = int(conf["node_count"] / branch_depth)
        num_tree_nodes = 2 ** (branch_depth - 1) - 1
        num_nodes = num_tree_nodes * committee_size
        log.info(f'num nodes: branch: {num_nodes} = {num_tree_nodes}*{committee_size} - {branch_depth, stream_path}')

    quorum = math.floor(conf["node_count"] * 2/3) + 1

    # A view is measured against the view `view_offset` later, so the last
    # `view_offset` views cannot serve as start views.
    view_offset = 2
    all_views = df["current_view"].unique()
    views = all_views[:-view_offset]
    log.debug(f'views: {stream_path} {views}, {all_views}')
    log.debug(f'{all_views} {df["step_id"].unique()} {df.columns}')

    view2fin_time = {}
    for start_view in views:
        end_view = start_view + view_offset
        # Select by boolean mask and then by position within the selection,
        # so the result does not depend on what the DataFrame index looks like.
        start_steps = df.loc[df["current_view"] == start_view, "step_id"]
        end_steps = df.loc[df["current_view"] == end_view, "step_id"]
        if len(end_steps) < quorum:
            log.warning(f'view {start_view}: only {len(end_steps)} rows for view {end_view}, '
                        f'need {quorum}; skipping ({stream_path})')
            continue
        start_step = start_steps.iloc[0]
        end_step = end_steps.iloc[quorum - 1]
        # Store builtin numbers: the text form of numpy scalars inside a dict
        # differs between NumPy versions, which matters once the result is
        # dumped with str().
        view2fin_time[_as_builtin(start_view)] = _as_builtin(end_step - start_step)
        log.debug(f'{start_view}({start_steps.index[0]}), {end_view}({end_steps.index[quorum - 1]}) : '
                  f'{end_step} - {start_step} = {end_step - start_step}')

    if not plot:
        return view2fin_time

    fig, axes = plt.subplots(1, 1, layout='constrained', sharey=False)
    fig.set_figwidth(12)
    fig.set_figheight(10)
    fig.suptitle(f'View Finalisation Times - {tag}')
    axes.set_ylabel("Number of Epochs to Finalise a View")
    axes.set_xlabel("Views")
    axes.set_xticks(list(view2fin_time))
    axes.set_yticks(list(range(0, max(view2fin_time.values(), default=0) + 2)))
    axes.plot(list(view2fin_time.keys()), list(view2fin_time.values()), linestyle='--', marker='o')
    # Save before show(): with an interactive backend show() blocks until the
    # window is closed, after which there is nothing left to save.
    fig.savefig(f'{oprefix}-view-finalisation-times.pdf', format="pdf", bbox_inches="tight")
    plt.show()
    return view2fin_time
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
# iterate over the different networks/overlay type and collect view finalisation times
2023-10-26 15:16:43 +05:30
def compute_view_times(path, oprefix, otype):
    """Collect view finalisation times for every ``*_{otype}.json`` config.

    For each config file ``{path}/configs/<tag>.json`` matching
    ``*_{otype}.json`` the matching data file ``{path}/output/<tag>.csv`` is
    read, the finalisation times are computed, and the run is filed under
    its network size.

    Args:
        path: directory containing the ``configs/`` and ``output/``
            sub-directories.
        oprefix: output prefix, forwarded to
            ``compute_view_finalisation_times``.
        otype: config-name suffix used to select the config files.

    Returns:
        Plain dict mapping network size -> list of
        ``(simtype, max_depth, view2fin, tag)`` tuples, one per run.  Runs
        without any finalised view are skipped.
    """
    nwsize2vfins = {}
    # glob returns matches in arbitrary order; sort so that the order of the
    # runs in the result is reproducible.
    conf_fnames = sorted(glob.glob(f'{path}/configs/*_{otype}.json'))
    log.debug(f'{conf_fnames} {otype}')
    for cfile in conf_fnames:
        tag = os.path.splitext(os.path.basename(cfile))[0]
        dfile = f'{path}/output/{tag}.csv'
        conf, df = read_json(cfile), read_csv(dfile)
        # e.g. "<dir>/<simtype>_<rest>" -> "<simtype>"
        simtype = conf["stream_settings"]["path"].split("/")[1].split("_")[0]
        view2fin = compute_view_finalisation_times(df, conf, oprefix, simtype, tag, plot=False)
        log.debug(f'SIM:{simtype} {view2fin}')
        if not view2fin:  # < 2 views
            continue
        overlay = conf["overlay_settings"]
        if simtype == "tree":
            num_nodes = conf["node_count"]
            # ceil(lg(N+1) - 1); log2 is used because math.log(x, 2) can be
            # slightly off at powers of two, which would push ceil() one too
            # high.
            max_depth = math.ceil(math.log2(overlay["number_of_committees"] + 1) - 1)
        else:
            branch_depth = overlay["branch_depth"]
            num_tree_nodes = 2 ** branch_depth - 1
            committee_size = int(conf["node_count"] / branch_depth)
            num_nodes = num_tree_nodes * committee_size
            max_depth = branch_depth
        log.debug(f'depth = {max_depth}')
        # Keep a plain dict (not defaultdict): the result is dumped with
        # str() and must stay readable as a dict literal.
        nwsize2vfins.setdefault(num_nodes, []).append((simtype, max_depth, view2fin, tag))
    return nwsize2vfins
2023-08-24 18:46:18 +05:30
2023-12-12 18:28:24 +05:30
# plot the view times, add log plots for comparison
2023-10-26 15:16:43 +05:30
def plot_view_times(nwsize2vfins, simtype, oprefix, otype):
    """Plot overlay depth and finalisation time of view 0 against network size.

    Only runs whose tag contains ``otype`` and whose simulation type contains
    ``simtype`` are used.  The figure is saved to
    ``{oprefix}-{simtype}-output.pdf``.

    Args:
        nwsize2vfins: dict mapping network size -> list of
            ``(simtype, max_depth, view2fin, tag)`` tuples.
        simtype: simulation type to plot.
        oprefix: prefix of the PDF file name.
        otype: substring the run tag must contain.

    Returns:
        ``[sizes, times, depths]``: three parallel lists holding, per
        selected run, the network size, the finalisation time of view 0 and
        the overlay depth, ordered by network size.
    """
    sizes, fin_times, depths = [], [], []
    for n in sorted(nwsize2vfins, key=int):
        for run_type, depth, view2fin, tag in nwsize2vfins[n]:
            if otype not in tag or simtype not in run_type:
                log.debug(f"ELSE: {simtype}!={run_type} : {n} {tag}")
                continue
            if 0 not in view2fin:
                # Nothing to plot for this run; skip it instead of failing
                # the whole plot with a KeyError.
                log.warning(f"{tag}: no finalisation time for view 0, skipping")
                continue
            sizes.append(int(n))
            fin_times.append(int(view2fin[0]))
            depths.append(int(depth))
            log.debug(f"IF: {simtype}={run_type} : {n} {tag}")
    data = [sizes, fin_times, depths]
    log.debug(f'{data}')

    fig, axes = plt.subplots(1, 1, layout='constrained', sharey=False)
    fig.set_figwidth(12)
    fig.set_figheight(10)
    fig.suptitle(f'View Finalisation Times - {simtype}')
    axes.set_ylabel("Number of Epochs")
    axes.set_xlabel("Number of Nodes")
    axes.plot(sizes, depths, linestyle='-.', marker='o', label='Depth')
    axes.plot(sizes, fin_times, linestyle='-', marker='o', label='Carnot')
    axes.legend(loc="lower right")
    # Save before show(): with an interactive backend show() blocks until the
    # window is closed, after which there is nothing left to save.
    fig.savefig(f'{oprefix}-{simtype}-output.pdf', format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)
    return data
2023-08-24 20:46:45 +05:30
2023-12-12 18:28:24 +05:30
# plot tree vs branch against the number of nodes; works only when #tree sims = # branch sims
2023-08-24 21:24:33 +05:30
def plot_tree_vs_branch(tree, branch, oprefix):
    """Scatter-plot tree finalisation times against branch finalisation times.

    Point ``i`` pairs ``tree[1][i]`` with ``branch[1][i]``, so both inputs
    must hold the same number of runs.  A dashed diagonal is drawn across the
    axes for reference and the figure is saved to ``{oprefix}-scatter.pdf``.

    Args:
        tree: sequence whose element ``1`` is the list of tree times (the
            structure returned by ``plot_view_times``).
        branch: same structure for the branch runs.
        oprefix: prefix of the PDF file name.

    Raises:
        ValueError: if the two time lists differ in length.
    """
    tree_times, branch_times = tree[1], branch[1]
    log.debug(f'{tree} {branch}')
    log.debug(f'T, B: ({tree_times, len(tree_times)}) ({branch_times, len(branch_times)})')
    if len(tree_times) != len(branch_times):
        raise ValueError(
            f'tree and branch must have the same number of runs, '
            f'got {len(tree_times)} and {len(branch_times)}')

    fig, axes = plt.subplots(1, 1, layout='constrained', sharey=False)
    fig.set_figwidth(12)
    fig.set_figheight(10)
    fig.suptitle('View Finalisation Times - Tree vs Branch')
    axes.set_xlabel("Tree")
    axes.set_ylabel("Branch")
    axes.scatter(tree_times, branch_times)
    # Diagonal in axes coordinates (corner to corner of the plot area).
    axes.plot([0, 1], [0, 1], linestyle='dashed', transform=axes.transAxes)
    # Save before show(): with an interactive backend show() blocks until the
    # window is closed, after which there is nothing left to save.
    fig.savefig(f'{oprefix}-scatter.pdf', format="pdf", bbox_inches="tight")
    plt.show()
    plt.close(fig)
2023-08-22 22:46:49 +05:30
2023-08-21 22:17:38 +05:30
# Typer application object; the functions decorated with @app.command()
# below are registered on it as CLI sub-commands.
app = typer.Typer()
2023-08-16 12:42:41 +05:30
2023-12-12 18:28:24 +05:30
2023-08-21 22:17:38 +05:30
@app.command()
def view(ctx: typer.Context,
         data_file: Path = typer.Option("simout.csv",
             exists=True, file_okay=True, readable=True,
             help="Set the simulation data file"),
         config_file: Path = typer.Option("config.json",
             exists=True, file_okay=True, readable=True,
             help="Set the simulation config file"),
         oprefix: str = typer.Option("output",
             help="Set the output prefix for the plots"),
         ):
    """Plot the view finalisation times of a single simulation run."""
    # data_file is the CSV output and config_file the JSON config; the
    # defaults and help texts follow how the two files are read below.
    log.basicConfig(level=log.INFO)
    tag = os.path.splitext(os.path.basename(data_file))[0]
    conf, df = read_json(config_file), read_csv(data_file)
    # Derive the simulation type from the configured stream path, the same
    # way compute_view_times does ("<dir>/<simtype>_<rest>" -> "<simtype>").
    simtype = conf["stream_settings"]["path"].split("/")[1].split("_")[0]
    compute_view_finalisation_times(df, conf, oprefix, simtype, tag, plot=True)
2023-08-21 22:17:38 +05:30
2023-12-12 18:28:24 +05:30
2023-08-22 22:46:49 +05:30
@app.command()
def views(ctx: typer.Context,
          path: Path = typer.Option("../",
              exists=True, file_okay=False, dir_okay=True, readable=True,
              help="Set the directory containing the simulation configs/ and output/"),
          oprefix: str = typer.Option("output",
              help="Set the output prefix for the plots"),
          otype: str = typer.Option("nolat",
              help="Select the config type (config file name suffix) for the plots"),
          ):
    """Plot view finalisation times across all simulation runs of one type."""
    log.basicConfig(level=log.INFO)
    nwsize2vfins = compute_view_times(path, oprefix, otype)
    # The collected times are dumped to disk and then read back, so the plots
    # are produced from exactly what is stored in the .dict file.
    write_dict(nwsize2vfins, f'{oprefix}-viewtimes.dict')
    print("processed and wrote the dict. now reading...")
    nwsize2vfins = read_dict(f'{oprefix}-viewtimes.dict')
    tree = plot_view_times(nwsize2vfins, "tree", oprefix, otype)
    branch = plot_view_times(nwsize2vfins, "branch", oprefix, otype)
    plot_tree_vs_branch(tree, branch, oprefix)
2023-08-21 22:17:38 +05:30
2023-12-12 18:28:24 +05:30
2023-08-21 22:17:38 +05:30
@app.command()
def test():
    # Placeholder sub-command: registered on the CLI but does nothing yet.
    return None
2023-08-16 12:42:41 +05:30
# Start the Typer CLI only when this file is run as a script.
if __name__ == "__main__":
    app()