From 89368d5fbe441b74950e0d9a2987f726a5a0a54b Mon Sep 17 00:00:00 2001
From: HajarZaiz <h.zaiz@aui.ma>
Date: Thu, 30 Mar 2023 11:24:30 +0000
Subject: [PATCH] fixed bug and generalized averaging

---
 DAS/visualizer.py | 79 +++++++++++++++++++++++++++++++----------------
 config_example.py | 18 +++++------
 2 files changed, 62 insertions(+), 35 deletions(-)

diff --git a/DAS/visualizer.py b/DAS/visualizer.py
index c44d32a..be05ed0 100644
--- a/DAS/visualizer.py
+++ b/DAS/visualizer.py
@@ -6,19 +6,21 @@ import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
 from itertools import combinations
+from config_example import runs
 
 class Visualizer:
 
     def __init__(self, execID):
         self.execID = execID
         self.folderPath = "results/"+self.execID
-        self.parameters = ['run', 'blockSize', 'failureRate', 'numberNodes', 'netDegree',
-                           'chi', 'vpn1', 'vpn2', 'bwUplinkProd', 'bwUplink1', 'bwUplink2']
+        self.parameters = ['run', 'blockSize', 'failureRate', 'numberNodes', 'netDegree', 'chi', 'vpn1', 'vpn2', 'class1ratio', 'bwUplinkProd', 'bwUplink1', 'bwUplink2']
         self.minimumDataPoints = 2
+        self.maxTTA = 999
 
     def plottingData(self):
         """Store data with a unique key for each params combination"""
         data = {}
+        bw = []
         """Loop over the xml files in the folder"""
         for filename in os.listdir(self.folderPath):
             """Loop over the xmls and store the data in variables"""
@@ -29,6 +31,7 @@ class Visualizer:
                 blockSize = int(root.find('blockSize').text)
                 failureRate = int(root.find('failureRate').text)
                 numberNodes = int(root.find('numberNodes').text)
+                class1ratio = float(root.find('class1ratio').text)
                 netDegree = int(root.find('netDegree').text)
                 chi = int(root.find('chi').text)
                 vpn1 = int(root.find('vpn1').text)
@@ -38,25 +41,31 @@ class Visualizer:
                 bwUplink2 = int(root.find('bwUplink2').text)
                 tta = int(root.find('tta').text)
 
-                """Loop over all possible combinations of length 4 of the parameters"""
+                # if tta == -1:
+                #     tta = self.maxTTA
+
+                """Store BW"""
+                bw.append(bwUplinkProd)
+
+                """Loop over every combination of parameters that leaves out exactly two (the x and y params of a plot)"""
                 for combination in combinations(self.parameters, len(self.parameters)-2):
                     # Get the indices and values of the parameters in the combination
 
                     indices = [self.parameters.index(element) for element in combination]
-                    selectedValues = [run, blockSize, failureRate, numberNodes, netDegree, chi, vpn1, vpn2, bwUplinkProd, bwUplink1, bwUplink2]
+                    selectedValues = [run, blockSize, failureRate, numberNodes, netDegree, chi, vpn1, vpn2, class1ratio, bwUplinkProd, bwUplink1, bwUplink2]
                     values = [selectedValues[index] for index in indices]
                     names = [self.parameters[i] for i in indices]
                     keyComponents = [f"{name}_{value}" for name, value in zip(names, values)]
                     key = tuple(keyComponents[:len(self.parameters)-2])
-                    """Get the names of the other 2 parameters that are not included in the key"""
+                    """Get the names of the other parameters that are not included in the key"""
                     otherParams = [self.parameters[i] for i in range(len(self.parameters)) if i not in indices]
-                    """Append the values of the other 2 parameters and the ttas to the lists for the key"""
+                    """Append the values of the other parameters and the ttas to the lists for the key"""
                     otherIndices = [i for i in range(len(self.parameters)) if i not in indices]
 
                     """Initialize the dictionary for the key if it doesn't exist yet"""
                     if key not in data:
                         data[key] = {}
-                        """Initialize lists for the other 2 parameters and the ttas with the key"""
+                        """Initialize lists for the other parameters and the ttas with the key"""
                         data[key][otherParams[0]] = []
                         data[key][otherParams[1]] = []
                         data[key]['ttas'] = []
@@ -73,14 +82,14 @@ class Visualizer:
         print("Getting data from the folder...")
         return data
     
-    def averageRuns(self, data):
+    def averageRuns(self, data, runs):
         """Get the average of run 0 and run 1 for each key"""
         newData = {}
         for key, value in data.items():
             runExists = False
             """Check if the key contains 'run_' with a numerical value"""
             for item in key:
-                if item.startswith('run_0'):
+                if item.startswith('run_'):
                     runExists = True
                     break
             if runExists:
@@ -88,20 +97,20 @@ class Visualizer:
                     """Create a new key with the other items in the tuple"""
                     if item.startswith('run_'):
                         newKey = tuple([x for x in key if x != item])
-                        key0 = ('run_0',) + newKey
-                        data0 = data[key0]
-                        key1 = ('run_1',) + newKey
-                        data1 = data[key1]
+                        """Average the similar key values"""
+                        total = [0] * len(data[key]['ttas'])
+                        for i in range(runs):
+                            key0 = (f'run_{i}',) + newKey
+                            for cnt, tta in enumerate(data[key0]['ttas']):
+                                total[cnt] += tta
+                        for i in range(len(total)):
+                            total[i] = total[i]/runs
                         averages = {}
-                        for key in data0.keys():
-                            if key in data1:
-                                values = []
-                                for i in range(len(data0[key])):
-                                    value = (data0[key][i] + data1[key][i]) / 2.0
-                                    if isinstance(data0[key][i], int) and isinstance(data1[key][i], int) and key != 'ttas':
-                                        value = int(value)
-                                    values.append(value)
-                                averages[key] = values
+                        for subkey in data[key].keys():
+                            if subkey == 'ttas':
+                                averages[subkey] = total
+                            else:
+                                averages[subkey] = data[key][subkey]
                         newData[newKey] = averages
         print("Getting the average of the runs...")
         return newData
@@ -135,9 +144,12 @@ class Visualizer:
 
     def plotHeatmaps(self):
         """Plot and store the 2D heatmaps in subfolders"""
-        data = self.plottingData()
-        data = self.averageRuns(data)
+        data= self.plottingData()
+        """Average the runs if needed"""
+        if(len(runs) > 1):
+            data = self.averageRuns(data, len(runs))
         filteredKeys = self.similarKeys(data)
+        vmin, vmax = 0, self.maxTTA
         print("Plotting heatmaps...")
    
         """Create the directory if it doesn't exist already"""
@@ -152,10 +164,10 @@ class Visualizer:
                 ylabels = np.sort(np.unique(data[key][labels[1]]))
                 if len(xlabels) < self.minimumDataPoints or len(ylabels) < self.minimumDataPoints:
                     continue
-                hist, xedges, yedges = np.histogram2d(data[key][labels[0]], data[key][labels[1]], bins=(len(xlabels), len(ylabels)), weights=data[key]['ttas'])
+                hist, xedges, yedges = np.histogram2d(data[key][labels[0]], data[key][labels[1]], bins=(len(xlabels), len(ylabels)), weights=data[key]['ttas'], density=False)  # 'normed' was removed in NumPy 1.24; 'density' is its replacement
                 hist = hist.T
                 fig, ax = plt.subplots(figsize=(10, 6))
-                sns.heatmap(hist, xticklabels=xlabels, yticklabels=ylabels, cmap='Purples', cbar_kws={'label': 'Time to block availability'}, linecolor='black', linewidths=0.3, annot=True, fmt=".2f", ax=ax)
+                sns.heatmap(hist, xticklabels=xlabels, yticklabels=ylabels, cmap='Purples', cbar_kws={'label': 'Time to block availability'}, linecolor='black', linewidths=0.3, annot=True, fmt=".2f", ax=ax, vmin=vmin, vmax=vmax)
                 plt.xlabel(self.formatLabel(labels[0]))
                 plt.ylabel(self.formatLabel(labels[1]))
                 filename = ""
@@ -177,3 +189,18 @@ class Visualizer:
                 plt.savefig(os.path.join(targetFolder, filename))
                 plt.close()
                 plt.clf()
+    
+    def plotHist(self, bandwidth):
+        """Plot Bandwidth Frequency Histogram"""
+        plt.hist(bandwidth, bins=5)
+        plt.xlabel('Bandwidth')
+        plt.ylabel('Frequency')
+        plt.title('Bandwidth Histogram')
+
+        """Create the directory if it doesn't exist already"""
+        histogramFolder = self.folderPath + '/histogram'
+        if not os.path.exists(histogramFolder):
+            os.makedirs(histogramFolder)
+        filename = os.path.join(histogramFolder, 'histogram.png')
+        plt.savefig(filename)
+        plt.clf()
\ No newline at end of file
diff --git a/config_example.py b/config_example.py
index af55fc2..9e99c18 100644
--- a/config_example.py
+++ b/config_example.py
@@ -24,36 +24,36 @@ logLevel = logging.INFO
 
 # number of parallel workers. -1: all cores; 1: sequential
 # for more details, see joblib.Parallel
-numJobs = 3
+numJobs = -1
 
 # distribute rows/columns evenly between validators (True)
 # or generate it using local randomness (False)
 evenLineDistribution = True
 
 # Number of simulation runs with the same parameters for statistical relevance
-runs = range(10)
+runs = range(3)
 
 # Number of validators
-numberNodes = range(256, 513, 128)
+numberNodes = range(256, 313, 128)
 
 # Percentage of block not released by producer
-failureRates = range(10, 91, 40)
+failureRates = range(10, 31, 40)
 
 # Block size in one dimension in segments. Block is blockSizes * blockSizes segments.
-blockSizes = range(32,65,16)
+blockSizes = range(32,35,16)
 
 # Per-topic mesh neighborhood size
 netDegrees = range(6, 9, 2)
 
 # number of rows and columns a validator is interested in
-chis = range(1, 5, 2)
+chis = range(2, 5, 2)
 
 # ratio of class1 nodes (see below for parameters per class)
-class1ratios = np.arange(0, 1, .2)
+class1ratios = [0.8, 0.9]
 
 # Number of validators per beacon node
 validatorsPerNode1 = [1]
-validatorsPerNode2 = [2, 4, 8, 16, 32]
+validatorsPerNode2 = [50]
 
 # Set uplink bandwidth. In segments (~560 bytes) per timestep (50ms?)
 # 1 Mbps ~= 1e6 / 20 / 8 / 560 ~= 11
@@ -62,7 +62,7 @@ bwUplinks1 = [110]
 bwUplinks2 = [2200]
 
 # Set to True if you want your run to be deterministic, False if not
-deterministic = False
+deterministic = True
 
 # If your run is deterministic you can decide the random seed. This is ignore otherwise.
 randomSeed = "DAS"