Merge pull request #27 from status-im/vis

Average runs
2025-02-18 17:46:53 +00:00 · 2023-04-27 14:13:20 +02:00 · 2023-04-27 14:13:20 +02:00 · 9e98f6963d
commit 9e98f6963d
parent b2df64f8b9 6eac3eb8e7
4 changed files with 173 additions and 30 deletions
--- a/DAS/requirements.txt
+++ b/DAS/requirements.txt
@ -1,6 +1,7 @@
 bitarray==2.6.0
 dicttoxml==1.7.16
 matplotlib==3.6.2
+mplfinance==0.12.9b7
 networkx==3.0
 numpy==1.23.5
 seaborn==0.12.2
--- a/DAS/visualizer.py
+++ b/DAS/visualizer.py
@ -6,22 +6,28 @@ import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
 from itertools import combinations
+from mplfinance.original_flavor import candlestick_ohlc
+import os
+

 class Visualizer:

-    def __init__(self, execID):
+    def __init__(self, execID, config):
        self.execID = execID
+        self.config = config
        self.folderPath = "results/"+self.execID
-        self.parameters = ['run', 'blockSize', 'failureRate', 'numberNodes', 'netDegree',
-                           'chi', 'vpn1', 'vpn2', 'bwUplinkProd', 'bwUplink1', 'bwUplink2']
+        self.parameters = ['run', 'blockSize', 'failureRate', 'numberNodes', 'netDegree', 'chi', 'vpn1', 'vpn2', 'class1ratio', 'bwUplinkProd', 'bwUplink1', 'bwUplink2']
        self.minimumDataPoints = 2
+        self.maxTTA = 11000

    def plottingData(self):
-        #Store data with a unique key for each params combination
+        """Store data with a unique key for each params combination"""
        data = {}
-        #Loop over the xml files in the folder
+        bw = []
+        print("Getting data from the folder...")
+        """Loop over the xml files in the folder"""
        for filename in os.listdir(self.folderPath):
-            #Loop over the xmls and store the data in variables
+            """Loop over the xmls and store the data in variables"""
            if filename.endswith('.xml'):
                tree = ET.parse(os.path.join(self.folderPath, filename))
                root = tree.getroot()
@ -29,6 +35,7 @@ class Visualizer:
                blockSize = int(root.find('blockSize').text)
                failureRate = int(root.find('failureRate').text)
                numberNodes = int(root.find('numberNodes').text)
+                class1ratio = float(root.find('class1ratio').text)
                netDegree = int(root.find('netDegree').text)
                chi = int(root.find('chi').text)
                vpn1 = int(root.find('vpn1').text)
@ -38,24 +45,28 @@ class Visualizer:
                bwUplink2 = int(root.find('bwUplink2').text)
                tta = float(root.find('tta').text)

-                # Loop over all possible combinations of of the parameters minus two
+                """Store BW"""
+                bw.append(bwUplinkProd)
+
+                """Loop over all possible combinations of length of the parameters minus x, y params"""
                for combination in combinations(self.parameters, len(self.parameters)-2):
                    # Get the indices and values of the parameters in the combination
+
                    indices = [self.parameters.index(element) for element in combination]
-                    selectedValues = [run, blockSize, failureRate, numberNodes, netDegree, chi, vpn1, vpn2, bwUplinkProd, bwUplink1, bwUplink2]
+                    selectedValues = [run, blockSize, failureRate, numberNodes, netDegree, chi, vpn1, vpn2, class1ratio, bwUplinkProd, bwUplink1, bwUplink2]
                    values = [selectedValues[index] for index in indices]
                    names = [self.parameters[i] for i in indices]
                    keyComponents = [f"{name}_{value}" for name, value in zip(names, values)]
                    key = tuple(keyComponents[:len(self.parameters)-2])
-                    #Get the names of the other 2 parameters that are not included in the key
+                    """Get the names of the other parameters that are not included in the key"""
                    otherParams = [self.parameters[i] for i in range(len(self.parameters)) if i not in indices]
-                    #Append the values of the other 2 parameters and the ttas to the lists for the key
+                    """Append the values of the other parameters and the ttas to the lists for the key"""
                    otherIndices = [i for i in range(len(self.parameters)) if i not in indices]

-                    #Initialize the dictionary for the key if it doesn't exist yet
+                    """Initialize the dictionary for the key if it doesn't exist yet"""
                    if key not in data:
                        data[key] = {}
-                        #Initialize lists for the other 2 parameters and the ttas with the key
+                        """Initialize lists for the other parameters and the ttas with the key"""
                        data[key][otherParams[0]] = []
                        data[key][otherParams[1]] = []
                        data[key]['ttas'] = []
@ -69,11 +80,76 @@ class Visualizer:
                    else:
                        data[key][otherParams[1]] = [selectedValues[otherIndices[1]]]
                    data[key]['ttas'].append(tta)
-        print("Getting data from the folder...")
        return data

+    def averageRuns(self, data, runs):
+        """Get the average of all runs for each key"""
+        newData = {}
+        print("Getting the average of the runs...")
+        for key, value in data.items():
+            runExists = False
+            """Check if the key contains 'run_' with a numerical value"""
+            for item in key:
+                if item.startswith('run_'):
+                    runExists = True
+                    break
+            if runExists:
+                ps = list(data[key].keys())
+                for item in key:
+                    """Create a new key with the other items in the tuple"""
+                    if item.startswith('run_'):
+                        newKey = tuple([x for x in key if x != item])
+                        """Average the similar key values"""
+                        tta_sums = {}
+                        nbRuns = {}
+                        ttRuns = []
+                        total = []
+                        p0 = []
+                        p1 = []
+                        p2 = []
+                        p3 = []
+                        for i in range(runs):
+                            key0 = (f'run_{i}',) + newKey
+                            #Create a dictionary to store the sums of ttas for each unique pair of values in subkeys
+                            for i in range(len(data[key0][ps[0]])):
+                                keyPair = (data[key0][ps[0]][i], data[key0][ps[1]][i])
+                                if data[key0]["ttas"][i] == -1:
+                                    data[key0]["ttas"][i] = self.maxTTA
+                                try:
+                                    tta_sums[keyPair] += data[key0]['ttas'][i]
+                                    if data[key0]["ttas"][i] != self.maxTTA:
+                                        nbRuns[keyPair] += 1
+                                except KeyError:
+                                    tta_sums[keyPair] = data[key0]['ttas'][i]
+                                    if data[key0]["ttas"][i] != self.maxTTA:
+                                        nbRuns[keyPair] = 1
+                                    else:
+                                        nbRuns[keyPair] = 0
+                        for k, tta in tta_sums.items():
+                            p0.append(k[0])
+                            p1.append(k[1])
+                            total.append(tta)
+                        for k, run in nbRuns.items():
+                            p2.append(k[0])
+                            p3.append(k[1])
+                            ttRuns.append(run)
+                        for i in range(len(total)):
+                            if(ttRuns[i] == 0):     # All tta = -1
+                                total[i] = self.maxTTA
+                            elif ttRuns[i] < runs:  # Some tta = -1
+                                total[i] -= (runs-ttRuns[i]) * self.maxTTA
+                                total[i] = total[i]/ttRuns[i]
+                            else:                   # No tta = -1
+                                total[i] = total[i]/ttRuns[i]
+                        averages = {}
+                        averages[ps[0]] = p0
+                        averages[ps[1]] = p1
+                        averages['ttas'] = total
+                        newData[newKey] = averages
+        return newData
+
    def similarKeys(self, data):
-        #Get the keys for all data with the same x and y labels
+        """Get the keys for all data with the same x and y labels"""
        filteredKeys = {}
        for key1, value1 in data.items():
            subKeys1 = list(value1.keys())
@ -89,28 +165,32 @@ class Visualizer:
        return filteredKeys

    def formatLabel(self, label):
-        #Label formatting for the figures
+        """Label formatting for the figures"""
        result = ''.join([f" {char}" if char.isupper() else char for char in label])
        return result.title()

    def formatTitle(self, key):
-        #Title formatting for the figures
+        """Title formatting for the figures"""
        name = ''.join([f" {char}" if char.isupper() else char for char in key.split('_')[0]])
        number = key.split('_')[1]
        return f"{name.title()}: {number} "

    def plotHeatmaps(self):
-        #Plot and store the 2D heatmaps in subfolders
-        data = self.plottingData()
+        """Plot and store the 2D heatmaps in subfolders"""
+        data= self.plottingData()
+        """Average the runs if needed"""
+        if(len(self.config.runs) > 1):
+            data = self.averageRuns(data, len(self.config.runs))
        filteredKeys = self.similarKeys(data)
+        vmin, vmax = 0, self.maxTTA+1000
        print("Plotting heatmaps...")

-        #Create the directory if it doesn't exist already
+        """Create the directory if it doesn't exist already"""
        heatmapsFolder = self.folderPath + '/heatmaps'
        if not os.path.exists(heatmapsFolder):
            os.makedirs(heatmapsFolder)

-        #Plot
+        """Plot"""
        for labels, keys in filteredKeys.items():
            for key in keys:
                xlabels = np.sort(np.unique(data[key][labels[0]]))
@ -120,20 +200,21 @@ class Visualizer:
                hist, xedges, yedges = np.histogram2d(data[key][labels[0]], data[key][labels[1]], bins=(len(xlabels), len(ylabels)), weights=data[key]['ttas'])
                hist = hist.T
                fig, ax = plt.subplots(figsize=(10, 6))
-                sns.heatmap(hist, xticklabels=xlabels, yticklabels=ylabels, cmap='Purples', cbar_kws={'label': 'Time to block availability (ms)'}, linecolor='black', linewidths=0.3, annot=True, fmt=".2f", ax=ax)
+                sns.heatmap(hist, xticklabels=xlabels, yticklabels=ylabels, cmap='hot_r', cbar_kws={'label': 'Time to block availability (ms)'}, linecolor='black', linewidths=0.3, annot=True, fmt=".2f", ax=ax, vmin=vmin, vmax=vmax)
                plt.xlabel(self.formatLabel(labels[0]))
                plt.ylabel(self.formatLabel(labels[1]))
                filename = ""
                title = ""
                paramValueCnt = 0
                for param in self.parameters:
-                    if param != labels[0] and param != labels[1]:
+                    if param != labels[0] and param != labels[1] and param != 'run':
                        filename += f"{key[paramValueCnt]}"
                        formattedTitle = self.formatTitle(key[paramValueCnt])
                        title += formattedTitle
                        if (paramValueCnt+1) % 5 == 0:
                            title += "\n"
                        paramValueCnt += 1
+                title = "Time to Block Availability (ms)"
                title_obj = plt.title(title)
                font_size = 16 * fig.get_size_inches()[0] / 10
                title_obj.set_fontsize(font_size)
@ -144,3 +225,53 @@ class Visualizer:
                plt.savefig(os.path.join(targetFolder, filename))
                plt.close()
                plt.clf()
+
+    def plotHist(self, bandwidth):
+        """Plot Bandwidth Frequency Histogram"""
+        plt.hist(bandwidth, bins=5)
+        plt.xlabel('Bandwidth')
+        plt.ylabel('Frequency')
+        plt.title('Bandwidth Histogram')
+
+        """Create the directory if it doesn't exist already"""
+        histogramFolder = self.folderPath + '/histogram'
+        if not os.path.exists(histogramFolder):
+            os.makedirs(histogramFolder)
+        filename = os.path.join(histogramFolder, 'histogram.png')
+        plt.savefig(filename)
+        plt.clf()
+
+    def plotHist(self, bandwidth):
+        """Plot Bandwidth Frequency Histogram"""
+        plt.hist(bandwidth, bins=5)
+        plt.xlabel('Bandwidth')
+        plt.ylabel('Frequency')
+        plt.title('Bandwidth Histogram')
+
+        """Create the directory if it doesn't exist already"""
+        histogramFolder = self.folderPath + '/histogram'
+        if not os.path.exists(histogramFolder):
+            os.makedirs(histogramFolder)
+        filename = os.path.join(histogramFolder, 'histogram.png')
+        plt.savefig(filename)
+        plt.clf()
+
+    def plotCandleStick(self, TX_prod, TX_avg, TX_max):
+        #x-axis corresponding to steps
+        steps = range(len(TX_prod))
+
+        #Plot the candlestick chart
+        ohlc = []
+        for i in range(len(TX_prod)):
+            ohlc.append([steps[i], TX_prod[i], TX_max[i], TX_avg[i]])
+        fig, ax = plt.subplots()
+        candlestick_ohlc(ax, ohlc, width=0.6, colorup='green', colordown='red')
+
+        #Ticks, title and labels
+        plt.xticks(steps, ['run{}'.format(i) for i in steps], rotation=45)
+        plt.title('Candlestick Chart')
+        plt.xlabel('Step')
+        plt.ylabel('Price')
+
+        #Test
+        plt.show()
--- a/config_example.py
+++ b/config_example.py
@ -18,11 +18,22 @@ import itertools
 import numpy as np
 from DAS.shape import Shape

+# Dump results into XML files
 dumpXML = 1

 # save progress vectors to XML
 saveProgress = 1
+
+# plot progress for each run to PNG
+plotProgress = 1
+
+# Save row and column distributions
+saveRCdist = 1
+
+# Plot all figures
 visualization = 1
+
+# Verbosity level
 logLevel = logging.INFO

 # number of parallel workers. -1: all cores; 1: sequential
@ -34,25 +45,25 @@ numJobs = -1
 evenLineDistribution = True

 # Number of simulation runs with the same parameters for statistical relevance
-runs = range(2)
+runs = range(3)

 # Number of validators
-numberNodes = range(256, 513, 128)
+numberNodes = range(128, 513, 128)

 # Percentage of block not released by producer
-failureRates = range(10, 91, 40)
+failureRates = range(40, 81, 20)

 # Block size in one dimension in segments. Block is blockSizes * blockSizes segments.
-blockSizes = range(32,65,16)
+blockSizes = range(64, 113, 128)

 # Per-topic mesh neighborhood size
-netDegrees = range(6, 9, 2)
+netDegrees = range(8, 9, 2)

 # number of rows and columns a validator is interested in
-chis = range(2, 5, 2)
+chis = range(2, 3, 2)

 # ratio of class1 nodes (see below for parameters per class)
-class1ratios = [0.8, 0.9]
+class1ratios = [0.8]

 # Number of validators per beacon node
 validatorsPerNode1 = [1]
--- a/study.py
+++ b/study.py
@ -84,7 +84,7 @@ def study():
    logger.info("A total of %d simulations ran in %d seconds" % (len(results), end-start), extra=format)

    if config.visualization:
-        vis = Visualizer(execID)
+        vis = Visualizer(execID, config)
        vis.plotHeatmaps()

 if __name__ == "__main__":