From 8f3d15983ff738d02ad36ade9890a98b6d57a85d Mon Sep 17 00:00:00 2001
From: Youngjoon Lee <5462944+youngjoon-lee@users.noreply.github.com>
Date: Sat, 25 May 2024 23:42:55 +0900
Subject: [PATCH] polish results of bulk_run to show line plots

---
 mixnet/v2/sim/bulk_run.py    | 186 +++++++++++++++++------------------
 mixnet/v2/sim/config.yaml    |  16 +--
 mixnet/v2/sim/measurement.py |  15 ++-
 3 files changed, 114 insertions(+), 103 deletions(-)

diff --git a/mixnet/v2/sim/bulk_run.py b/mixnet/v2/sim/bulk_run.py
index 1883474..432f830 100644
--- a/mixnet/v2/sim/bulk_run.py
+++ b/mixnet/v2/sim/bulk_run.py
@@ -1,23 +1,16 @@
 import argparse
+from datetime import datetime
 
 import pandas as pd
-import seaborn
 from matplotlib import pyplot as plt
 
 from config import P2PConfig, Config
-from analysis import Analysis
 from simulation import Simulation
 
-COL_P2P_TYPE = "P2P Type"
-COL_NUM_NODES = "Num Nodes"
-COL_TRAFFIC_TYPE = "Traffic Type"
-COL_STAT = "Stat"
-COL_BANDWIDTH = "Bandwidth"
-
-TRAFFIC_TYPE_INGRESS = "Ingress"
-TRAFFIC_TYPE_EGRESS = "Egress"
-STAT_MEAN = "mean"
-STAT_MAX = "max"
+# https://matplotlib.org/stable/api/markers_api.html
+MARKERS = ['o', 'x', 'v', '^', '<', '>']
+NUM_NODES_SET = [10, 200, 400, 600, 800, 1000]
+NUM_MIX_LAYERS_SET = [0, 2, 4]
 
 
 def bulk_run():
@@ -27,104 +20,111 @@ def bulk_run():
     args = parser.parse_args()
     config = Config.load(args.config)
 
-    data = {
-        COL_P2P_TYPE: [],
-        COL_NUM_NODES: [],
-        COL_TRAFFIC_TYPE: [],
-        COL_STAT: [],
-        COL_BANDWIDTH: [],
-    }
+    results = []
 
-    message_size_df = None
+    for num_nodes in NUM_NODES_SET:
+        config.mixnet.num_nodes = num_nodes
 
-    for p2p_type in [P2PConfig.TYPE_ONE_TO_ALL, P2PConfig.TYPE_GOSSIP]:
-        config.p2p.type = p2p_type
+        for p2p_type in [P2PConfig.TYPE_GOSSIP]:
+            config.p2p.type = p2p_type
 
-        num_nodes_list = [10, 100, 1000]
-        for i, num_nodes in enumerate(num_nodes_list):
-            config.mixnet.num_nodes = num_nodes
-            sim = Simulation(config)
-            sim.run()
+            for num_mix_layers in NUM_MIX_LAYERS_SET:
+                config.mixnet.num_mix_layers = num_mix_layers
 
-            if message_size_df is None:
-                message_size_df = Analysis(sim, config).message_size_distribution()
+                for cover_message_prob in [0.0, config.mixnet.real_message_prob * 2]:
+                    config.mixnet.cover_message_prob = cover_message_prob
 
-            if i == len(num_nodes_list) - 1:
-                Analysis(sim, config).run()
+                    sim = Simulation(config)
+                    sim.run()
 
-            nonzero_ingresses, nonzero_egresses = [], []
-            for ingress_bandwidths, egress_bandwidths in zip(sim.p2p.measurement.ingress_bandwidth_per_time,
-                                                             sim.p2p.measurement.egress_bandwidth_per_time):
-                for node in sim.p2p.nodes:
-                    ingress = ingress_bandwidths[node] / 1024.0
-                    egress = egress_bandwidths[node] / 1024.0
-                    if ingress > 0:
-                        nonzero_ingresses.append(ingress)
-                    if egress > 0:
-                        nonzero_egresses.append(egress)
+                    ingress, egress = sim.p2p.measurement.bandwidth()
+                    results.append({
+                        "num_nodes": num_nodes,
+                        "config": f"{p2p_type}: {num_mix_layers}: {cover_message_prob}",
+                        "ingress_mean": ingress.mean(),
+                        "ingress_max": ingress.max(),
+                        "egress_mean": egress.mean(),
+                        "egress_max": egress.max(),
+                    })
 
-            ingresses = pd.Series(nonzero_ingresses)
-            add_data(data, p2p_type, num_nodes, TRAFFIC_TYPE_INGRESS, STAT_MEAN, ingresses.mean())
-            add_data(data, p2p_type, num_nodes, TRAFFIC_TYPE_INGRESS, STAT_MAX, ingresses.max())
-            egresses = pd.Series(nonzero_egresses)
-            add_data(data, p2p_type, num_nodes, TRAFFIC_TYPE_EGRESS, STAT_MEAN, egresses.mean())
-            add_data(data, p2p_type, num_nodes, TRAFFIC_TYPE_EGRESS, STAT_MAX, egresses.max())
-
-    df = pd.DataFrame(data)
-    draw_bandwidth_plot(df, TRAFFIC_TYPE_INGRESS, config, message_size_df)
-    draw_bandwidth_plot(df, TRAFFIC_TYPE_EGRESS, config, message_size_df)
+    df = pd.DataFrame(results)
+    df.to_csv(f"{datetime.now().replace(microsecond=0).isoformat()}.csv", index=False)
+    plot(df)
 
 
-def add_data(data: dict, p2p_type: str, num_nodes: int, bandwidth_type: str, stat: str, bandwidth: float):
-    data[COL_P2P_TYPE].append(p2p_type)
-    data[COL_NUM_NODES].append(num_nodes)
-    data[COL_TRAFFIC_TYPE].append(bandwidth_type)
-    data[COL_STAT].append(stat)
-    data[COL_BANDWIDTH].append(bandwidth)
+def load_and_plot():
+    # Re-plot a CSV written by bulk_run(); its first row is parsed as the column names.
+    df = pd.read_csv("2024-05-25T23:16:39.csv")
+    print(df)
+    plot(df)
 
 
-def draw_bandwidth_plot(df: pd.DataFrame, traffic_type: str, config: Config, message_size_df: pd.DataFrame):
-    ingress_df = df[df[COL_TRAFFIC_TYPE] == traffic_type]
-
+def _plot_metric(df: pd.DataFrame, values: str, title: str, ylabel: str, y_lim=None):
+    """Draw one line plot of a bandwidth column against the number of nodes.
+
+    One line is drawn per distinct `config` label; lines whose configs have the
+    same number of mix layers share a marker.
+
+    :param df: Results holding `num_nodes`, `config` and the column named by `values`.
+    :param values: Name of the column to plot on the y-axis.
+    :param title: Title of the plot.
+    :param ylabel: Label of the y-axis.
+    :param y_lim: Optional y-axis limits to apply to the plot.
+    :return: The y-axis limits of the drawn plot.
+    """
+    pivoted = df.pivot(index='num_nodes', columns='config', values=values)
+    # Draw on the sized figure itself; calling plt.subplots() after this would leave it empty.
     plt.figure(figsize=(12, 6))
+    ax = plt.gca()
+    for config_label in pivoted.columns:
+        # The label is formatted as "<p2p type>: <num mix layers>: <cover message prob>".
+        num_mix_layers = int(config_label.split(":")[1].strip())
+        ax.plot(pivoted.index, pivoted[config_label], label=config_label,
+                marker=MARKERS[NUM_MIX_LAYERS_SET.index(num_mix_layers)])
 
-    mean_df = ingress_df[ingress_df[COL_STAT] == STAT_MEAN]
-    seaborn.barplot(data=mean_df, x=COL_NUM_NODES, y=COL_BANDWIDTH, hue=COL_P2P_TYPE, ax=plt.gca(), capsize=0.1)
-    max_df = ingress_df[ingress_df[COL_STAT] == STAT_MAX]
-    barplot = seaborn.barplot(data=max_df, x=COL_NUM_NODES, y=COL_BANDWIDTH, hue=COL_P2P_TYPE, ax=plt.gca(),
-                              capsize=0.1, alpha=0.5)
+    ax.set_title(title)
+    ax.set_xlabel("Number of Nodes")
+    ax.set_ylabel(ylabel)
+    ax.legend(title="mode: layers: cover", loc="upper left")
+    ax.grid(True)
+    if y_lim is not None:
+        ax.set_ylim(y_lim)
+    plt.tight_layout()
 
-    # Adding labels to each bar
-    for p in barplot.patches:
-        height = p.get_height()
-        if height > 0:  # Only label bars with positive height
-            barplot.annotate(format(height, ".2f"),
-                             (p.get_x() + p.get_width() / 2., height),
-                             ha="center", va="center",
-                             xytext=(0, 9),
-                             textcoords="offset points")
-
-    plt.title(f"{traffic_type} Bandwidth")
-    plt.xlabel(COL_NUM_NODES)
-    plt.ylabel(f"{COL_BANDWIDTH} (KB/s)")
-
-    # Custom legend to show Mean and Max
-    handles, labels = barplot.get_legend_handles_labels()
-    for i in range(len(labels) // 2):
-        labels[i] = labels[i] + f" ({STAT_MEAN})"
-    for i in range(len(labels) // 2, len(labels)):
-        labels[i] = labels[i] + f" ({STAT_MAX})"
-    plt.legend(handles=handles, labels=labels, loc="upper left")
-
-    desc = (
-        f"message: {message_size_df["message_size"].mean():.0f} bytes\n"
-        f"{config.description()}"
-    )
-    plt.text(1.02, 0.5, desc, transform=plt.gca().transAxes, verticalalignment="center", fontsize=12)
-    plt.subplots_adjust(right=0.8)  # Adjust layout to make room for the text
+    # Read the limits before the figure is shown, so that they can be returned afterwards.
+    y_lim = ax.get_ylim()
 
     plt.show()
+    return y_lim
+
+
+def plot(df: pd.DataFrame):
+    """Show line plots of the max and mean ingress/egress bandwidth per config.
+
+    Each mean plot reuses the y-axis limits of the matching max plot.
+
+    :param df: One row per (num_nodes, config) pair, holding the columns
+        `ingress_max`, `ingress_mean`, `egress_max` and `egress_mean`.
+    """
+    # Ingress: the mean plot is drawn with the y-axis limits of the max plot.
+    ingress_max_y_lim = _plot_metric(
+        df, "ingress_max", title="Ingress Bandwidth (Max)", ylabel="Max Bandwidth (KiB/s)",
+    )
+    _plot_metric(
+        df, "ingress_mean", title="Ingress Bandwidth (Mean)", ylabel="Mean Bandwidth (KiB/s)",
+        y_lim=ingress_max_y_lim,
+    )
+
+    # Egress: likewise, with limits taken from the egress max plot.
+    egress_max_y_lim = _plot_metric(
+        df, "egress_max", title="Egress Bandwidth (Max)", ylabel="Max Bandwidth (KiB/s)",
+    )
+    _plot_metric(
+        df, "egress_mean", title="Egress Bandwidth (Mean)", ylabel="Mean Bandwidth (KiB/s)",
+        y_lim=egress_max_y_lim,
+    )
 
 
 if __name__ == "__main__":
     bulk_run()
+    # load_and_plot()
diff --git a/mixnet/v2/sim/config.yaml b/mixnet/v2/sim/config.yaml
index 6708975..3f37821 100644
--- a/mixnet/v2/sim/config.yaml
+++ b/mixnet/v2/sim/config.yaml
@@ -6,32 +6,32 @@ mixnet:
   num_nodes: 100
   # A number of mix nodes selected by a message sender through which the Sphinx message goes through
   # If 0, the message is broadcast directly to all nodes without being Sphinx-encoded.
-  num_mix_layers: 3
+  num_mix_layers: 0
   # A size of a message payload in bytes (e.g. the size of a block proposal)
   payload_size: 320
   # An interval of sending a new real/cover message
   # A probability of actually sending a message depends on the following parameters.
   message_interval: 1
   # A probability of sending a real message within a cycle
-  real_message_prob: 0.1
+  real_message_prob: 0.01
   # A weight of real message emission probability of some nodes
   # Each weight is multiplied to the real_message_prob of the node being at the same position in the node list.
   # The length of the list should be <= p2p.num_nodes. i.e. some nodes won't have a weight.
-  real_message_prob_weights: [3, 2, 5]
+  real_message_prob_weights: []
   # A probability of sending a cover message within a cycle if not sending a real message
-  cover_message_prob: 0.2
+  cover_message_prob: 0
   # A maximum preparation time (computation time) for a message sender before sending the message
-  max_message_prep_time: 0.05
+  max_message_prep_time: 0
   # A maximum delay of messages mixed in a mix node
-  max_mix_delay: 3
+  max_mix_delay: 0
 
 p2p:
   # Broadcasting type: 1-to-all | gossip
   type: "gossip"
   # A connection density, only if the type is gossip
-  connection_density: 10
+  connection_density: 6
   # A maximum network latency between nodes directly connected with each other
-  max_network_latency: 0.5
+  max_network_latency: 0.20
 
 measurement:
     # How many times in simulation represent 1 second in real time
diff --git a/mixnet/v2/sim/measurement.py b/mixnet/v2/sim/measurement.py
index 0d1d8e3..f9b258e 100644
--- a/mixnet/v2/sim/measurement.py
+++ b/mixnet/v2/sim/measurement.py
@@ -1,6 +1,7 @@
 from collections import defaultdict
 from typing import TYPE_CHECKING
 
+import pandas as pd
 import simpy
 
 from config import Config
@@ -16,7 +17,7 @@ class Measurement:
         self.ingress_bandwidth_per_time = []
         self.egress_bandwidth_per_time = []
 
-        self.env.process(self.update_bandwidth_window())
+        self.env.process(self._update_bandwidth_window())
 
     def measure_ingress(self, node: "Node", msg: SphinxPacket | bytes):
         self.ingress_bandwidth_per_time[-1][node] += len(msg)
@@ -24,9 +25,19 @@ class Measurement:
     def measure_egress(self, node: "Node", msg: SphinxPacket | bytes):
         self.egress_bandwidth_per_time[-1][node] += len(msg)
 
-    def update_bandwidth_window(self):
+    def _update_bandwidth_window(self):
         while True:
             self.ingress_bandwidth_per_time.append(defaultdict(int))
             self.egress_bandwidth_per_time.append(defaultdict(int))
             yield self.env.timeout(self.config.measurement.sim_time_per_second)
 
+    def bandwidth(self) -> tuple[pd.Series, pd.Series]:
+        """Return the nonzero bandwidth samples recorded so far as (ingress, egress).
+
+        Each sample is the KiB counted for one node within one measurement window.
+        """
+        def nonzero_kib(windows: list) -> pd.Series:
+            return pd.Series([size / 1024.0 for window in windows for size in window.values() if size > 0])
+
+        return (nonzero_kib(self.ingress_bandwidth_per_time),
+                nonzero_kib(self.egress_bandwidth_per_time))