diff --git a/mixnet/v2/sim/analysis.py b/mixnet/v2/sim/analysis.py
index 12eea72..a8f12fe 100644
--- a/mixnet/v2/sim/analysis.py
+++ b/mixnet/v2/sim/analysis.py
@@ -1,3 +1,4 @@
+import itertools
 import sys
 from collections import Counter
 from typing import TYPE_CHECKING
@@ -28,9 +29,10 @@ COL_SUCCESS_RATE = "Success Rate (%)"
 
 
 class Analysis:
-    def __init__(self, sim: Simulation, config: Config):
+    def __init__(self, sim: Simulation, config: Config, show_plots: bool = True):
         self.sim = sim
         self.config = config
+        self.show_plots = show_plots  # When False, no matplotlib window is opened by any method
 
     def run(self):
         message_size_df = self.message_size_distribution()
@@ -42,6 +44,9 @@ class Analysis:
         self.timing_attack(median_hops)
 
     def bandwidth(self, message_size_df: pd.DataFrame):
+        if not self.show_plots:
+            return
+
         dataframes = []
         nonzero_egresses = []
         nonzero_ingresses = []
@@ -95,7 +100,8 @@ class Analysis:
         print(df.describe())
         return df
 
-    def messages_emitted_around_interval(self):
+    def messages_emitted_around_interval(self) -> tuple[float, float, float]:
+        """Return (precision, recall, F1 score), in percent, of the adversary's sender inference."""
         # A ground truth that shows how many times each node sent a real message
         truth_df = pd.DataFrame(
             [(node.id, count) for node, count in self.sim.p2p.measurement.original_senders.items()],
@@ -107,18 +113,19 @@ class Analysis:
             columns=[COL_NODE_ID, COL_MSG_CNT]
         )
 
-        width = 0.4
-        fig, ax = plt.subplots(figsize=(12, 8))
-        ax.bar(truth_df[COL_NODE_ID] - width / 2, truth_df[COL_MSG_CNT], width, label="Ground Truth", color="b")
-        ax.bar(truth_df[COL_NODE_ID] + width / 2, suspected_df[COL_MSG_CNT], width, label="Adversary's Inference",
-               color="r")
-        ax.set_title("Nodes who generated real messages")
-        ax.set_xlabel(COL_NODE_ID)
-        ax.set_ylabel(COL_MSG_CNT)
-        ax.set_xlim(-1, len(truth_df[COL_NODE_ID]))
-        ax.legend()
-        plt.tight_layout()
-        plt.show()
+        if self.show_plots:
+            width = 0.4
+            fig, ax = plt.subplots(figsize=(12, 8))
+            ax.bar(truth_df[COL_NODE_ID] - width / 2, truth_df[COL_MSG_CNT], width, label="Ground Truth", color="b")
+            ax.bar(truth_df[COL_NODE_ID] + width / 2, suspected_df[COL_MSG_CNT], width, label="Adversary's Inference",
+                   color="r")
+            ax.set_title("Nodes who generated real messages")
+            ax.set_xlabel(COL_NODE_ID)
+            ax.set_ylabel(COL_MSG_CNT)
+            ax.set_xlim(-1, len(truth_df[COL_NODE_ID]))
+            ax.legend()
+            plt.tight_layout()
+            plt.show()
 
         # Calculate precision, recall, and F1 score
         truth = set(truth_df[truth_df[COL_MSG_CNT] > 0][COL_NODE_ID])
@@ -128,8 +135,12 @@ class Analysis:
         recall = len(true_positives) / len(truth) * 100.0 if len(truth) > 0 else 0.0
         f1_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
         print(f"Precision: {precision:.2f}%, Recall: {recall:.2f}%, F1 Score: {f1_score:.2f}%")
+        return precision, recall, f1_score
 
     def messages_in_node_over_time(self):
+        if not self.show_plots:
+            return
+
         dataframes = []
         for time, msg_pools in enumerate(self.sim.p2p.adversary.msg_pools_per_time):
             data = []
@@ -178,6 +189,9 @@ class Analysis:
         plt.show()
 
     def node_states(self):
+        if not self.show_plots:
+            return
+
         rows = []
         for time, node_states in self.sim.p2p.adversary.node_states.items():
             for node, state in node_states.items():
@@ -196,14 +210,39 @@ class Analysis:
     def message_hops(self) -> int:
         df = pd.DataFrame(self.sim.p2p.measurement.message_hops.values(), columns=[COL_HOPS])
         print(df.describe())
-        plt.figure(figsize=(6, 6))
-        seaborn.boxplot(data=df, y=COL_HOPS, medianprops={"color": "red", "linewidth": 2.5})
-        plt.ylim(bottom=0)
-        plt.title("The distribution of max hops of single broadcasting")
-        plt.show()
+        if self.show_plots:
+            plt.figure(figsize=(6, 6))
+            seaborn.boxplot(data=df, y=COL_HOPS, medianprops={"color": "red", "linewidth": 2.5})
+            plt.ylim(bottom=0)
+            plt.title("The distribution of max hops of single broadcasting")
+            plt.show()
         return int(df.median().iloc[0])
 
-    def timing_attack(self, hops_between_layers: int):
+    def timing_attack(self, hops_between_layers: int) -> pd.DataFrame:
+        """Run the timing attack and return the collected success rates (%) as a one-column DataFrame."""
+        success_rates = self.timing_attack_inner(hops_between_layers)
+        df = pd.DataFrame(success_rates, columns=[COL_SUCCESS_RATE])
+        print(df.describe())
+
+        if self.show_plots:
+            plt.figure(figsize=(6, 6))
+            plt.boxplot(df[COL_SUCCESS_RATE], vert=True, patch_artist=True, boxprops=dict(facecolor="lightblue"),
+                        medianprops=dict(color="orange"))
+            mean = df[COL_SUCCESS_RATE].mean()
+            median = df[COL_SUCCESS_RATE].median()
+            plt.axhline(mean, color="red", linestyle="--", linewidth=1, label=f"Mean: {mean:.2f}%")
+            plt.axhline(median, color="orange", linestyle="-", linewidth=1, label=f"Median: {median:.2f}%")
+            plt.ylabel(COL_SUCCESS_RATE)
+            plt.ylim(-5, 105)
+            plt.title("Timing attack success rate distribution")
+            plt.legend()
+            plt.grid(True)
+            plt.show()
+
+        return df
+
+    def timing_attack_inner(self, hops_between_layers: int) -> list[float]:
+        """Collect timing-attack success rates (%), stopping once `timing_attack_max_targets` are gathered."""
         hops_to_observe = hops_between_layers * (self.config.mixnet.num_mix_layers + 1)
         success_rates = []
         for receiver, times_and_msgs in self.sim.p2p.adversary.final_msgs_received.items():
@@ -219,22 +258,10 @@ class Analysis:
                     else:
                         success_rate = 0.0
                     success_rates.append(success_rate)
+                    if len(success_rates) >= self.config.adversary.timing_attack_max_targets:
+                        return success_rates
 
-        df = pd.DataFrame(success_rates, columns=[COL_SUCCESS_RATE])
-        print(df.describe())
-        plt.figure(figsize=(6, 6))
-        plt.boxplot(df[COL_SUCCESS_RATE], vert=True, patch_artist=True, boxprops=dict(facecolor="lightblue"),
-                    medianprops=dict(color="orange"))
-        mean = df[COL_SUCCESS_RATE].mean()
-        median = df[COL_SUCCESS_RATE].median()
-        plt.axhline(mean, color="red", linestyle="--", linewidth=1, label=f"Mean: {mean:.2f}%")
-        plt.axhline(median, color="orange", linestyle="-", linewidth=1, label=f"Median: {median:.2f}%")
-        plt.ylabel(COL_SUCCESS_RATE)
-        plt.ylim(-5, 105)
-        plt.title("Timing attack success rate distribution")
-        plt.legend()
-        plt.grid(True)
-        plt.show()
+        return success_rates
 
     def timing_attack_with(self, receiver: "Node", time_received: Time, remaining_hops: int,
                            observed_hops: int, suspected_origins: Counter,
@@ -250,6 +277,9 @@ class Analysis:
         # If the specific sender is given, inspect only that sender to maximize the success rate.
         if senders is None:
             senders = self.sim.p2p.adversary.msgs_received_per_time[time_received][receiver]
+
+        # Inspect at most `timing_attack_max_pool_size` senders (the first ones in iteration order)
+        senders = dict(itertools.islice(senders.items(), self.config.adversary.timing_attack_max_pool_size))
 
         # Inspect each sender who sent messages to the receiver
         for sender, times_sent in senders.items():
diff --git a/mixnet/v2/sim/bulk_attack.py b/mixnet/v2/sim/bulk_attack.py
new file mode 100644
index 0000000..ccebc91
--- /dev/null
+++ b/mixnet/v2/sim/bulk_attack.py
@@ -0,0 +1,129 @@
+"""Run a grid of passive-adversary attack simulations and plot the aggregated results."""
+import argparse
+from datetime import datetime
+
+import pandas as pd
+from matplotlib import pyplot as plt
+
+from analysis import Analysis
+from config import Config, P2PConfig
+from simulation import Simulation
+
+
+def bulk_attack():
+    """Sweep p2p type, mix-layer count and cover-message rate, then save and plot the attack metrics.
+
+    The base configuration is loaded from ``--config``; the fields assigned below override it for every
+    run. One result row is recorded per parameter combination and the table is written to a CSV file.
+    """
+    parser = argparse.ArgumentParser(description="Run multiple passive adversary attack simulations",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("--config", type=str, required=True, help="Configuration file path")
+    args = parser.parse_args()
+    config = Config.load(args.config)
+
+    # Fixed parameters shared by every run (they override the loaded configuration)
+    config.simulation.running_time = 300
+    config.mixnet.num_nodes = 100
+    config.mixnet.payload_size = 320
+    config.mixnet.message_interval = 10
+    config.mixnet.real_message_prob = 0.01
+    config.mixnet.real_message_prob_weights = []
+    config.mixnet.max_message_prep_time = 0
+    config.p2p.connection_density = 6
+    config.p2p.min_network_latency = 1
+    config.p2p.max_network_latency = 1
+    config.measurement.sim_time_per_second = 10
+
+    results = []
+
+    for p2p_type in [P2PConfig.TYPE_ONE_TO_ALL, P2PConfig.TYPE_GOSSIP]:
+        config.p2p.type = p2p_type
+
+        for num_mix_layers in [0, 1, 2, 3, 4]:
+            config.mixnet.num_mix_layers = num_mix_layers
+
+            for cover_message_prob in [0.0, 0.1, 0.2, 0.3, 0.4]:
+                config.mixnet.cover_message_prob = cover_message_prob
+
+                for mix_delay in [0]:
+                    config.mixnet.min_mix_delay = mix_delay
+                    config.mixnet.max_mix_delay = mix_delay
+
+                    sim = Simulation(config)
+                    sim.run()
+
+                    analysis = Analysis(sim, config, show_plots=False)
+                    precision, recall, f1_score = analysis.messages_emitted_around_interval()
+                    print(
+                        f"ANALYZING TIMING ATTACK: p2p_type:{p2p_type}, {num_mix_layers} layers, {cover_message_prob} cover, {mix_delay} delay")
+                    timing_attack_df = analysis.timing_attack(analysis.message_hops())
+
+                    results.append({
+                        "p2p_type": p2p_type,
+                        "num_mix_layers": num_mix_layers,
+                        "cover_message_prob": cover_message_prob,
+                        "mix_delay": mix_delay,
+                        "global_precision": precision,
+                        "global_recall": recall,
+                        "global_f1_score": f1_score,
+                        "target_median": float(timing_attack_df.median().iloc[0]),
+                        "target_std": float(timing_attack_df.std().iloc[0]),
+                        "target_min": float(timing_attack_df.min().iloc[0]),
+                        "target_25%": float(timing_attack_df.quantile(0.25).iloc[0]),
+                        "target_mean": float(timing_attack_df.mean().iloc[0]),
+                        "target_75%": float(timing_attack_df.quantile(0.75).iloc[0]),
+                        "target_max": float(timing_attack_df.max().iloc[0]),
+                    })
+
+    df = pd.DataFrame(results)
+    # Use a timestamp without ":" so that the file name is also valid on Windows
+    timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
+    df.to_csv(f"bulk-attack-{timestamp}.csv", index=False)
+    plot_global_metrics(df)
+    plot_target_metrics(df)
+
+
+def plot_global_metrics(df: pd.DataFrame):
+    """Plot global precision, recall and F1 score against the number of mix layers, one figure per p2p type."""
+    metrics = [
+        ("global_precision", "Global Precision"),
+        ("global_recall", "Global Recall"),
+        ("global_f1_score", "Global F1 Score"),
+    ]
+    for p2p_type in df["p2p_type"].unique():
+        fig, axes = plt.subplots(nrows=len(metrics), ncols=1, figsize=(10, 15))
+
+        for ax, (column, title) in zip(axes, metrics):
+            # One line per cover message rate
+            for cover_message_prob in df["cover_message_prob"].unique():
+                subset = df[(df["cover_message_prob"] == cover_message_prob) & (df["p2p_type"] == p2p_type)]
+                ax.plot(subset["num_mix_layers"], subset[column], label=f"{cover_message_prob} cover rate")
+            ax.set_title(f"{title} ({p2p_type})")
+            ax.set_xlabel("# of Mix Layers")
+            ax.set_ylabel(f"{title} (%)")
+            ax.set_ylim(0, 100)
+            ax.legend()
+
+        plt.tight_layout()
+        plt.show()
+
+
+def plot_target_metrics(df: pd.DataFrame):
+    """Plot the median timing-attack success rate against the number of mix layers, one figure per p2p type."""
+    for p2p_type in df["p2p_type"].unique():
+        plt.figure(figsize=(12, 6))
+        for cover_message_prob in df["cover_message_prob"].unique():
+            subset = df[(df["cover_message_prob"] == cover_message_prob) & (df["p2p_type"] == p2p_type)]
+            plt.plot(subset["num_mix_layers"], subset["target_median"], label=f"{cover_message_prob} cover rate")
+        plt.title(f"Timing Attack Success Rate ({p2p_type})")
+        plt.xlabel("# of Mix Layers")
+        plt.ylabel("Median of Success Rates (%)")
+        plt.ylim(0, 100)
+        plt.legend()
+        plt.tight_layout()
+        plt.show()
+
+
+if __name__ == "__main__":
+    bulk_attack()
diff --git a/mixnet/v2/sim/config.py b/mixnet/v2/sim/config.py
index e9548bc..0ae89c5 100644
--- a/mixnet/v2/sim/config.py
+++ b/mixnet/v2/sim/config.py
@@ -16,6 +16,7 @@ class Config:
     mixnet: MixnetConfig
     p2p: P2PConfig
     measurement: MeasurementConfig
+    adversary: AdversaryConfig
 
     @classmethod
     def load(cls, yaml_path: str) -> Self:
@@ -28,6 +29,7 @@ class Config:
         config.mixnet.validate()
         config.p2p.validate()
         config.measurement.validate()
+        config.adversary.validate()
 
         return config
 
@@ -139,3 +141,15 @@ class MeasurementConfig:
 
     def validate(self):
         assert self.sim_time_per_second > 0
+
+
+@dataclass
+class AdversaryConfig:
+    # The timing attack stops once this many success rates have been collected
+    timing_attack_max_targets: int
+    # Max number of senders inspected for a receiver in each step of the timing attack
+    timing_attack_max_pool_size: int
+
+    def validate(self):
+        assert self.timing_attack_max_targets > 0
+        assert self.timing_attack_max_pool_size > 0
diff --git a/mixnet/v2/sim/config.yaml b/mixnet/v2/sim/config.yaml
index 0d7ea0b..2424bf8 100644
--- a/mixnet/v2/sim/config.yaml
+++ b/mixnet/v2/sim/config.yaml
@@ -37,4 +37,10 @@ p2p:
 
 measurement:
   # How many times in simulation represent 1 second in real time
-  sim_time_per_second: 10
\ No newline at end of file
+  sim_time_per_second: 10
+
+adversary:
+  # The timing attack stops once this many success rates have been collected
+  timing_attack_max_targets: 5
+  # Max number of senders inspected for a receiver in each step of the timing attack
+  timing_attack_max_pool_size: 3