279 lines
11 KiB
Python
Raw Permalink Normal View History

2024-05-31 21:42:56 +09:00
from collections import Counter
2024-05-31 10:05:36 +09:00
from typing import TYPE_CHECKING
2024-05-31 21:07:47 +09:00
import numpy as np
import pandas as pd
import seaborn
from matplotlib import pyplot as plt
2024-05-31 21:07:47 +09:00
import scipy.stats as stats
2024-05-17 16:49:13 +09:00
from adversary import NodeState
from config import Config
from simulation import Simulation
2024-05-31 10:05:36 +09:00
if TYPE_CHECKING:
from node import Node
class Analysis:
    """Print statistics and show plots for a finished simulation.

    Every public method reads the data recorded on ``sim.p2p`` (its
    ``measurement`` and ``adversary`` objects) and renders it with
    matplotlib/seaborn. ``run`` executes the default sequence of analyses.
    """

    def __init__(self, sim: Simulation, config: Config):
        self.sim = sim
        self.config = config

    def run(self) -> None:
        """Run the default analyses in order; each one prints and/or shows plots."""
        message_size_df = self.message_size_distribution()
        self.bandwidth(message_size_df)
        self.messages_emitted_around_interval()
        self.messages_in_node_over_time()
        # self.node_states()
        self.message_hops()
        self.timing_attack()

    def bandwidth(self, message_size_df: pd.DataFrame) -> None:
        """Plot the egress/ingress bandwidth of every node over time.

        A text box next to the plot shows the mean message size, the config
        description and the mean/max of the non-zero egress and ingress values.

        Args:
            message_size_df: Frame with a ``message_size`` column, as returned
                by ``message_size_distribution``.
        """
        long_df = self._bandwidth_frame()
        if long_df.empty:
            # No time steps (or no nodes) were recorded, so there is nothing to
            # pivot or plot.
            print("no bandwidth measurements to plot")
            return

        df = long_df.pivot(index="Time", columns="node_id", values=["egress", "ingress"])

        plt.figure(figsize=(12, 6))
        for column in df.columns:
            # After the pivot each column is a (kind, node_id) pair.
            kind = column[0]
            marker = "x" if kind == "ingress" else "o"
            plt.plot(df.index, df[column], marker=marker, label=kind)

        plt.title("Egress/ingress bandwidth of each node over time")
        plt.xlabel("Time")
        plt.ylabel("Bandwidth (KiB/s)")
        plt.ylim(bottom=0)

        # Customize the legend to show only 'egress' and 'ingress' regardless of node_id
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(by_label.values(), by_label.keys())
        plt.grid(True)

        # Adding descriptions on the right side of the plot.
        egress_series = long_df.loc[long_df["egress"] > 0, "egress"]
        ingress_series = long_df.loc[long_df["ingress"] > 0, "ingress"]
        # Computed outside the f-string: reusing double quotes inside a
        # replacement field is a syntax error before Python 3.12.
        mean_message_size = message_size_df["message_size"].mean()
        desc = (
            f"message: {mean_message_size:.0f} bytes\n"
            f"{self.config.description()}\n\n"
            f"[egress(>0)]\nmean: {egress_series.mean():.2f} KiB/s\nmax: {egress_series.max():.2f} KiB/s\n\n"
            f"[ingress(>0)]\nmean: {ingress_series.mean():.2f} KiB/s\nmax: {ingress_series.max():.2f} KiB/s"
        )
        plt.text(1.02, 0.5, desc, transform=plt.gca().transAxes, verticalalignment="center", fontsize=12)
        plt.subplots_adjust(right=0.8)  # Adjust layout to make room for the text
        plt.show()

    def _bandwidth_frame(self) -> pd.DataFrame:
        """Return one row per (time step, node) with both bandwidths divided by 1024.

        Columns are ``Time`` (index of the time step), ``node_id``, ``egress``
        and ``ingress``.
        """
        measurement = self.sim.p2p.measurement
        per_time = zip(measurement.egress_bandwidth_per_time, measurement.ingress_bandwidth_per_time)
        rows = []
        for time, (egress_bandwidths, ingress_bandwidths) in enumerate(per_time):
            for node in self.sim.p2p.nodes:
                egress = egress_bandwidths[node] / 1024.0
                ingress = ingress_bandwidths[node] / 1024.0
                rows.append((time, node.id, egress, ingress))
        return pd.DataFrame(rows, columns=["Time", "node_id", "egress", "ingress"])

    def message_size_distribution(self) -> pd.DataFrame:
        """Print summary statistics of the recorded message sizes.

        Returns:
            A frame with a single ``message_size`` column.
        """
        df = pd.DataFrame(self.sim.p2p.adversary.message_sizes, columns=["message_size"])
        print(df.describe())
        return df

    def messages_emitted_around_interval(self) -> None:
        """Bar-plot how many messages each sender emitted around the promised interval.

        A sender is flagged as "expected" (red) when its id is smaller than the
        number of entries in ``mixnet.real_message_prob_weights``.
        """
        expected_sender_count = len(self.sim.config.mixnet.real_message_prob_weights)
        df = pd.DataFrame(
            [
                (node.id, cnt, node.id < expected_sender_count)
                for node, cnt in self.sim.p2p.adversary.senders_around_interval.items()
            ],
            columns=["node_id", "msg_count", "expected"],
        )

        plt.figure(figsize=(10, 6))
        seaborn.barplot(data=df, x="node_id", y="msg_count", hue="expected", palette={True: "red", False: "blue"})
        plt.title("Messages emitted around the promised interval")
        plt.xlabel("Sender Node ID")
        plt.ylabel("Msg Count")
        plt.legend(title="expected")
        plt.show()

    def messages_in_node_over_time(self) -> None:
        """Plot per-node message counts and sender diversity for every window.

        Shows three figures: message count per node, number of distinct
        senders per node, and a box plot of the sender counts per window.
        """
        df = self._msgs_in_node_frame()
        if df.empty:
            # Nothing was observed in any window, so there is nothing to plot.
            print("no per-window message observations to plot")
            return

        self._plot_per_node_lines(
            df.pivot(index="time", columns="node_id", values="msg_cnt"),
            "Messages within each node over time",
            "Msg Count",
        )
        self._plot_per_node_lines(
            df.pivot(index="time", columns="node_id", values="sender_cnt"),
            "Diversity of senders of messages received by each node over time",
            "# of senders of messages received by each node",
        )

        # With ``by=...`` pandas creates its own figure, so the size is passed
        # here; a preceding plt.figure() would only leave a blank window behind.
        df.boxplot(
            column="sender_cnt",
            by="time",
            figsize=(12, 6),
            medianprops={"color": "red", "linewidth": 2.5},
        )
        plt.title("Diversity of senders of messages received by each node over time")
        plt.suptitle("")  # drop the automatic "Boxplot grouped by ..." super-title
        plt.xticks([])
        plt.xlabel("Time")
        plt.ylabel("# of senders of messages received by each node")
        plt.ylim(bottom=0)
        plt.grid(axis="x")
        plt.tight_layout()
        plt.show()

    def _msgs_in_node_frame(self) -> pd.DataFrame:
        """Return one row per (window, node) observed by the adversary.

        Columns are ``time`` (window index multiplied by
        ``adversary.io_window_moving_interval``), ``node_id``, ``msg_cnt`` and
        ``sender_cnt`` (number of recorded senders).
        """
        interval = self.config.adversary.io_window_moving_interval
        rows = []
        for i, msgs_in_node in enumerate(self.sim.p2p.adversary.msgs_in_node_per_window):
            time = i * interval
            for node, (msg_cnt, senders) in msgs_in_node.items():
                rows.append((time, node.id, msg_cnt, len(senders)))
        return pd.DataFrame(rows, columns=["time", "node_id", "msg_cnt", "sender_cnt"])

    @staticmethod
    def _plot_per_node_lines(pivoted: pd.DataFrame, title: str, ylabel: str) -> None:
        """Draw one line per column of ``pivoted`` against its index and show the figure."""
        plt.figure(figsize=(12, 6))
        for column in pivoted.columns:
            plt.plot(pivoted.index, pivoted[column], marker=None, label=column)
        plt.title(title)
        plt.xlabel("Time")
        plt.ylabel(ylabel)
        plt.ylim(bottom=0)
        plt.grid(True)
        plt.tight_layout()
        plt.show()

    def node_states(self) -> None:
        """Scatter-plot the recorded state of every node over time."""
        rows = [
            (time, node.id, state)
            for time, node_states in self.sim.p2p.adversary.node_states.items()
            for node, state in node_states.items()
        ]
        df = pd.DataFrame(rows, columns=["time", "node_id", "state"])

        plt.figure(figsize=(10, 6))
        seaborn.scatterplot(data=df, x="time", y="node_id", hue="state",
                            palette={NodeState.SENDING: "red", NodeState.RECEIVING: "blue"})
        plt.title("Node states over time")
        plt.xlabel("Time")
        plt.ylabel("Node ID")
        plt.legend(title="state")
        plt.show()

    def message_hops(self) -> None:
        """Print summary statistics of the recorded message hops and box-plot them."""
        # list(...) so the frame is built from a plain sequence rather than a dict view.
        df = pd.DataFrame(list(self.sim.p2p.measurement.message_hops.values()), columns=["hops"])
        print(df.describe())
        plt.figure(figsize=(6, 6))
        seaborn.boxplot(data=df, y="hops", medianprops={"color": "red", "linewidth": 2.5})
        plt.title("Message hops distribution")
        plt.show()

    def timing_attack(self) -> None:
        """
        Trace received messages back to their likely senders and plot the result.

        Starting from the last observation window, take every node that
        received a message and track the message back towards its sender,
        window by window, until
        - there is no message to track back in the preceding window, or
        - enough hops have been traversed.

        The nodes reached at the last hop of each trace are counted as
        suspected senders. Their counts are plotted next to the recorded
        original senders and broadcasters, followed by a histogram of the
        suspected-sender counts.
        """
        all_results = self._trace_back_all_windows()

        suspected_senders = Counter()
        for result in all_results:
            last_hop = result[-1]
            print(Counter({node.id: count for node, count in last_hop.items()}))
            suspected_senders.update(last_hop)
        suspected_senders = {node.id: count for node, count in suspected_senders.items()}
        print(f"suspected nodes count: {len(suspected_senders)}")

        self._plot_count_bars(suspected_senders, 'Suspected Sender Counts')

        original_senders = {
            node.id: count for node, count in self.sim.p2p.measurement.original_senders.items()
        }
        self._plot_count_bars(original_senders, 'Original Sender Counts')

        broadcasters = {node.id: count for node, count in self.sim.p2p.broadcasters.items()}
        self._plot_count_bars(broadcasters, 'Broadcasters')

        self._plot_count_distribution(list(suspected_senders.values()))

    def _trace_back_all_windows(self) -> list:
        """Run ``timing_attack_with`` for every receiver, walking the windows backwards.

        After the receivers of one window have been traced, the walk skips as
        many windows as the longest trace covered. Each trace is printed via
        ``print_nodes_per_hop``.

        Returns:
            All traces, each a list of per-hop ``Counter`` objects.
        """
        windows = self.sim.p2p.adversary.msgs_in_node_per_window
        all_results = []
        window = len(windows) - 1
        while window >= 0:
            actual_receivers = [
                node for node, (_, senders) in windows[window].items() if len(senders) > 0
            ]
            if not actual_receivers:
                window -= 1
                continue

            max_hops = 0
            for receiver in actual_receivers:
                nodes_per_hop = self.timing_attack_with(receiver, window)
                self.print_nodes_per_hop(nodes_per_hop, window)
                all_results.append(nodes_per_hop)
                max_hops = max(max_hops, len(nodes_per_hop))
            # Every trace has at least one hop, so this always moves backwards.
            window -= max_hops
        return all_results

    @staticmethod
    def _plot_count_bars(counts: dict, title: str) -> None:
        """Show a bar plot of ``counts`` (node id -> count) under ``title``."""
        plt.figure(figsize=(12, 8))
        plt.bar(list(counts.keys()), list(counts.values()))
        plt.xlabel('Node ID')
        plt.ylabel('Counts')
        plt.title(title)
        plt.show()

    @staticmethod
    def _plot_count_distribution(values: list) -> None:
        """Show a density histogram of ``values`` with a normal curve fitted on top."""
        if not values:
            # Mean/std of an empty list are NaN; there is nothing meaningful to draw.
            print("no suspected senders: skipping the count distribution plot")
            return

        # Calculate the mean and standard deviation of the counts
        mean = np.mean(values)
        std_dev = np.std(values)

        # Plot the histogram of the values
        plt.figure(figsize=(12, 8))
        plt.hist(values, bins=30, density=True, alpha=0.6, color='g', label='Counts Histogram')

        # Plot the normal distribution curve; it is undefined for a zero
        # standard deviation (all counts equal), so it is skipped in that case.
        if std_dev > 0:
            xmin, xmax = plt.xlim()
            x = np.linspace(xmin, xmax, 100)
            p = stats.norm.pdf(x, mean, std_dev)
            plt.plot(x, p, 'k', linewidth=2, label='Normal Distribution')

        plt.title(f"Fit results: mean = {mean:.2f}, std_dev = {std_dev:.2f}")
        plt.xlabel('Counts')
        plt.ylabel('Density')
        plt.legend()
        plt.show()

    def timing_attack_with(self, starting_node: "Node", starting_window: int) -> list:
        """Trace the messages received by ``starting_node`` back through earlier windows.

        Hop 0 holds the senders recorded for ``starting_node`` in
        ``starting_window``. Each following hop holds the senders recorded, one
        window earlier, for the nodes of the previous hop.

        Args:
            starting_node: Node whose received messages are traced back.
            starting_window: Index of the window the trace starts from.

        Returns:
            A list with one ``Counter`` of sender nodes per hop (at least one entry).
        """
        windows = self.sim.p2p.adversary.msgs_in_node_per_window
        _, senders = windows[starting_window][starting_node]
        nodes_per_hop = [Counter(senders)]

        # Hop budget: one hop per mix layer plus one; any p2p type other than
        # one-to-all gets four times that budget.
        max_hops = 1 + self.config.mixnet.num_mix_layers
        if self.config.p2p.type != self.config.p2p.TYPE_ONE_TO_ALL:
            max_hops *= 4

        # Walk back down to window 0 inclusive (stop value -1): windows are
        # indexed from 0, so stopping at 0 would never inspect the first one.
        for window in range(starting_window - 1, -1, -1):
            if len(nodes_per_hop) >= max_hops:
                break
            next_nodes = Counter()
            for node in nodes_per_hop[-1]:
                _, senders = windows[window][node]
                next_nodes.update(senders)
            if not next_nodes:
                break
            nodes_per_hop.append(next_nodes)

        return nodes_per_hop

    @staticmethod
    def print_nodes_per_hop(nodes_per_hop, starting_window: int) -> None:
        """Print, for every hop of a trace, how many nodes it holds and their sorted ids."""
        for hop, nodes in enumerate(nodes_per_hop):
            print(f"hop-{hop} from w-{starting_window}: {len(nodes)} nodes: {sorted(node.id for node in nodes)}")