simplified timing attack, but use messages_emitted_around_interval instead with some fixes

This commit is contained in:
Youngjoon Lee 2024-06-04 12:04:03 +09:00
parent 1aaa0abd53
commit 3fa8af8850
No known key found for this signature in database
GPG Key ID: 09B750B5BD6F08A2
9 changed files with 146 additions and 163 deletions

View File

@ -31,13 +31,15 @@ But, it is guaranteed that those two events are processed before the events sched
Using this virtual time, complex distributed systems can be simulated in a simple way without worrying about the real-time synchronization.
For more details, please see the [Time and Scheduling](https://simpy.readthedocs.io/en/latest/topical_guides/time_and_scheduling.html#what-is-time) section in the SimPy documentation.
## Mixnet Functionalities
## Progress
### Mixnet Functionalities
- Modified Sphinx
- [x] Without encryption
- [ ] With encryption
- P2P Broadcasting
- [x] Naive 1-to-all
- [ ] More realistic broadcasting (e.g. gossipsub)
- [x] More realistic broadcasting (e.g. gossipsub)
- [x] Sending a real message to the mixnet at the promised interval
- Each node has its own probability of sending a real message at each interval.
- [x] Cover traffic
@ -47,36 +49,12 @@ For more details, please see the [Time and Scheduling](https://simpy.readthedocs
- [x] Naive random delays
- [ ] More sophisticated delays (e.g. Poisson) if necessary
## Performance Measurements
### Performance Measurements
- [ ] Bandwidth Usage
- DRAFT with the naive 1-to-all broadcasting
![](./docs/bandwidth.png)
- Should be measured with realistic parameters and P2P gossiping.
- [x] Bandwidth Usage
## [Adversary Models](https://www.notion.so/Mixnet-v2-Proof-of-Concept-102d0563e75345a3a6f1c11791fbd746?pvs=4#c5ffa49486ce47ed81d25028bc0d9d40)
### [Adversary Models](https://www.notion.so/Mixnet-v2-Proof-of-Concept-102d0563e75345a3a6f1c11791fbd746?pvs=4#c5ffa49486ce47ed81d25028bc0d9d40)
- [x] Inspecting message sizes to analyze how far each message has traveled since being emitted by the original sender.
- Currently, all messages have the same size (including messages broadcast after being fully unwrapped). Thus, the adversary can learn nothing from message sizes.
```
message_size
count 1806.0
mean 1937.0
std 0.0
min 1937.0
25% 1937.0
50% 1937.0
75% 1937.0
max 1937.0
```
- Identifying nodes emitting messages around the promised interval.
- [x] As the GPA
- [ ] With partial visibility
- [ ] Quantifying how much the expected frequent senders are anonymized by cover traffic
![](./docs/msgs-around-interval.png)
- [x] Identifying nodes emitting messages around the promised interval.
- [ ] Correlating senders-receivers based on timing
- DRAFT
![](./docs/mixed-msgs-over-time.png)
- This can indicate whether a certain node has extremely few messages being mixed in it.
However, I don't think this is the best way to analyze the correlation.
I think we need to try simulating the actual timing attack.
- [ ] Active attacks

View File

@ -1,7 +1,7 @@
from __future__ import annotations
import math
from collections import defaultdict, deque
from collections import defaultdict, deque, Counter
from enum import Enum
from typing import TYPE_CHECKING
@ -20,9 +20,9 @@ class Adversary:
self.env = env
self.config = config
self.message_sizes = []
self.senders_around_interval = defaultdict(int)
self.msgs_in_node_per_window = [] # [<receiver, (int, set[sender]))>]
self.cur_window_per_node = defaultdict(lambda: deque()) # <node, [(time, int)]>: int is + or -.
self.senders_around_interval = Counter()
self.io_windows = [] # dict[receiver, (deque[time_received], set[sender]))]
self.io_windows.append(defaultdict(lambda: (deque(), set())))
# self.node_states = defaultdict(dict)
self.env.process(self.update_observation_window())
@ -31,42 +31,32 @@ class Adversary:
self.message_sizes.append(len(msg))
def observe_receiving_node(self, sender: "Node", receiver: "Node"):
self.cur_window_per_node[receiver].append((self.env.now, 1, sender))
msg_queue, senders = self.io_windows[-1][receiver]
msg_queue.append(self.env.now)
senders.add(sender)
# if node not in self.node_states[self.env.now]:
# self.node_states[self.env.now][node] = NodeState.RECEIVING
def observe_sending_node(self, sender: "Node", receiver: "Node"):
self.cur_window_per_node[sender].append((self.env.now, -1, receiver))
def observe_sending_node(self, sender: "Node"):
msg_queue, _ = self.io_windows[-1][sender]
if len(msg_queue) > 0:
msg_queue.popleft()
if self.is_around_message_interval(self.env.now):
self.senders_around_interval[sender] += 1
self.senders_around_interval.update({sender})
# self.node_states[self.env.now][node] = NodeState.SENDING
def is_around_message_interval(self, time: SimTime):
now_frac, now_int = math.modf(time)
return now_int % self.config.mixnet.message_interval == 0 and now_frac <= self.config.mixnet.max_message_prep_time
return time % self.config.mixnet.message_interval <= self.config.mixnet.max_message_prep_time
def update_observation_window(self):
while True:
yield self.env.timeout(self.config.adversary.io_window_moving_interval)
self.msgs_in_node_per_window.append(defaultdict(lambda: (0, set()))) # <node, (int, int)>
for node, queue in self.cur_window_per_node.items():
msg_cnt = 0.0
senders = set()
# Pop old events that are out of the new window, and accumulate msg_cnt
while queue and queue[0][0] < self.env.now - self.config.adversary.io_window_size:
_, delta, sender = queue.popleft()
msg_cnt += delta
if delta > 0:
senders.add(sender)
# Iterate remaining events that will remain in the new window, and accumulate msg_cnt
for t, delta, sender in queue:
if t >= self.env.now - self.config.adversary.io_window_moving_interval:
break
msg_cnt += delta
if delta > 0:
senders.add(sender)
self.msgs_in_node_per_window[-1][node] = (msg_cnt, senders)
yield self.env.timeout(self.config.adversary.io_window_size)
new_window = defaultdict(lambda: (deque(), set()))
for receiver, (msg_queue, _) in self.io_windows[-1].items():
for time_received in msg_queue:
if self.env.now - time_received < self.config.mixnet.max_mix_delay:
new_window[receiver][0].append(time_received)
self.io_windows.append(new_window)
class NodeState(Enum):

View File

@ -14,6 +14,17 @@ from simulation import Simulation
if TYPE_CHECKING:
from node import Node
COL_TIME = "Time"
COL_NODE_ID = "Node ID"
COL_MSG_CNT = "Message Count"
COL_SENDER_CNT = "Sender Count"
COL_NODE_STATE = "Node State"
COL_HOPS = "Hops"
COL_EXPECTED = "Expected"
COL_MSG_SIZE = "Message Size"
COL_EGRESS = "Egress"
COL_INGRESS = "Ingress"
class Analysis:
def __init__(self, sim: Simulation, config: Config):
@ -27,7 +38,7 @@ class Analysis:
self.messages_in_node_over_time()
# self.node_states()
self.message_hops()
self.timing_attack()
# self.timing_attack(median_hops)
def bandwidth(self, message_size_df: pd.DataFrame):
dataframes = []
@ -44,18 +55,18 @@ class Analysis:
nonzero_egresses.append(egress)
if ingress > 0:
nonzero_ingresses.append(ingress)
df = pd.DataFrame(rows, columns=["node_id", "egress", "ingress"])
df = pd.DataFrame(rows, columns=[COL_NODE_ID, COL_EGRESS, COL_INGRESS])
dataframes.append(df)
times = range(len(dataframes))
df = pd.concat([df.assign(Time=time) for df, time in zip(dataframes, times)], ignore_index=True)
df = df.pivot(index="Time", columns="node_id", values=["egress", "ingress"])
df = df.pivot(index=COL_TIME, columns=COL_NODE_ID, values=[COL_EGRESS, COL_INGRESS])
plt.figure(figsize=(12, 6))
for column in df.columns:
marker = "x" if column[0] == "ingress" else "o"
marker = "x" if column[0] == COL_INGRESS else "o"
plt.plot(df.index, df[column], marker=marker, label=column[0])
plt.title("Egress/ingress bandwidth of each node over time")
plt.xlabel("Time")
plt.xlabel(COL_TIME)
plt.ylabel("Bandwidth (KiB/s)")
plt.ylim(bottom=0)
# Customize the legend to show only 'egress' and 'ingress' regardless of node_id
@ -68,7 +79,7 @@ class Analysis:
egress_series = pd.Series(nonzero_egresses)
ingress_series = pd.Series(nonzero_ingresses)
desc = (
f"message: {message_size_df["message_size"].mean():.0f} bytes\n"
f"message: {message_size_df[COL_MSG_SIZE].mean():.0f} bytes\n"
f"{self.config.description()}\n\n"
f"[egress(>0)]\nmean: {egress_series.mean():.2f} KiB/s\nmax: {egress_series.max():.2f} KiB/s\n\n"
f"[ingress(>0)]\nmean: {ingress_series.mean():.2f} KiB/s\nmax: {ingress_series.max():.2f} KiB/s"
@ -79,53 +90,64 @@ class Analysis:
plt.show()
def message_size_distribution(self) -> pd.DataFrame:
df = pd.DataFrame(self.sim.p2p.adversary.message_sizes, columns=["message_size"])
df = pd.DataFrame(self.sim.p2p.adversary.message_sizes, columns=[COL_MSG_SIZE])
print(df.describe())
return df
def messages_emitted_around_interval(self):
df = pd.DataFrame(
[(node.id, cnt, node.id < len(self.sim.config.mixnet.real_message_prob_weights))
for node, cnt in self.sim.p2p.adversary.senders_around_interval.items()],
columns=["node_id", "msg_count", "expected"]
# A ground truth that shows how many times each node sent a real message
truth_df = pd.DataFrame([(node.id, count) for node, count in self.sim.p2p.measurement.original_senders.items()],
columns=[COL_NODE_ID, COL_MSG_CNT])
# A result of observing nodes who have sent messages around the promised message interval
suspected_df = pd.DataFrame(
[(node.id, self.sim.p2p.adversary.senders_around_interval[node]) for node in
self.sim.p2p.measurement.original_senders.keys()],
columns=[COL_NODE_ID, COL_MSG_CNT]
)
plt.figure(figsize=(10, 6))
seaborn.barplot(data=df, x="node_id", y="msg_count", hue="expected", palette={True: "red", False: "blue"})
plt.title("Messages emitted around the promised interval")
plt.xlabel("Sender Node ID")
plt.ylabel("Msg Count")
plt.legend(title="expected")
width = 0.4
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(truth_df[COL_NODE_ID] - width / 2, truth_df[COL_MSG_CNT], width, label="Ground Truth", color="b")
ax.bar(truth_df[COL_NODE_ID] + width / 2, suspected_df[COL_MSG_CNT], width, label="Adversary's Inference",
color="r")
ax.set_title("Nodes who generated real messages")
ax.set_xlabel(COL_NODE_ID)
ax.set_ylabel(COL_MSG_CNT)
ax.set_xlim(-1, len(truth_df[COL_NODE_ID]))
ax.legend()
plt.tight_layout()
plt.show()
def messages_in_node_over_time(self):
dataframes = []
for i, msgs_in_node in enumerate(self.sim.p2p.adversary.msgs_in_node_per_window):
time = i * self.config.adversary.io_window_moving_interval
for i, io_window in enumerate(self.sim.p2p.adversary.io_windows):
time = i * self.config.adversary.io_window_size
df = pd.DataFrame(
[(time, node.id, msg_cnt, len(senders)) for node, (msg_cnt, senders) in msgs_in_node.items()],
columns=["time", "node_id", "msg_cnt", "sender_cnt"])
[(time, receiver.id, len(msg_queue), len(senders)) for receiver, (msg_queue, senders) in
io_window.items()],
columns=[COL_TIME, COL_NODE_ID, COL_MSG_CNT, COL_SENDER_CNT])
if not df.empty:
dataframes.append(df)
df = pd.concat(dataframes, ignore_index=True)
msg_cnt_df = df.pivot(index="time", columns="node_id", values="msg_cnt")
msg_cnt_df = df.pivot(index=COL_TIME, columns=COL_NODE_ID, values=COL_MSG_CNT)
plt.figure(figsize=(12, 6))
for column in msg_cnt_df.columns:
plt.plot(msg_cnt_df.index, msg_cnt_df[column], marker=None, label=column)
plt.title("Messages within each node over time")
plt.xlabel("Time")
plt.ylabel("Msg Count")
plt.xlabel(COL_TIME)
plt.ylabel(COL_MSG_CNT)
plt.ylim(bottom=0)
plt.grid(True)
plt.tight_layout()
plt.show()
sender_cnt_df = df.pivot(index="time", columns="node_id", values="sender_cnt")
sender_cnt_df = df.pivot(index=COL_TIME, columns=COL_NODE_ID, values=COL_SENDER_CNT)
plt.figure(figsize=(12, 6))
for column in sender_cnt_df.columns:
plt.plot(sender_cnt_df.index, sender_cnt_df[column], marker=None, label=column)
plt.title("Diversity of senders of messages received by each node over time")
plt.xlabel("Time")
plt.xlabel(COL_TIME)
plt.ylabel("# of senders of messages received by each node")
plt.ylim(bottom=0)
plt.grid(True)
@ -133,11 +155,11 @@ class Analysis:
plt.show()
plt.figure(figsize=(12, 6))
df.boxplot(column="sender_cnt", by="time", medianprops={"color": "red", "linewidth": 2.5})
df.boxplot(column=COL_SENDER_CNT, by=COL_TIME, medianprops={"color": "red", "linewidth": 2.5})
plt.title("Diversity of senders of messages received by each node over time")
plt.suptitle("")
plt.xticks([])
plt.xlabel("Time")
plt.xlabel(COL_TIME)
plt.ylabel("# of senders of messages received by each node")
plt.ylim(bottom=0)
plt.grid(axis="x")
@ -149,67 +171,54 @@ class Analysis:
for time, node_states in self.sim.p2p.adversary.node_states.items():
for node, state in node_states.items():
rows.append((time, node.id, state))
df = pd.DataFrame(rows, columns=["time", "node_id", "state"])
df = pd.DataFrame(rows, columns=[COL_TIME, COL_NODE_ID, COL_NODE_STATE])
plt.figure(figsize=(10, 6))
seaborn.scatterplot(data=df, x="time", y="node_id", hue="state",
seaborn.scatterplot(data=df, x=COL_TIME, y=COL_NODE_ID, hue=COL_NODE_STATE,
palette={NodeState.SENDING: "red", NodeState.RECEIVING: "blue"})
plt.title("Node states over time")
plt.xlabel("Time")
plt.ylabel("Node ID")
plt.legend(title="state")
plt.xlabel(COL_TIME)
plt.ylabel(COL_NODE_ID)
plt.legend(title=COL_NODE_STATE)
plt.show()
def message_hops(self):
df = pd.DataFrame(self.sim.p2p.measurement.message_hops.values(), columns=["hops"])
def message_hops(self) -> int:
df = pd.DataFrame(self.sim.p2p.measurement.message_hops.values(), columns=[COL_HOPS])
print(df.describe())
plt.figure(figsize=(6, 6))
seaborn.boxplot(data=df, y="hops", medianprops={"color": "red", "linewidth": 2.5})
seaborn.boxplot(data=df, y=COL_HOPS, medianprops={"color": "red", "linewidth": 2.5})
plt.ylim(bottom=0)
plt.title("Message hops distribution")
plt.show()
return int(df.median().iloc[0])
def timing_attack(self):
"""
pick a random node received a message.
then, track back the message to the sender
until
- there is no message to track back within a reasonable time window
- enough hops have been traversed
"""
all_results = []
window = len(self.sim.p2p.adversary.msgs_in_node_per_window) - 1
def timing_attack(self, hops_between_layers: int):
hops_to_observe = hops_between_layers * (self.config.mixnet.num_mix_layers + 1)
all_results = Counter()
window = len(self.sim.p2p.adversary.io_windows) - 1
while window >= 0:
items = self.sim.p2p.adversary.msgs_in_node_per_window[window].items()
actual_receivers = [node for node, (msg_cnt, senders) in items if len(senders) > 0]
items = self.sim.p2p.adversary.io_windows[window].items()
actual_receivers = [receiver for receiver, (_, senders) in items if len(senders) > 0]
if len(actual_receivers) == 0:
window -= 1
continue
results = []
max_hops = 0
for receiver in actual_receivers:
nodes_per_hop = self.timing_attack_with(receiver, window)
self.print_nodes_per_hop(nodes_per_hop, window)
results.append(nodes_per_hop)
max_hops = max(max_hops, len(nodes_per_hop))
window -= max_hops
all_results.extend(results)
suspected_senders = self.timing_attack_with(receiver, window, hops_to_observe)
# self.print_nodes_per_hop(suspected_senders, window)
all_results.update(suspected_senders)
window -= 1
suspected_senders = Counter()
for result in all_results:
print(Counter({node.id: count for node, count in result[-1].items()}))
suspected_senders.update(result[-1])
suspected_senders = ({node.id: count for node, count in suspected_senders.items()})
suspected_senders = ({node.id: count for node, count in all_results.items()})
print(f"suspected nodes count: {len(suspected_senders)}")
# Create the bar plot for original sender counts
original_senders = ({node.id: count for node, count in self.sim.p2p.measurement.original_senders.items()})
plt.figure(figsize=(12, 8))
plt.bar(list(original_senders.keys()), list(original_senders.values()))
plt.xlabel('Node ID')
plt.ylabel('Counts')
plt.title('Original Sender Counts')
plt.xlabel("Node ID")
plt.ylabel("Counts")
plt.title("Original Sender Counts")
plt.xlim(-1, self.config.mixnet.num_nodes)
plt.show()
@ -219,9 +228,9 @@ class Analysis:
# Create the bar plot
plt.figure(figsize=(12, 8))
plt.bar(keys, values)
plt.xlabel('Node ID')
plt.ylabel('Counts')
plt.title('Suspected Sender Counts')
plt.xlabel("Node ID")
plt.ylabel("Counts")
plt.title("Suspected Sender Counts")
plt.xlim(-1, self.config.mixnet.num_nodes)
plt.show()
@ -229,9 +238,9 @@ class Analysis:
broadcasters = ({node.id: count for node, count in self.sim.p2p.broadcasters.items()})
plt.figure(figsize=(12, 8))
plt.bar(list(broadcasters.keys()), list(broadcasters.values()))
plt.xlabel('Node ID')
plt.ylabel('Counts')
plt.title('Broadcasters')
plt.xlabel("Node ID")
plt.ylabel("Counts")
plt.title("Broadcasters")
plt.xlim(-1, self.config.mixnet.num_nodes)
plt.show()
@ -240,41 +249,53 @@ class Analysis:
std_dev = np.std(values)
# Plot the histogram of the values
plt.figure(figsize=(12, 8))
plt.hist(values, bins=30, density=True, alpha=0.6, color='g', label='Counts Histogram')
plt.hist(values, bins=30, density=True, alpha=0.6, color="g", label="Counts Histogram")
# Plot the normal distribution curve
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = stats.norm.pdf(x, mean, std_dev)
plt.plot(x, p, 'k', linewidth=2, label='Normal Distribution')
plt.plot(x, p, "k", linewidth=2, label="Normal Distribution")
title = "Fit results: mean = %.2f, std_dev = %.2f" % (mean, std_dev)
plt.title(title)
plt.xlabel('Counts')
plt.ylabel('Density')
plt.xlabel("Counts")
plt.ylabel("Density")
plt.legend()
plt.show()
def timing_attack_with(self, starting_node: "Node", starting_window: int):
_, senders = self.sim.p2p.adversary.msgs_in_node_per_window[starting_window][starting_node]
nodes_per_hop = [Counter(senders)]
def timing_attack_with(self, receiver: "Node", window: int, remaining_hops: int) -> Counter:
assert remaining_hops >= 1
if self.config.p2p.type == self.config.p2p.TYPE_ONE_TO_ALL:
MAX_HOPS = 1 + self.config.mixnet.num_mix_layers
else:
MAX_HOPS = (1 + self.config.mixnet.num_mix_layers) * 4
# Start inspecting senders who sent messages that were arrived in the receiver at the given window
_, senders = self.sim.p2p.adversary.io_windows[window][receiver]
for window in range(starting_window - 1, 0, -1):
if len(nodes_per_hop) >= MAX_HOPS:
break
# If the remaining_hops is 1, return the senders as suspected senders
if remaining_hops == 1:
return Counter(senders)
next_nodes = Counter()
for node in nodes_per_hop[-1]:
_, senders = self.sim.p2p.adversary.msgs_in_node_per_window[window][node]
next_nodes.update(senders)
if len(next_nodes) == 0:
break
nodes_per_hop.append(next_nodes)
# A result to be returned after inspecting all senders who sent messages to the receiver
all_suspected_senders = Counter()
return nodes_per_hop
# Inspect each sender who sent messages to the receiver
for sender in senders:
# A sub-result to be filled when tracking back further from the sender
suspected_senders = Counter()
# Track back to each window where that sender might have received any messages.
time_range = self.config.mixnet.max_mix_delay + self.config.p2p.max_network_latency - self.config.adversary.io_window_size
window_range = int(time_range / self.config.adversary.io_window_size)
for prev_window in range(window - 1, window - 1 - window_range, -1):
if prev_window < 0:
break
suspected_senders.update(self.timing_attack_with(sender, prev_window, remaining_hops - 1))
# If there is no suspected sender gathered, we can assume that the sender is the original sender
# because it means that nobody has sent messages to the sender within the reasonable time window
if len(suspected_senders) == 0:
all_suspected_senders.update({sender})
else:
all_suspected_senders.update(suspected_senders)
return all_suspected_senders
@staticmethod
def print_nodes_per_hop(nodes_per_hop, starting_window: int):

View File

@ -145,10 +145,6 @@ class MeasurementConfig:
class AdversaryConfig:
# A time window for the adversary to observe inputs and outputs of each node
io_window_size: float
# A moving interval of the time window for the adversary to observe inputs and outputs of each node
# This must be smaller than or equal to io_window_size.
io_window_moving_interval: float
def validate(self):
assert self.io_window_size > 0
assert 0 < self.io_window_moving_interval <= self.io_window_size

View File

@ -19,7 +19,7 @@ mixnet:
# The length of the list should be <= p2p.num_nodes, i.e., some nodes may not have a weight.
real_message_prob_weights: []
# A probability of sending a cover message within a cycle if not sending a real message
cover_message_prob: 0
cover_message_prob: 0.05
# A maximum preparation time (computation time) for a message sender before sending the message
max_message_prep_time: 0
# A maximum delay of messages mixed in a mix node
@ -41,9 +41,5 @@ measurement:
adversary:
# A time window for the adversary to observe inputs and outputs of each node
# Recommendation: Same as `mixnet.max_mix_delay + (p2p.max_network_latency - p2p.min_network_latency)`
io_window_size: 0.10
# A moving interval of the time window for the adversary to observe inputs and outputs of each node
# This must be smaller than or equal to io_window_size.
# Recommendation: Same as `p2p.min_network_latency`
io_window_moving_interval: 0.10
io_window_size: 0.10

Binary file not shown.

Before

Width:  |  Height:  |  Size: 243 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 191 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 49 KiB

View File

@ -46,7 +46,7 @@ class P2P(ABC):
is_first_of_broadcasting: bool):
if is_first_of_broadcasting:
self.adversary.inspect_message_size(msg)
self.adversary.observe_sending_node(sender, receiver)
self.adversary.observe_sending_node(sender)
self.measurement.measure_egress(sender, msg)
# simulate network latency
@ -79,6 +79,8 @@ class NaiveBroadcastP2P(P2P):
self.env.process(self.send(msg, 0, sender, receiver, i == 0))
def receive(self, msg: SphinxPacket | bytes, hops_traveled: int, sender: "Node", receiver: "Node"):
msg_hash = hashlib.sha256(bytes(msg)).digest()
self.measurement.update_message_hops(msg_hash, hops_traveled)
self.env.process(receiver.receive_message(msg))