add input/output observation

2026-02-23 22:53:16 +00:00 · 2024-05-16 17:13:43 +09:00 · 2024-05-16 17:13:43 +09:00 · e349e23a6c
commit e349e23a6c
parent e1ef269636
7 changed files with 56 additions and 9 deletions
--- a/mixnet/v2/sim/README.md
+++ b/mixnet/v2/sim/README.md
@ -61,9 +61,15 @@ For more details, please see the [Time and Scheduling](https://simpy.readthedocs
    75%          1937.0
    max          1937.0
    ```
- [x] Identifying nodes emitting messages around the promised interval.
+- Identifying nodes emitting messages around the promised interval.
+  - [x] As the GPA
  - [ ] With partial visibility
  - [ ] Quantifying how much the expected frequent senders are anonymized by cover traffic
  ![](./docs/msgs-around-interval.png)
 - [ ] Correlating senders-receivers based on timing
+  - DRAFT
+    ![](./docs/mixed-msgs-over-time.png)
+    - This plot can reveal whether a certain node has extremely few messages being mixed in it at a given time.
+      However, I don't think this is the best way to analyze the correlation.
+      I think we need to simulate the actual timing attack instead.
 - [ ] Active attacks
--- a/mixnet/v2/sim/config.py
+++ b/mixnet/v2/sim/config.py
@ -16,7 +16,8 @@ class Config:
    # A probability of sending a real message within one cycle
    real_message_prob: float
    # A weight of real message emission probability of some nodes
-    # The length of the list should be <= num_nodes.
+    # The weights are assigned to nodes one by one, in the order of the node ID.
+    # The length of the list should be <= num_nodes, i.e. some nodes may not have a weight.
    real_message_prob_weights: list[float]
    # A probability of sending a cover message within one cycle if not sending a real message
    cover_message_prob: float
@ -24,6 +25,10 @@ class Config:
    max_message_prep_time: float
    # A maximum network latency between nodes directly connected with each other
    max_network_latency: float
+    # A maximum delay of messages mixed in a mix node
+    max_mix_delay: float
+    # A discrete time window for the adversary to observe inputs and outputs of a certain node
+    io_observation_window: int

    @classmethod
    def load(cls, yaml_path: str) -> Self:
@ -43,5 +48,6 @@ class Config:
        assert config.cover_message_prob >= 0
        assert config.max_message_prep_time >= 0
        assert config.max_network_latency >= 0
+        assert config.io_observation_window >= 0

        return config
--- a/mixnet/v2/sim/config.yaml
+++ b/mixnet/v2/sim/config.yaml
@ -8,11 +8,16 @@ message_interval: 1
 # A probability of sending a real message within a cycle
 real_message_prob: 0.1
 # A weight of real message emission probability of some nodes
-# The length of the list should be <= num_nodes.
+# The weights are assigned to nodes one by one, in the order of the node ID.
+# The length of the list should be <= num_nodes, i.e. some nodes may not have a weight.
 real_message_prob_weights: [10, 8, 12]
 # A probability of sending a cover message within a cycle if not sending a real message
 cover_message_prob: 0.2
 # A maximum preparation time (delay) before sending the message
 max_message_prep_time: 0.3
 # A maximum network latency between nodes directly connected with each other
-max_network_latency: 0.5
+max_network_latency: 0.5
+# A maximum delay of messages mixed in a mix node
+max_mix_delay: 3
+# A discrete time window for the adversary to observe inputs and outputs of a certain node
+io_observation_window: 1
--- a/mixnet/v2/sim/docs/mixed-msgs-over-time.png
+++ b/mixnet/v2/sim/docs/mixed-msgs-over-time.png
--- a/mixnet/v2/sim/main.py
+++ b/mixnet/v2/sim/main.py
@ -24,14 +24,32 @@ if __name__ == "__main__":
    df = pd.DataFrame(
        [(node.id, cnt, node.id < len(config.real_message_prob_weights))
         for node, cnt in sim.p2p.senders_around_interval.items()],
-        columns=["NodeID", "Count", "Expected"]
+        columns=["NodeID", "MsgCount", "Expected"]
    )
    plt.figure(figsize=(10, 6))
-    seaborn.barplot(data=df, x="NodeID", y="Count", hue="Expected", palette={True: "red", False: "blue"})
+    seaborn.barplot(data=df, x="NodeID", y="MsgCount", hue="Expected", palette={True: "red", False: "blue"})
    plt.title("Messages emitted around the promised interval")
    plt.xlabel("Sender Node ID")
    plt.ylabel("Msg Count")
    plt.legend(title="Expected")
    plt.show()

+    # Analyze the number of mixed messages per node per observation window
+    dataframes = []
+    for mixed_msgs_per_node in sim.p2p.mixed_msgs_per_window:
+        df = pd.DataFrame([(node.id, cnt) for node, cnt in mixed_msgs_per_node.items()], columns=["NodeID", "MsgCount"])
+        dataframes.append(df)
+    observation_times = range(len(dataframes))
+    df = pd.concat([df.assign(Time=time) for df, time in zip(dataframes, observation_times)], ignore_index=True)
+    df = df.pivot(index="Time", columns="NodeID", values="MsgCount")
+    plt.figure(figsize=(12, 6))
+    for column in df.columns:
+        plt.plot(df.index, df[column], marker='o', label=column)
+    plt.title('Mixed messages in each mix over time')
+    plt.xlabel('Time')
+    plt.ylabel('Msg Count')
+    plt.legend(title='Node ID')
+    plt.grid(True)
+    plt.show()
+
    print("Simulation complete!")
--- a/mixnet/v2/sim/node.py
+++ b/mixnet/v2/sim/node.py
@ -88,8 +88,8 @@ class Node:
                    else:
                        self.log("Dropping a cover message: %s" % msg.payload)
                else:
-                    # TODO: use Poisson delay or something else
-                    yield self.env.timeout(random.uniform(0, 5.0))
+                    # TODO: use Poisson delay or something else, if necessary
+                    yield self.env.timeout(random.uniform(0, self.config.max_mix_delay))
                    self.env.process(self.p2p.broadcast(self, msg))
            else:
                self.log("Receiving SphinxPacket, but not mine")
--- a/mixnet/v2/sim/p2p.py
+++ b/mixnet/v2/sim/p2p.py
@ -17,6 +17,8 @@ class P2p:
        # TODO: Move these to a separate class `Adversary`.
        self.message_sizes = []
        self.senders_around_interval = defaultdict(int)
+        self.mixed_msgs_per_window = []
+        self.env.process(self.update_observation_window())

    def add_node(self, nodes):
        self.nodes.extend(nodes)
@ -24,10 +26,12 @@ class P2p:
    def get_nodes(self, n: int):
        return random.sample(self.nodes, n)

-    # TODO: This should accept only bytes, but SphinxPacket is also accepted until we implement the Sphinx serde
+    # This should accept only bytes in practice,
+    # but we accept SphinxPacket as well because we don't implement Sphinx deserialization.
    def broadcast(self, sender, msg: SphinxPacket | bytes):
        self.log("Broadcasting a msg: %d bytes" % len(msg))
        self.message_sizes.append(len(msg))
+        self.mixed_msgs_per_window[-1][sender] -= 1

        now_frac, now_int = math.modf(self.env.now)
        if now_int % self.config.message_interval == 0 and now_frac <= self.config.max_message_prep_time:
@ -44,7 +48,15 @@ class P2p:
    def send(self, msg: SphinxPacket | bytes, node):
        # simulate network latency
        yield self.env.timeout(random.uniform(0, self.config.max_network_latency))
+
+        self.mixed_msgs_per_window[-1][node] += 1
        self.env.process(node.receive_message(msg))

+    # TODO: Move to a separate class `Adversary`.
+    def update_observation_window(self):
+        while True:
+            self.mixed_msgs_per_window.append(defaultdict(int))
+            yield self.env.timeout(self.config.io_observation_window)
+
    def log(self, msg):
        print("P2P at %g: %s" % (self.env.now, msg))