"""Normalized Hamming distance between two CSV files.

Usage:
    python hamming.py CSV_PATH1 CSV_PATH2

Origin: commit 961c1e75811d12fdb58b93bce790a85a9f9c5f09
("add calculating hamming distance between two DataFrames"),
new file mixnet/sim/hamming.py.
"""

import sys

import pandas as pd


def calculate_hamming_distance(df1: pd.DataFrame, df2: pd.DataFrame) -> float:
    """Return the fraction of cells that differ between *df1* and *df2*.

    Cells are compared position by position. A cell that is missing
    (NaN/None) in both frames is treated as equal; a cell missing in only
    one frame counts as a difference.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame; must have the same shape as *df1*.

    Returns:
        Number of differing cells divided by the total number of cells,
        as a float in [0.0, 1.0]. Two empty frames have distance 0.0.

    Raises:
        ValueError: If the shapes differ. pandas itself also raises
            ValueError when the two frames are not identically labeled.
    """
    if df1.shape != df2.shape:
        raise ValueError(
            "DataFrames must have the same shape to calculate Hamming distance."
        )

    # No cells to compare: avoid dividing by zero.
    if df1.size == 0:
        return 0.0

    # Element-wise comparison. With nullable dtypes the result may contain
    # <NA>; such an undetermined comparison involves a missing value, so
    # count it as a difference unless both sides are missing (handled below).
    differs = (df1 != df2).fillna(True).astype(bool)

    # NaN != NaN evaluates to True, so without this mask identical missing
    # cells would be reported as differences.
    both_missing = df1.isna() & df2.isna()

    mismatches = (differs & ~both_missing).to_numpy().sum()
    return float(mismatches) / df1.size  # normalize the distance


def main() -> None:
    """Read two CSV paths from the command line and print their distance.

    Exits with status 1 on wrong argument count or when the distance
    cannot be computed (ValueError from the comparison).
    """
    if len(sys.argv) != 3:
        print("Usage: python hamming.py CSV_PATH1 CSV_PATH2", file=sys.stderr)
        sys.exit(1)

    csv_path1, csv_path2 = sys.argv[1:]

    # Load the CSV files into DataFrames
    df1 = pd.read_csv(csv_path1)
    df2 = pd.read_csv(csv_path2)

    # Calculate the Hamming distance
    try:
        hamming_distance = calculate_hamming_distance(df1, df2)
    except ValueError as e:
        # Report failure on stderr and through the exit status so callers
        # and shell pipelines can detect it.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Hamming distance: {hamming_distance}")


if __name__ == "__main__":
    main()