2024-09-11 23:22:18 +09:00

43 lines
1.0 KiB
Python

import sys
import pandas as pd
def calculate_hamming_distance(df1, df2):
    """
    Calculate the normalized Hamming distance between two DataFrames.

    The distance is the fraction of cell positions whose values differ,
    so it ranges from 0.0 (identical) to 1.0 (every cell differs).
    A cell that is missing (NaN/None) in both frames counts as equal;
    a cell missing in only one frame counts as a difference.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame; must have the same shape as ``df1``.

    Returns:
        The number of differing cells divided by the total number of
        cells, as a float. Two empty frames have distance 0.0.

    Raises:
        ValueError: If the shapes differ. pandas also raises ValueError
            from the element-wise comparison when the two frames are
            not identically labeled.
    """
    if df1.shape != df2.shape:
        raise ValueError(
            "DataFrames must have the same shape to calculate Hamming distance."
        )

    # Element-wise comparison; this is also where pandas rejects frames
    # whose row/column labels do not match.
    mismatch = df1 != df2

    # NaN != NaN evaluates to True, so without this mask every cell that
    # is empty in both inputs would be reported as a difference.
    both_missing = df1.isna() & df2.isna()
    differences = int((mismatch & ~both_missing).sum().sum())

    # No cells to compare: define the distance as 0 instead of dividing
    # by zero.
    if df1.size == 0:
        return 0.0

    return float(differences / df1.size)  # normalize the distance
def main():
    """
    Command-line entry point: print the Hamming distance of two CSV files.

    Expects exactly two positional arguments (the CSV paths) in
    ``sys.argv``. On success the distance is printed to stdout. If the
    arguments are wrong, a file cannot be loaded, or the frames cannot be
    compared, a message is printed and the process exits with status 1.
    """
    if len(sys.argv) != 3:
        print("Usage: python hamming.py <csv_path1> <csv_path2>")
        sys.exit(1)

    csv_path1, csv_path2 = sys.argv[1:3]

    try:
        # Load the CSV files into DataFrames
        df1 = pd.read_csv(csv_path1)
        df2 = pd.read_csv(csv_path2)
        # Calculate the Hamming distance
        hamming_distance = calculate_hamming_distance(df1, df2)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' parser and
        # empty-data errors, like the shape check, are ValueErrors.
        # Report on stderr and exit non-zero so callers can detect failure.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Hamming distance: {hamming_distance}")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()