add analysis in python

This commit is contained in:
Youngjoon Lee 2024-09-11 06:53:25 +09:00
parent bb4ee26ea1
commit ac3c5c2e59
No known key found for this signature in database
GPG Key ID: 167546E2D1712F8C
6 changed files with 202 additions and 0 deletions

1
mixnet-rs/analysis/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.venv/

View File

@ -0,0 +1,96 @@
import argparse
import matplotlib.pyplot as plt
import pandas as pd
from common import MARKERS, X_FIELDS
def analyze(path: str):
    """Load the CSV at ``path`` and run ``analyze_versus`` once per X field.

    Args:
        path: Location of the CSV file, passed unchanged to ``pd.read_csv``.
    """
    results = pd.read_csv(path)
    # One pair of figures (plots + table) is produced for every x-axis candidate.
    for column in X_FIELDS:
        analyze_versus(results, column)
def analyze_versus(data: pd.DataFrame, x_field: str):
    """Plot the strong/casual/weak coefficient columns against ``x_field``.

    For every (``x_field``, ``queue_type``) pair only the row with the largest
    ``paramset`` value is kept. Two figures are then built and shown with
    ``plt.show()``:

    * a 3x3 grid of line plots (rows: strong/casual/weak coefficient,
      columns: min/mean/max statistic) with one line per queue type;
    * a table of the same values formatted with two decimal places.

    Args:
        data: Results table. Must contain ``x_field``, ``queue_type``,
            ``paramset`` and the nine ``*_coeff_{min,mean,max}`` columns.
        x_field: Name of the column used as the x-axis.

    Raises:
        KeyError: If a required column is missing, or a queue type has no
            entry in ``MARKERS``.
    """
    # Group by both x_field and queue_type, then keep the row with the
    # largest paramset for each group.
    max_paramset_data = data.loc[
        data.groupby([x_field, "queue_type"])["paramset"].idxmax()
    ]
    # Loop-invariant: the set of queue types does not change per subplot.
    queue_types = max_paramset_data["queue_type"].unique()

    # One inner list per subplot column (statistic); the position inside a
    # list selects the subplot row (strong / casual / weak).
    all_fields = [
        ["strong_coeff_min", "casual_coeff_min", "weak_coeff_min"],
        ["strong_coeff_mean", "casual_coeff_mean", "weak_coeff_mean"],
        ["strong_coeff_max", "casual_coeff_max", "weak_coeff_max"],
    ]
    y_hard_limit = 5000  # cap so that extreme values do not flatten the plots

    # Display the plots
    _, ax = plt.subplots(3, 3, figsize=(20, 10))
    for ax_col, fields in enumerate(all_fields):
        # All subplots in one column share the same y-range so that the
        # strong/casual/weak rows can be compared visually.
        max_y = min(
            max(0, *(max_paramset_data[field].max() for field in fields)),
            y_hard_limit,
        )

        for ax_row, field in enumerate(fields):
            axis = ax[ax_row][ax_col]
            for queue_type in queue_types:
                queue_data = max_paramset_data[
                    max_paramset_data["queue_type"] == queue_type
                ]
                axis.plot(
                    queue_data[x_field],
                    queue_data[field],
                    label=queue_type,
                    marker=MARKERS[queue_type],
                )

            axis.set_title(f"{field} vs {x_field}", fontsize=10)
            axis.set_xlabel(x_field)
            axis.set_ylabel(field)
            # A single legend, placed outside the top-right subplot.
            if ax_row == 0 and ax_col == len(all_fields) - 1:
                axis.legend(bbox_to_anchor=(1, 1), loc="upper left")
            axis.grid(True)
            axis.set_ylim(-10, max_y * 1.05)

    plt.tight_layout()

    # Display the table of values: x_field, queue_type, and coefficients.
    # zip(*all_fields) regroups the columns as strong_*, casual_*, weak_*.
    flatten_fields = [
        field for zipped_fields in zip(*all_fields) for field in zipped_fields
    ]
    columns = [x_field, "queue_type"] + flatten_fields
    table_data = max_paramset_data[columns].sort_values(by=[x_field, "queue_type"])
    # Show values with only 2 decimal places. DataFrame.map replaces
    # DataFrame.applymap, which is deprecated since pandas 2.1.
    table_data[flatten_fields] = table_data[flatten_fields].map("{:.2f}".format)

    # Display the table in its own figure, sized by column and row count.
    _, ax_table = plt.subplots(
        figsize=(len(columns) * 1.8, len(table_data) * 0.3)
    )
    ax_table.axis("off")  # Turn off the axis
    table = ax_table.table(
        cellText=table_data.values,
        colLabels=table_data.columns,
        cellLoc="center",
        loc="center",
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 1.5)
    for col_index in range(len(columns)):
        table.auto_set_column_width(col_index)

    plt.show()
if __name__ == "__main__":
    # Command-line entry point: parse the single positional argument and
    # run the analysis on it.
    parser = argparse.ArgumentParser(
        description="Aggregate the results of all paramsets of an experiment"
    )
    # The value is handed straight to pd.read_csv by analyze(), so it must
    # name a CSV file rather than a directory.
    parser.add_argument("path", type=str, help="path to the CSV file to analyze")
    args = parser.parse_args()
    analyze(args.path)

View File

@ -0,0 +1,15 @@
# Matplotlib marker style used for each queue type's line in the plots.
MARKERS: dict[str, str] = {
    "NonMix": "o",  # circle
    "PureCoinFlipping": "s",  # square
    "PureRandomSampling": "^",  # triangle up
    "PermutedCoinFlipping": "v",  # triangle down
    "NoisyCoinFlipping": "*",  # star
    "NoisyCoinFlippingRandomRelease": "d",  # thin diamond
}
# Column names that the analysis scripts use, one at a time, as the x-axis.
X_FIELDS: list[str] = [
    "num_mixes",
    "peering_degree",
    "sender_data_msg_prob",
    "mix_data_msg_prob",
]

View File

@ -0,0 +1,81 @@
import argparse
import matplotlib.pyplot as plt
import pandas as pd
from common import MARKERS, X_FIELDS
def analyze(path: str):
    """Load the CSV at ``path`` and run ``analyze_versus`` once per X field.

    Args:
        path: Location of the CSV file, passed unchanged to ``pd.read_csv``.
    """
    results = pd.read_csv(path)
    # One pair of figures (plots + table) is produced for every x-axis candidate.
    for column in X_FIELDS:
        analyze_versus(results, column)
def analyze_versus(data: pd.DataFrame, x_field: str):
    """Plot the latency min/mean/max columns against ``x_field``.

    For every (``x_field``, ``queue_type``) pair only the row with the largest
    ``paramset`` value is kept. Two figures are then built and shown with
    ``plt.show()``:

    * a row of three line plots (``latency_min``, ``latency_mean``,
      ``latency_max``) with one line per queue type;
    * a table of the same values formatted with two decimal places.

    Args:
        data: Results table. Must contain ``x_field``, ``queue_type``,
            ``paramset`` and the three ``latency_*`` columns.
        x_field: Name of the column used as the x-axis.

    Raises:
        KeyError: If a required column is missing, or a queue type has no
            entry in ``MARKERS``.
    """
    # Group by both x_field and queue_type, then keep the row with the
    # largest paramset for each group.
    max_paramset_data = data.loc[
        data.groupby([x_field, "queue_type"])["paramset"].idxmax()
    ]
    # Loop-invariant: the set of queue types does not change per subplot.
    queue_types = max_paramset_data["queue_type"].unique()

    fields = ["latency_min", "latency_mean", "latency_max"]

    # Display the plots
    _, ax = plt.subplots(1, 3, figsize=(20, 4))
    for ax_col, field in enumerate(fields):
        axis = ax[ax_col]
        for queue_type in queue_types:
            queue_data = max_paramset_data[
                max_paramset_data["queue_type"] == queue_type
            ]
            axis.plot(
                queue_data[x_field],
                queue_data[field],
                label=queue_type,
                marker=MARKERS[queue_type],
            )

        axis.set_title(f"{field} vs {x_field}", fontsize=10)
        axis.set_xlabel(x_field)
        axis.set_ylabel(field)
        # A single legend, placed outside the right-most subplot.
        if ax_col == len(fields) - 1:
            axis.legend(bbox_to_anchor=(1, 1), loc="upper left")
        axis.grid(True)
        axis.set_ylim(bottom=0)

    plt.tight_layout()

    # Display the table of values: x_field, queue_type, and latency statistics.
    columns = [x_field, "queue_type"] + fields
    table_data = max_paramset_data[columns].sort_values(by=[x_field, "queue_type"])
    # Show values with only 2 decimal places. DataFrame.map replaces
    # DataFrame.applymap, which is deprecated since pandas 2.1.
    table_data[fields] = table_data[fields].map("{:.2f}".format)

    # Display the table in its own figure, sized by column and row count.
    _, ax_table = plt.subplots(
        figsize=(len(columns) * 1.8, len(table_data) * 0.3)
    )
    ax_table.axis("off")  # Turn off the axis
    table = ax_table.table(
        cellText=table_data.values,
        colLabels=table_data.columns,
        cellLoc="center",
        loc="center",
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 1.5)
    for col_index in range(len(columns)):
        table.auto_set_column_width(col_index)

    plt.show()
if __name__ == "__main__":
    # Command-line entry point: parse the single positional argument and
    # run the analysis on it.
    parser = argparse.ArgumentParser(
        description="Aggregate the results of all paramsets of an experiment"
    )
    # The value is handed straight to pd.read_csv by analyze(), so it must
    # name a CSV file rather than a directory.
    parser.add_argument("path", type=str, help="path to the CSV file to analyze")
    args = parser.parse_args()
    analyze(args.path)

View File

@ -0,0 +1,7 @@
{
"venvPath": ".",
"venv": ".venv",
"typeCheckingMode": "basic",
"reportMissingTypeStubs": "none",
"ignore": ["reportMissingTypeStubs"]
}

View File

@ -0,0 +1,2 @@
matplotlib==3.9.2
pandas==2.2.2