das-research/DHT/retrieval_on_das_plotting.ipynb
2023-10-04 10:13:47 +02:00

281 lines
9.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"from IPython.display import display\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from plots import make_folder, SingleMetrics, CombinedMetrics\n",
"import plots\n",
"\n",
"# Necessary folders to start\n",
"CSV_FOLDER = \"./csvs/retrieval_3\"\n",
"IMG_FOLDER = \"./imgs/retrieval_3\"\n",
"\n",
"# make sure that the output folder exists\n",
"make_folder(IMG_FOLDER, \"keeping track of the generated images\")\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Read all the available csv files in the given folder\n",
"def read_files_with(target: str):\n",
" files = []\n",
" for dir, _, files in os.walk(CSV_FOLDER):\n",
" for file in files:\n",
" if target in file:\n",
" files.append(dir+\"/\"+file)\n",
" else:\n",
" continue\n",
" print(f\"found {len(files)} with {target} files in {CSV_FOLDER}\")\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"## Analysis of the data\n",
"#### Individual metrics"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Individual metrics\n",
"def print_avg_lookup(df):\n",
" print(f\"lookup_wallclock_time\\t\\t\\t {df.lookup_wallclock_time.mean()}\")\n",
" print(f\"attempted_nodes\\t\\t\\t\\t\\t {df.attempted_nodes.mean()}\")\n",
" print(f\"finished_connection_attempts\\t {df.finished_connection_attempts.mean()}\")\n",
" print(f\"successful_connections\\t\\t\\t {df.successful_connections.mean()}\")\n",
" print(f\"failed_connections\\t\\t\\t\\t {df.failed_connections.mean()}\")\n",
" print(f\"total_discovered_nodes\\t\\t\\t {df.total_discovered_nodes.mean()}\")\n",
" print(f\"retrievable\\t\\t\\t\\t\\t\\t {df.retrievable.mean()}\")\n",
" print(f\"accuracy\\t\\t\\t\\t\\t\\t {df.accuracy.mean()}\")\n",
"\n",
"\n",
"# Display the sigle metrics of the test individually\n",
"files = read_files_with(\"retrieval_lookup_nn\")\n",
"for file in files:\n",
" df = pd.read_csv(file)\n",
" print(\"\\nmax simulated lookup delay\")\n",
" display(df.loc[df['lookup_aggregated_delay'].idxmax()])\n",
"\n",
" print(\"\\nmin simulated lookup delay\")\n",
" display(df.loc[df['lookup_aggregated_delay'].idxmin()])\n",
"\n",
" print(\"\\navg simulated lookup delay\")\n",
" print_avg_lookup(df)\n",
" metrics = SingleMetrics(file, IMG_FOLDER, \"Retrievals\", {\n",
" \"retrievable\": {\n",
" \"title_tag\": \"retriebable\",\n",
" \"xlabel_tag\": \"retriebable\",\n",
" \"ylabel_tag\": \"\",\n",
" },})"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"#### Aggregated accross samples"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# aggregate metrics across runs\n",
"files = read_files_with(\"lookup_nn\")\n",
"unifiedMetrics = CombinedMetrics(\n",
" files=files, aggregator=\"fast_delay_range\",\n",
" operation=\"retrieval\",\n",
" filters=[\"y0.125\", \"cdr50-75\"], output_image_folder=IMG_FOLDER,\n",
" metrics={\n",
" \"retrievable\": {\n",
" \"title_tag\": \"retriebable\",\n",
" \"xlabel_tag\": \"retriebable\",\n",
" \"ylabel_tag\": \"\",\n",
" },\n",
" },\n",
" legend=[\n",
" plots.RETRIEVAL_NODES,\n",
" plots.CONCURRENT_SAMPLES,\n",
" plots.FAST_ERROR_RATE,\n",
" plots.CONNECTION_DELAYS,\n",
" plots.GAMMA,\n",
" ])"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# example to reproduce the network details\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"file = CSV_FOLDER+\"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv\"\n",
"\n",
"df = pd.read_csv(file)\n",
"data = df.groupby([\"from\", \"to\"]).count()\n",
"data = data.reset_index()\n",
"data = data.rename(columns={\"Unnamed: 0\": \"total_connections\"})\n",
"data = data.sort_values(by=\"total_connections\", ascending=False)\n",
"pivoted_data = data.pivot(index=\"from\", columns=\"to\", values=\"total_connections\").fillna(0)\n",
"display(pivoted_data)\n",
"\n",
"# plot heatmap of connections\n",
"cmap = sns.cm.rocket_r\n",
"\n",
"sns.set()\n",
"plt.show()\n",
"g = sns.heatmap(data=pivoted_data, xticklabels=\"to\", yticklabels=\"from\", cmap = cmap)\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# example to reproduce the network details\n",
"import networkx as nx\n",
"import plotly.graph_objects as go\n",
"\n",
"\n",
"file = CSV_FOLDER+\"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv\"\n",
"\n",
"df = pd.read_csv(file)\n",
"df = df.groupby([\"from\", \"to\"]).size().reset_index(name=\"count\")\n",
"top_interactions = df.sort_values('count', ascending=False).head(10000) # top 10000 interactions\n",
"display(top_interactions)\n",
"\n",
"G = nx.from_pandas_edgelist(top_interactions, 'to', 'from', ['count'])\n",
"pos = nx.spring_layout(G)\n",
"\n",
"for node in G.nodes():\n",
" G.nodes[node]['pos'] = list(pos[node])\n",
"\n",
"edge_x = []\n",
"edge_y = []\n",
"for edge in G.edges():\n",
" x0, y0 = G.nodes[edge[0]]['pos']\n",
" x1, y1 = G.nodes[edge[1]]['pos']\n",
" edge_x.extend([x0, x1, None])\n",
" edge_y.extend([y0, y1, None])\n",
"\n",
"node_x = [pos[node][0] for node in G.nodes()]\n",
"node_y = [pos[node][1] for node in G.nodes()]\n",
"\n",
"edge_trace = go.Scatter(\n",
" x=edge_x, y=edge_y,\n",
" line=dict(width=0.5, color='#888'),\n",
" hoverinfo='none',\n",
" mode='lines')\n",
"\n",
"node_trace = go.Scatter(\n",
" x=node_x, y=node_y,\n",
" mode='markers',\n",
" hoverinfo='text',\n",
" marker=dict(\n",
" showscale=True,\n",
" colorscale='YlGnBu',\n",
" size=10,\n",
" colorbar=dict(\n",
" thickness=15,\n",
" title='Node Connections',\n",
" xanchor='left',\n",
" titleside='right'\n",
" ),\n",
" line_width=2))\n",
"\n",
"node_adjacencies = []\n",
"node_text = []\n",
"for node in G.nodes():\n",
" adjacencies = list(G.adj[node]) # List of nodes adjacent to the current node\n",
" num_connections = len(adjacencies)\n",
"\n",
" node_adjacencies.append(num_connections)\n",
" node_text.append(f'Node id: {node}<br># of connections: {num_connections}')\n",
"\n",
"node_trace.marker.color = node_adjacencies\n",
"node_trace.text = node_text\n",
"\n",
"fig = go.Figure(data=[edge_trace, node_trace],\n",
" layout=go.Layout(\n",
" title='Network of Top Address Interactions',\n",
" titlefont_size=16,\n",
" showlegend=False,\n",
" hovermode='closest',\n",
" margin=dict(b=0, l=0, r=0, t=0),\n",
" annotations=[dict(\n",
" text=\"Based on top interactions\",\n",
" showarrow=False,\n",
" xref=\"paper\", yref=\"paper\",\n",
" x=0.005, y=-0.002)],\n",
" xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n",
" yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))\n",
" )\n",
"fig.update_layout(title_text=\"DHT network's interactions\")\n",
"fig.show()\n"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}