{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import os\n",
    "import warnings\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import networkx as nx\n",
    "import pandas as pd\n",
    "import plotly.graph_objects as go\n",
    "import seaborn as sns\n",
    "from IPython.display import display\n",
    "\n",
    "import plots\n",
    "from plots import make_folder, SingleMetrics, CombinedMetrics\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Necessary folders to start\n",
    "CSV_FOLDER = \"./csvs/retrieval_test\"\n",
    "IMG_FOLDER = \"./imgs/retrieval_test\"\n",
    "\n",
    "# Network CSV shared by the heatmap and the graph cells at the end of the notebook\n",
    "NETWORK_CSV = os.path.join(\n",
    "    CSV_FOLDER,\n",
    "    \"retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv\",\n",
    ")\n",
    "\n",
    "# Plot configuration handed to both SingleMetrics and CombinedMetrics\n",
    "METRICS = {\n",
    "    \"retrievable\": {\n",
    "        \"title_tag\": \"retrievable\",\n",
    "        \"xlabel_tag\": \"retrievable\",\n",
    "        \"ylabel_tag\": \"\",\n",
    "    },\n",
    "}\n",
    "\n",
    "TOP_INTERACTIONS = 10000  # most frequent (from, to) pairs kept for the network graph\n",
    "LAYOUT_SEED = 42  # seed for nx.spring_layout so the graph layout is reproducible\n",
    "\n",
    "# make sure that the output folder exists\n",
    "make_folder(IMG_FOLDER, \"keeping track of the generated images\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def read_files_with(target: str) -> list:\n",
    "    \"\"\"Return the paths of all files under CSV_FOLDER whose name contains `target`.\n",
    "\n",
    "    The folder is walked recursively. The paths are returned sorted so that\n",
    "    every run processes the files in the same order.\n",
    "    \"\"\"\n",
    "    matches = []\n",
    "    # distinct loop names: reusing `files` here would shadow the accumulator\n",
    "    for dirpath, _, filenames in os.walk(CSV_FOLDER):\n",
    "        for filename in filenames:\n",
    "            if target in filename:\n",
    "                matches.append(os.path.join(dirpath, filename))\n",
    "    print(f\"found {len(matches)} with {target} files in {CSV_FOLDER}\")\n",
    "    return sorted(matches)\n",
    "\n",
    "\n",
    "def count_connections(csv_file: str, count_name: str = \"total_connections\") -> pd.DataFrame:\n",
    "    \"\"\"Count the rows of `csv_file` for every (from, to) pair.\n",
    "\n",
    "    Returns one row per pair with the columns `from`, `to` and `count_name`.\n",
    "    \"\"\"\n",
    "    return (\n",
    "        pd.read_csv(csv_file)\n",
    "        .groupby([\"from\", \"to\"])\n",
    "        .size()\n",
    "        .reset_index(name=count_name)\n",
    "    )\n",
    "\n",
    "\n",
    "def print_avg_lookup(df):\n",
    "    \"\"\"Print the mean of each lookup metric column of `df`, one per line.\"\"\"\n",
    "    print(f\"lookup_wallclock_time\\t\\t\\t {df.lookup_wallclock_time.mean()}\")\n",
    "    print(f\"attempted_nodes\\t\\t\\t\\t\\t {df.attempted_nodes.mean()}\")\n",
    "    print(f\"finished_connection_attempts\\t {df.finished_connection_attempts.mean()}\")\n",
    "    print(f\"successful_connections\\t\\t\\t {df.successful_connections.mean()}\")\n",
    "    print(f\"failed_connections\\t\\t\\t\\t {df.failed_connections.mean()}\")\n",
    "    print(f\"total_discovered_nodes\\t\\t\\t {df.total_discovered_nodes.mean()}\")\n",
    "    print(f\"retrievable\\t\\t\\t\\t\\t\\t {df.retrievable.mean()}\")\n",
    "    print(f\"accuracy\\t\\t\\t\\t\\t\\t {df.accuracy.mean()}\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Analysis of the data\n",
    "#### Individual metrics"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Display the single metrics of the test individually\n",
    "files = read_files_with(\"retrieval_lookup_nn\")\n",
    "for file in files:\n",
    "    lookups = pd.read_csv(file)\n",
    "    print(\"\\nmax simulated lookup delay\")\n",
    "    display(lookups.loc[lookups['lookup_aggregated_delay'].idxmax()])\n",
    "\n",
    "    print(\"\\nmin simulated lookup delay\")\n",
    "    display(lookups.loc[lookups['lookup_aggregated_delay'].idxmin()])\n",
    "\n",
    "    print(\"\\navg simulated lookup delay\")\n",
    "    print_avg_lookup(lookups)\n",
    "    metrics = SingleMetrics(file, IMG_FOLDER, \"Retrievals\", METRICS)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Aggregated across samples"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# aggregate metrics across runs\n",
    "files = read_files_with(\"lookup_nn\")\n",
    "unifiedMetrics = CombinedMetrics(\n",
    "    files=files,\n",
    "    aggregator=\"fast_delay_range\",\n",
    "    operation=\"retrieval\",\n",
    "    filters=[\"y0.125\", \"cdr50-75\"],\n",
    "    output_image_folder=IMG_FOLDER,\n",
    "    metrics=METRICS,\n",
    "    legend=[\n",
    "        plots.RETRIEVAL_NODES,\n",
    "        plots.CONCURRENT_SAMPLES,\n",
    "        plots.FAST_ERROR_RATE,\n",
    "        plots.CONNECTION_DELAYS,\n",
    "        plots.GAMMA,\n",
    "    ],\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Network details"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# example to reproduce the network details: heatmap of connections\n",
    "connection_counts = count_connections(NETWORK_CSV)\n",
    "connection_matrix = connection_counts.pivot(\n",
    "    index=\"from\", columns=\"to\", values=\"total_connections\"\n",
    ").fillna(0)\n",
    "display(connection_matrix)\n",
    "\n",
    "sns.set()\n",
    "fig, ax = plt.subplots()\n",
    "# tick labels are taken from the matrix index/columns; passing a plain string\n",
    "# such as \"to\" would be split into one tick label per character\n",
    "sns.heatmap(data=connection_matrix, cmap=\"rocket_r\", ax=ax)\n",
    "ax.set(title=\"Total connections per (from, to) pair\", xlabel=\"to\", ylabel=\"from\")\n",
    "# show only after the heatmap has been drawn\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# example to reproduce the network details: graph of the top interactions\n",
    "top_interactions = (\n",
    "    count_connections(NETWORK_CSV, count_name=\"count\")\n",
    "    .sort_values(\"count\", ascending=False)\n",
    "    .head(TOP_INTERACTIONS)\n",
    ")\n",
    "display(top_interactions)\n",
    "\n",
    "G = nx.from_pandas_edgelist(top_interactions, 'to', 'from', ['count'])\n",
    "# seeded so that the layout (and therefore the figure) is the same on every run\n",
    "pos = nx.spring_layout(G, seed=LAYOUT_SEED)\n",
    "\n",
    "# one line segment per edge; None separates the segments inside a single trace\n",
    "edge_x = []\n",
    "edge_y = []\n",
    "for source, target in G.edges():\n",
    "    x0, y0 = pos[source]\n",
    "    x1, y1 = pos[target]\n",
    "    edge_x.extend([x0, x1, None])\n",
    "    edge_y.extend([y0, y1, None])\n",
    "\n",
    "node_x = [pos[node][0] for node in G.nodes()]\n",
    "node_y = [pos[node][1] for node in G.nodes()]\n",
    "\n",
    "# colour and hover text of every node: number of adjacent nodes\n",
    "node_adjacencies = []\n",
    "node_text = []\n",
    "for node in G.nodes():\n",
    "    num_connections = len(G.adj[node])\n",
    "    node_adjacencies.append(num_connections)\n",
    "    # Plotly hover text uses <br> as line break\n",
    "    node_text.append(f'Node id: {node}<br># of connections: {num_connections}')\n",
    "\n",
    "edge_trace = go.Scatter(\n",
    "    x=edge_x, y=edge_y,\n",
    "    line=dict(width=0.5, color='#888'),\n",
    "    hoverinfo='none',\n",
    "    mode='lines')\n",
    "\n",
    "node_trace = go.Scatter(\n",
    "    x=node_x, y=node_y,\n",
    "    mode='markers',\n",
    "    hoverinfo='text',\n",
    "    text=node_text,\n",
    "    marker=dict(\n",
    "        showscale=True,\n",
    "        colorscale='YlGnBu',\n",
    "        color=node_adjacencies,\n",
    "        size=10,\n",
    "        colorbar=dict(\n",
    "            thickness=15,\n",
    "            # title.side replaces the deprecated `titleside` attribute\n",
    "            title=dict(text='Node Connections', side='right'),\n",
    "            xanchor='left',\n",
    "        ),\n",
    "        line_width=2))\n",
    "\n",
    "fig = go.Figure(\n",
    "    data=[edge_trace, node_trace],\n",
    "    layout=go.Layout(\n",
    "        # title.font replaces the deprecated `titlefont` attribute\n",
    "        title=dict(text=\"DHT network's interactions\", font=dict(size=16)),\n",
    "        showlegend=False,\n",
    "        hovermode='closest',\n",
    "        # top margin leaves room for the title\n",
    "        margin=dict(b=0, l=0, r=0, t=40),\n",
    "        annotations=[dict(\n",
    "            text=\"Based on top interactions\",\n",
    "            showarrow=False,\n",
    "            xref=\"paper\", yref=\"paper\",\n",
    "            x=0.005, y=-0.002)],\n",
    "        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n",
    "        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))\n",
    "fig.show()"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}