das-research/DHT/retrieval_on_das_plotting.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import warnings\n",
    "\n",
    "# Third-party\n",
    "import pandas as pd\n",
    "from IPython.display import display\n",
    "\n",
    "# Every warning is silenced from here on (i.e. before the local helpers load)\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Local plotting helpers\n",
    "import plots\n",
    "from plots import make_folder, SingleMetrics, CombinedMetrics\n",
    "\n",
    "# Folders the rest of the notebook reads csvs from / writes images to\n",
    "CSV_FOLDER = \"./csvs/retrieval_3\"\n",
    "IMG_FOLDER = \"./imgs/retrieval_3\"\n",
    "\n",
    "# The image output folder has to exist before any plot is generated\n",
    "make_folder(IMG_FOLDER, \"keeping track of the generated images\")\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Read all the available csv files in the given folder\n",
    "def read_files_with(target: str):\n",
    "    \"\"\"Return the paths of the files under CSV_FOLDER whose name contains `target`.\n",
    "\n",
    "    Args:\n",
    "        target: substring a file name must contain to be selected.\n",
    "\n",
    "    Returns:\n",
    "        Sorted list of matching paths, each built as directory + \"/\" + file name.\n",
    "    \"\"\"\n",
    "    matches = []\n",
    "    # Keep the walk variables distinct from the result list: sharing one name\n",
    "    # makes the loop append to the very list it is iterating over.\n",
    "    for folder, _, filenames in os.walk(CSV_FOLDER):\n",
    "        for filename in filenames:\n",
    "            if target in filename:\n",
    "                matches.append(folder + \"/\" + filename)\n",
    "    # os.walk yields entries in arbitrary order; sort for reproducible runs\n",
    "    matches.sort()\n",
    "    print(f\"found {len(matches)} with {target} files in {CSV_FOLDER}\")\n",
    "    # Callers iterate over the result, so it has to be returned\n",
    "    return matches\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Analysis of the data\n",
    "#### Individual metrics"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Individual metrics\n",
    "def print_avg_lookup(df):\n",
    "    \"\"\"Print the mean of each lookup metric column of `df`, one metric per line.\"\"\"\n",
    "    # (column name, number of tabs printed between the name and its mean)\n",
    "    metric_columns = (\n",
    "        (\"lookup_wallclock_time\", 3),\n",
    "        (\"attempted_nodes\", 5),\n",
    "        (\"finished_connection_attempts\", 1),\n",
    "        (\"successful_connections\", 3),\n",
    "        (\"failed_connections\", 4),\n",
    "        (\"total_discovered_nodes\", 3),\n",
    "        (\"retrievable\", 6),\n",
    "        (\"accuracy\", 6),\n",
    "    )\n",
    "    for column, tab_count in metric_columns:\n",
    "        padding = \"\\t\" * tab_count\n",
    "        print(f\"{column}{padding} {df[column].mean()}\")\n",
    "\n",
    "\n",
    "# Show the single metrics of every matching csv file, one file at a time\n",
    "files = read_files_with(\"retrieval_lookup_nn\")\n",
    "for csv_file in files:\n",
    "    lookup_df = pd.read_csv(csv_file)\n",
    "\n",
    "    # Rows holding the largest / smallest aggregated lookup delay\n",
    "    delays = lookup_df['lookup_aggregated_delay']\n",
    "    for label, row_index in ((\"max\", delays.idxmax()), (\"min\", delays.idxmin())):\n",
    "        print(f\"\\n{label} simulated lookup delay\")\n",
    "        display(lookup_df.loc[row_index])\n",
    "\n",
    "    print(\"\\navg simulated lookup delay\")\n",
    "    print_avg_lookup(lookup_df)\n",
    "\n",
    "    # A fresh tag dictionary is built for every file\n",
    "    metric_tags = {\n",
    "        \"retrievable\": {\n",
    "            \"title_tag\": \"retriebable\",\n",
    "            \"xlabel_tag\": \"retriebable\",\n",
    "            \"ylabel_tag\": \"\",\n",
    "        },\n",
    "    }\n",
    "    metrics = SingleMetrics(csv_file, IMG_FOLDER, \"Retrievals\", metric_tags)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Aggregated across samples"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Combine the metrics of all the matching runs into one set of plots\n",
    "files = read_files_with(\"lookup_nn\")\n",
    "\n",
    "# Metric to plot and the tags used for its title / axis labels\n",
    "retrieval_metrics = {\n",
    "    \"retrievable\": {\n",
    "        \"title_tag\": \"retriebable\",\n",
    "        \"xlabel_tag\": \"retriebable\",\n",
    "        \"ylabel_tag\": \"\",\n",
    "    },\n",
    "}\n",
    "\n",
    "# Fields passed as the legend of the combined plots\n",
    "legend_fields = [\n",
    "    plots.RETRIEVAL_NODES,\n",
    "    plots.CONCURRENT_SAMPLES,\n",
    "    plots.FAST_ERROR_RATE,\n",
    "    plots.CONNECTION_DELAYS,\n",
    "    plots.GAMMA,\n",
    "]\n",
    "\n",
    "unifiedMetrics = CombinedMetrics(\n",
    "    files=files,\n",
    "    aggregator=\"fast_delay_range\",\n",
    "    operation=\"retrieval\",\n",
    "    filters=[\"y0.125\", \"cdr50-75\"],\n",
    "    output_image_folder=IMG_FOLDER,\n",
    "    metrics=retrieval_metrics,\n",
    "    legend=legend_fields,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# example to reproduce the network details\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n",
    "file = CSV_FOLDER+\"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv\"\n",
    "\n",
    "df = pd.read_csv(file)\n",
    "# Count the rows of every (from, to) pair. size() does not rely on the csv\n",
    "# carrying an unnamed index column, unlike count() + rename(\"Unnamed: 0\").\n",
    "data = (\n",
    "    df.groupby([\"from\", \"to\"])\n",
    "    .size()\n",
    "    .reset_index(name=\"total_connections\")\n",
    "    .sort_values(by=\"total_connections\", ascending=False)\n",
    ")\n",
    "# One row per \"from\", one column per \"to\"; pairs never seen count as 0\n",
    "pivoted_data = data.pivot(index=\"from\", columns=\"to\", values=\"total_connections\").fillna(0)\n",
    "display(pivoted_data)\n",
    "\n",
    "# plot heatmap of connections\n",
    "cmap = sns.cm.rocket_r\n",
    "\n",
    "sns.set()\n",
    "fig, ax = plt.subplots()\n",
    "# Tick labels are left to seaborn (taken from the pivot's index / columns):\n",
    "# passing the strings \"to\" / \"from\" as tick labels would label the ticks with\n",
    "# their single characters. The names belong on the axis labels instead.\n",
    "g = sns.heatmap(data=pivoted_data, cmap=cmap, ax=ax)\n",
    "ax.set(xlabel=\"to\", ylabel=\"from\", title=\"Total connections per from/to pair\")\n",
    "# show the figure only once the heatmap has been drawn\n",
    "plt.show()\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# example to reproduce the network details\n",
    "import networkx as nx\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "\n",
    "file = CSV_FOLDER+\"/retrieval_lookup_network_nn12000_rn100_sampl100_fer10_ser0_cdr50-75_fdr50-100_sdr0_k20_a3_b20_y0.125_steps3.csv\"\n",
    "\n",
    "df = pd.read_csv(file)\n",
    "# Number of rows per (from, to) pair, kept under its own name so the raw\n",
    "# frame read from the csv is not overwritten\n",
    "interaction_counts = df.groupby([\"from\", \"to\"]).size().reset_index(name=\"count\")\n",
    "top_interactions = interaction_counts.sort_values('count', ascending=False).head(10000)  # top 10000 interactions\n",
    "display(top_interactions)\n",
    "\n",
    "G = nx.from_pandas_edgelist(top_interactions, 'to', 'from', ['count'])\n",
    "# Fixed seed: without it the layout (and the figure) changes on every run\n",
    "pos = nx.spring_layout(G, seed=42)\n",
    "\n",
    "# Keep the coordinates on the graph itself as well\n",
    "for node in G.nodes():\n",
    "    G.nodes[node]['pos'] = list(pos[node])\n",
    "\n",
    "# One line segment per edge; None separates consecutive segments\n",
    "edge_x = []\n",
    "edge_y = []\n",
    "for source, destination in G.edges():\n",
    "    x0, y0 = pos[source]\n",
    "    x1, y1 = pos[destination]\n",
    "    edge_x.extend([x0, x1, None])\n",
    "    edge_y.extend([y0, y1, None])\n",
    "\n",
    "node_x = [pos[node][0] for node in G.nodes()]\n",
    "node_y = [pos[node][1] for node in G.nodes()]\n",
    "\n",
    "edge_trace = go.Scatter(\n",
    "    x=edge_x, y=edge_y,\n",
    "    line=dict(width=0.5, color='#888'),\n",
    "    hoverinfo='none',\n",
    "    mode='lines')\n",
    "\n",
    "node_trace = go.Scatter(\n",
    "    x=node_x, y=node_y,\n",
    "    mode='markers',\n",
    "    hoverinfo='text',\n",
    "    marker=dict(\n",
    "        showscale=True,\n",
    "        colorscale='YlGnBu',\n",
    "        size=10,\n",
    "        colorbar=dict(\n",
    "            thickness=15,\n",
    "            # nested title form: the flat `titleside` attribute is deprecated\n",
    "            # and rejected by recent plotly releases\n",
    "            title=dict(text='Node Connections', side='right'),\n",
    "            xanchor='left'\n",
    "        ),\n",
    "        line_width=2))\n",
    "\n",
    "# Colour and hover text of each node: its number of adjacent nodes\n",
    "node_adjacencies = []\n",
    "node_text = []\n",
    "for node in G.nodes():\n",
    "    num_connections = len(G.adj[node])\n",
    "\n",
    "    node_adjacencies.append(num_connections)\n",
    "    node_text.append(f'Node id: {node}<br># of connections: {num_connections}')\n",
    "\n",
    "node_trace.marker.color = node_adjacencies\n",
    "node_trace.text = node_text\n",
    "\n",
    "fig = go.Figure(data=[edge_trace, node_trace],\n",
    "                layout=go.Layout(\n",
    "                    # nested title form: the flat `titlefont_size` attribute is\n",
    "                    # deprecated and rejected by recent plotly releases\n",
    "                    title=dict(\n",
    "                        text='Network of Top Address Interactions',\n",
    "                        font=dict(size=16)),\n",
    "                    showlegend=False,\n",
    "                    hovermode='closest',\n",
    "                    margin=dict(b=0, l=0, r=0, t=0),\n",
    "                    annotations=[dict(\n",
    "                        text=\"Based on top interactions\",\n",
    "                        showarrow=False,\n",
    "                        xref=\"paper\", yref=\"paper\",\n",
    "                        x=0.005, y=-0.002)],\n",
    "                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n",
    "                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))\n",
    "                )\n",
    "# NOTE(review): this replaces the title text set in the layout above\n",
    "fig.update_layout(title_text=\"DHT network's interactions\")\n",
    "fig.show()\n"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}