mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-01-10 06:16:25 +00:00
47f1f7ff1a
The new format is based on compressed CSV files in two channels: * Detailed per-epoch data * Aggregated "daily" summaries Using append-only CSV files significantly speeds up epoch processing during data generation. Compression results in smaller storage requirements overall. The aggregated files have a very minor cost in both CPU and storage, but lead to near-interactive speed for report generation. Other changes: - Implemented support for graceful shutdowns to avoid corrupting the saved files. - Fixed a memory leak caused by the missing `StateCache` clean-up on each iteration. - Addressed review comments - Moved the rewards and penalties calculation code into a separate module Required invasive changes to existing modules: - The `data` field of the `KeyedBlockRef` type is made public so that it can be used by the validator rewards monitor's Chain DAG update procedure. - The `getForkedBlock` procedure from the `blockchain_dag.nim` module is made public so that it can be used by the validator rewards monitor's Chain DAG update procedure.
360 lines
12 KiB
Plaintext
360 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "071e1cb8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%load_ext autotime\n",
|
|
"%matplotlib inline\n",
|
|
"import string\n",
|
|
"import sqlite3\n",
|
|
"import os\n",
|
|
"import re\n",
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"import snappy\n",
|
|
"from scipy.interpolate import make_interp_spline\n",
|
|
"from pathlib import Path\n",
|
|
"from io import StringIO"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f00bca92",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Database connection:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f69cd8b3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"database_dir = \"../build/data/mainnetValidatorDb/validatorDb.sqlite3\"\n",
|
|
"connection = sqlite3.connect(database_dir)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8ccd8945",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Rewards and penalties components:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "229adad0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"SOURCE = \"source\"\n",
|
|
"TARGET = \"target\"\n",
|
|
"HEAD = \"head\"\n",
|
|
"INCLUSION_DELAY = \"inclusion_delay\"\n",
|
|
"SYNC_COMMITTEE = \"sync_committee\"\n",
|
|
"\n",
|
|
"CSV_DATA_COLUMNS_NAMES = [\n",
|
|
" \"source_outcome\",\n",
|
|
" \"max_source_reward\",\n",
|
|
" \"target_outcome\",\n",
|
|
" \"max_target_reward\",\n",
|
|
" \"head_outcome\",\n",
|
|
" \"max_head_reward\",\n",
|
|
" \"inclusion_delay_outcome\",\n",
|
|
" \"max_inclusion_delay_reward\",\n",
|
|
" \"sync_committee_outcome\",\n",
|
|
" \"max_sync_committee_reward\",\n",
|
|
" \"proposer_outcome\",\n",
|
|
" \"inactivity_penalty\",\n",
|
|
" \"slashing_outcome\",\n",
|
|
" \"deposits\",\n",
|
|
" \"inclusion_delay\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9a747287",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Helper functions:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "63fb9f21",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def valid_public_key(public_key):\n",
|
|
" \"\"\"Checks whether a string is a valid hex representation of a public key of an Eth2 validator.\"\"\"\n",
|
|
" if len(public_key) != 96:\n",
|
|
" return False\n",
|
|
" return all(c in string.hexdigits for c in public_key)\n",
|
|
"\n",
|
|
"def idx(public_key):\n",
|
|
" \"\"\"Returns validator's index by its public key.\"\"\"\n",
|
|
" \n",
|
|
" if public_key.startswith(\"0x\"):\n",
|
|
" public_key = public_key[2:]\n",
|
|
" \n",
|
|
" if not valid_public_key(public_key):\n",
|
|
" raise ValueError(f\"The string '{public_key}' is not a valid public key of a validator.\")\n",
|
|
" \n",
|
|
" QUERY_FIELD = \"validator_index\"\n",
|
|
" query = f\"SELECT {QUERY_FIELD} FROM validators_raw WHERE pubkey=x'{public_key}';\"\n",
|
|
" query_result = pd.read_sql_query(query, connection)\n",
|
|
" \n",
|
|
" if len(query_result[QUERY_FIELD]) == 0:\n",
|
|
" raise ValueError(f\"Not found a validator with a public key '{public_key}'.\")\n",
|
|
" \n",
|
|
" if len(query_result[QUERY_FIELD]) > 1:\n",
|
|
" raise ValueError(f\"Found multiple validators with a public key '{public_key}'.\")\n",
|
|
" \n",
|
|
" return query_result[QUERY_FIELD][0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "946762c1",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Input parameters:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e9aca3ed",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"start_epoch = 10000\n",
|
|
"end_epoch = 20000\n",
|
|
"resolution = 225\n",
|
|
"files_dir = \"../build/data/mainnetCompactedValidatorDb/\"\n",
|
|
"use_compacted_files = True\n",
|
|
"rewards = [SOURCE, TARGET, HEAD, INCLUSION_DELAY, SYNC_COMMITTEE]\n",
|
|
"validators_sets = {\n",
|
|
" \"set1\": list(range(10)),\n",
|
|
" \"set2\": list(map(idx, [\n",
|
|
" \"0x8efba2238a00d678306c6258105b058e3c8b0c1f36e821de42da7319c4221b77aa74135dab1860235e19d6515575c381\",\n",
|
|
" \"0xa2dce641f347a9e46f58458390e168fa4b3a0166d74fc495457cb00c8e4054b5d284c62aa0d9578af1996c2e08e36fb6\",\n",
|
|
" \"0x98b7d0eac7ab95d34dbf2b7baa39a8ec451671328c063ab1207c2055d9d5d6f1115403dc5ea19a1111a404823bd9a6e9\",\n",
|
|
" \"0xb0fd08e2e06d1f4d90d0d6843feb543ebeca684cde397fe230e6cdf6f255d234f2c26f4b36c07170dfdfcbbe355d0848\",\n",
|
|
" \"0xab7a5aa955382906be3d76e322343bd439e8690f286ecf2f2a7646363b249f5c133d0501d766ccf1aa1640f0283047b3\",\n",
|
|
" \"0x980c0c001645a00b71c720935ce193e1ed0e917782c4cb07dd476a4fdb7decb8d91daf2770eb413055f0c1d14b5ed6df\",\n",
|
|
" \"0xac7cbdc535ce8254eb9cdedf10d5b1e75de4cd5e91756c3467d0492b01b70b5c6a81530e9849c6b696c8bc157861d0c3\",\n",
|
|
" \"0x98ea289db7ea9714699ec93701a3b6db43900e04ae5497be01fa8cc5a56754c23589eaf1f674de718e291376f452d68c\",\n",
|
|
" \"0x92451d4c099e51f54ab20f5c1a4edf405595c60122ccfb0f39250b7e80986fe0fe457bacd8a887e9087cd6fc323f492c\",\n",
|
|
" \"0xa06f6c678f0129aec056df309a4fe18760116ecaea2292947c5a9cc997632ff437195309783c269ffca7bb2704e675a0\"\n",
|
|
" ])),\n",
|
|
" \"set3\": list(range(20, 30))\n",
|
|
" }"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5e0fb2da",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Loading the data and losses calculation:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "485a2d7e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"COMPACTED_EPOCH_INFO_FILE_PATTERN = re.compile(r\"(\\d{8})\\_(\\d{8})\\.epoch\")\n",
|
|
"\n",
|
|
"def get_first_and_last_epoch(file_name):\n",
|
|
" m = re.match(COMPACTED_EPOCH_INFO_FILE_PATTERN, file_name)\n",
|
|
" if m == None:\n",
|
|
" return None\n",
|
|
" return int(m.group(1)), int(m.group(2))\n",
|
|
"\n",
|
|
"def isEpochInfoFile(file_name):\n",
|
|
" r = get_first_and_last_epoch(file_name)\n",
|
|
" if r == None:\n",
|
|
" return False\n",
|
|
" file_start_epoch, file_end_epoch = r\n",
|
|
" if file_start_epoch > file_end_epoch:\n",
|
|
" return False\n",
|
|
" if file_end_epoch < start_epoch:\n",
|
|
" return False\n",
|
|
" if file_start_epoch > end_epoch:\n",
|
|
" return False\n",
|
|
" return True\n",
|
|
"\n",
|
|
"def adjust_constraints(sorted_file_names):\n",
|
|
" first_start_epoch, first_end_epoch = get_first_and_last_epoch(sorted_file_names[0])\n",
|
|
" _, last_end_epoch = get_first_and_last_epoch(sorted_file_names[-1])\n",
|
|
" start_epoch = first_start_epoch\n",
|
|
" end_epoch = last_end_epoch\n",
|
|
" resolution = first_end_epoch - first_start_epoch + 1\n",
|
|
"\n",
|
|
"def read_csv(file_path):\n",
|
|
" return pd.read_csv(\n",
|
|
" StringIO(snappy.decompress(file_path.read_bytes()).decode(\"utf-8\")),\n",
|
|
" names = CSV_DATA_COLUMNS_NAMES, usecols = set(range(0, 10)))\n",
|
|
"\n",
|
|
"def get_outcome_var(component):\n",
|
|
" return component + \"_outcome\"\n",
|
|
"\n",
|
|
"def get_max_reward_var(component):\n",
|
|
" return \"max_\" + component + \"_reward\"\n",
|
|
"\n",
|
|
"def sum_max_values(t):\n",
|
|
" return sum(getattr(t, get_max_reward_var(reward)) for reward in rewards)\n",
|
|
"\n",
|
|
"def sum_actual_values(t):\n",
|
|
" return sum(getattr(t, get_outcome_var(reward)) for reward in rewards)\n",
|
|
"\n",
|
|
"total_losses_per_epoch_point = {}\n",
|
|
"validators_per_epoch_point = {}\n",
|
|
"average_losses_per_epoch_point = {}\n",
|
|
"\n",
|
|
"def compute_total_losses(epoch_point, epochs = None):\n",
|
|
" for set_name, validators in validators_sets.items():\n",
|
|
" if not set_name in total_losses_per_epoch_point:\n",
|
|
" total_losses_per_epoch_point[set_name] = {}\n",
|
|
" validators_per_epoch_point[set_name] = {}\n",
|
|
" if not epoch_point in total_losses_per_epoch_point[set_name]:\n",
|
|
" total_losses_per_epoch_point[set_name][epoch_point] = 0\n",
|
|
" validators_per_epoch_point[set_name][epoch_point] = 0\n",
|
|
" for validator_index in validators:\n",
|
|
" validator_info = data.iloc[validator_index]\n",
|
|
" validator_losses = \\\n",
|
|
" sum_max_values(validator_info) - sum_actual_values(validator_info)\n",
|
|
" total_losses_per_epoch_point[set_name][epoch_point] += \\\n",
|
|
" validator_losses if epochs == None else validator_losses * epochs\n",
|
|
" validators_per_epoch_point[set_name][epoch_point] += \\\n",
|
|
" 1 if epochs == None else epochs\n",
|
|
"\n",
|
|
"def compute_average_losses():\n",
|
|
" for set_name in validators_sets:\n",
|
|
" if not set_name in average_losses_per_epoch_point:\n",
|
|
" average_losses_per_epoch_point[set_name] = {}\n",
|
|
" for epoch_point, total_losses in total_losses_per_epoch_point[set_name].items():\n",
|
|
" average_losses_per_epoch_point[set_name][epoch_point] = \\\n",
|
|
" total_losses / validators_per_epoch_point[set_name][epoch_point]\n",
|
|
"\n",
|
|
"if use_compacted_files:\n",
|
|
" file_names = [file_name for file_name in os.listdir(files_dir)\n",
|
|
" if isEpochInfoFile(file_name)]\n",
|
|
" file_names.sort()\n",
|
|
" adjust_constraints(file_names)\n",
|
|
"\n",
|
|
" for file_name in file_names:\n",
|
|
" data = read_csv(Path(files_dir + file_name))\n",
|
|
" file_first_epoch, file_last_epoch = get_first_and_last_epoch(file_name)\n",
|
|
" file_epochs_range = file_last_epoch - file_first_epoch + 1\n",
|
|
" epoch_point = file_first_epoch // resolution\n",
|
|
" compute_total_losses(epoch_point, file_epochs_range)\n",
|
|
"else:\n",
|
|
" for epoch in range(start_epoch, end_epoch + 1):\n",
|
|
" data = read_csv(Path(files_dir + \"{:08}.epoch\".format(epoch)))\n",
|
|
" epoch_point = epoch // resolution\n",
|
|
" compute_total_losses(epoch_point)\n",
|
|
"\n",
|
|
"compute_average_losses()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "800ee35b",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Average losses graph:** "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "62d1e96d",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"plt.subplots(figsize = (20, 5))\n",
|
|
"plt.title(\"Average losses per epoch\")\n",
|
|
"plt.xlabel(\"Epoch\")\n",
|
|
"plt.ylabel(\"Gwei\")\n",
|
|
"\n",
|
|
"for name, value in average_losses_per_epoch_point.items():\n",
|
|
" epochs = np.array([ep * resolution + resolution // 2 for ep in value.keys()])\n",
|
|
" values = np.array(list(value.values()))\n",
|
|
" spline = make_interp_spline(epochs, values)\n",
|
|
" num_samples = (end_epoch - start_epoch + 1) // resolution * 100\n",
|
|
" x = np.linspace(epochs.min(), epochs.max(), num_samples)\n",
|
|
" y = spline(x)\n",
|
|
" plt.plot(x, y, label=name)\n",
|
|
"\n",
|
|
"plt.legend(loc=\"best\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0fff538c",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Total losses:**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6ab52601",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"sets_total_losses = {}\n",
|
|
"for set_name, epoch_points in total_losses_per_epoch_point.items():\n",
|
|
" sets_total_losses[set_name] = 0\n",
|
|
" for _, losses in epoch_points.items():\n",
|
|
" sets_total_losses[set_name] += losses\n",
|
|
"\n",
|
|
"plt.title(\"Total losses\")\n",
|
|
"plt.xlabel(\"Set\")\n",
|
|
"plt.ylabel(\"Ethers\")\n",
|
|
"plt.bar(list(sets_total_losses.keys()), [loss * 1e-9 for loss in sets_total_losses.values()])\n",
|
|
"print(sets_total_losses)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|