Script for comparing csv outputs from block import
This commit is contained in:
parent
1e65093b3e
commit
0c6c84f2ce
|
@ -0,0 +1,40 @@
|
||||||
|
# Utility scripts
|
||||||
|
|
||||||
|
## block-import-stats.py
|
||||||
|
|
||||||
|
This script compares outputs from two `nimbus import --debug-csv-stats`, a
|
||||||
|
baseline and a contender.
|
||||||
|
|
||||||
|
To use it, set up a virtual environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create a venv for the tool
|
||||||
|
python -m venv stats
|
||||||
|
. stats/bin/activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
python block-import-stats.py
|
||||||
|
```
|
||||||
|
|
||||||
|
* Generate a baseline version by processing a long range of blocks using
|
||||||
|
`nimbus import`
|
||||||
|
* Modify your code and commit to git (to generate a unique identifier for the code)
|
||||||
|
* Re-run the same import over the range of blocks of interest, saving the import
|
||||||
|
statistics to a new CSV
|
||||||
|
* Pass the two CSV files to the script
|
||||||
|
|
||||||
|
By default, the script will skip block numbers below 500k since these are mostly
|
||||||
|
uninteresting.
|
||||||
|
|
||||||
|
See `-h` for help text on running the script.
|
||||||
|
|
||||||
|
### Testing a particular range of blocks
|
||||||
|
|
||||||
|
As long as block import is run on similar hardware, each run can be saved for
|
||||||
|
future reference using the git hash.
|
||||||
|
|
||||||
|
The block import can be run repeatedly with `--max-blocks` to stop after
|
||||||
|
processing a number of blocks - by copying the state at that point, one can
|
||||||
|
resume or replay the import of a particular block range.
|
||||||
|
|
||||||
|
See `make_states.sh` for such an example.
|
|
@ -0,0 +1,137 @@
|
||||||
|
# Third-party analysis/plotting dependencies (see requirements.txt).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import argparse

# Large default canvas so the four comparison subplots stay readable.
plt.rcParams["figure.figsize"] = [40, 30]

from pandas.plotting import register_matplotlib_converters

# Let matplotlib plot pandas datetime-like values without warnings.
register_matplotlib_converters()
|
||||||
|
|
||||||
|
|
||||||
|
def readStats(name: str, min_block_number: int):
    """Load one `nimbus import --debug-csv-stats` CSV and derive rates.

    Rows below ``min_block_number`` are dropped, ``block_number`` becomes
    the index, ``time`` is converted from nanoseconds to seconds and two
    derived columns are added: ``bps`` (blocks/s) and ``tps`` (txs/s).
    """
    stats = pd.read_csv(name).convert_dtypes()
    stats = stats[stats.block_number >= min_block_number]
    stats.set_index("block_number", inplace=True)
    # Raw measurements are in nanoseconds.
    stats["time"] = stats["time"] / 1000000000
    # Gas is not used by any downstream comparison.
    stats = stats.drop(columns=["gas"])
    stats["bps"] = stats["blocks"] / stats["time"]
    stats["tps"] = stats["txs"] / stats["time"]
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def prettySecs(s: float):
    """Format a duration in seconds as a compact [-]XhYmZs string.

    Fractions of a second are truncated; a leading "-" marks negative
    durations. Leading zero-valued units are omitted (e.g. "5s", "1m1s").
    """
    sign = "-" if s < 0 else ""
    total = abs(int(s))
    minutes, seconds = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)

    if hours:
        return f"{sign}{hours}h{minutes}m{seconds}s"
    if minutes:
        return f"{sign}{minutes}m{seconds}s"
    return f"{sign}{seconds}s"
|
||||||
|
|
||||||
|
|
||||||
|
def formatBins(df: pd.DataFrame, bins: int):
    """Group ``df`` into ``bins`` block-number ranges for the overview table.

    Requires a ``block_number`` column (i.e. call after ``reset_index``).
    When ``bins`` is 0 (or negative) the frame is returned ungrouped so
    every row appears in the report.
    """
    if bins <= 0:
        return df
    # Evenly spaced integer edges spanning the observed block range.
    edges = np.linspace(
        df.block_number.iloc[0], df.block_number.iloc[-1], bins, dtype=int
    )
    # observed=True: only emit bins that actually contain rows.
    return df.groupby(pd.cut(df["block_number"], edges), observed=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line interface: compare a baseline stats CSV against a contender.
parser = argparse.ArgumentParser()
parser.add_argument("baseline")
parser.add_argument("contender")
parser.add_argument("--plot", action="store_true")
parser.add_argument(
    "--bins",
    default=10,
    type=int,
    help="Number of bins to group block ranges into in overview, 0=all rows",
)
parser.add_argument(
    "--min-block-number",
    default=500000,
    type=int,
    # Fixed typo in help text ("Skip block blocks").
    help="Skip blocks below the given number",
)
args = parser.parse_args()

baseline = readStats(args.baseline, args.min_block_number)
contender = readStats(args.contender, args.min_block_number)

# Pick out the rows to match - a more sophisticated version of this would
# interpolate, perhaps - also, maybe should check for non-matching block/tx counts
df = baseline.merge(contender, on=("block_number", "blocks", "txs"))

# Relative differences (contender vs baseline): bpsd/tpsd > 0 and timed < 0
# mean the contender is faster.
df["bpsd"] = (df.bps_y - df.bps_x) / df.bps_x
df["tpsd"] = (df.tps_y - df.tps_x) / df.tps_x
df["timed"] = (df.time_y - df.time_x) / df.time_x

# Turn block_number back into a column - formatBins expects it there.
df.reset_index(inplace=True)

if args.plot:
    plt.rcParams["axes.grid"] = True

    fig = plt.figure()
    bps = fig.add_subplot(2, 2, 1, title="Blocks per second (more is better)")
    bpsd = fig.add_subplot(2, 2, 2, title="Difference (>0 is better)")
    tps = fig.add_subplot(2, 2, 3, title="Transactions per second (more is better)")
    tpsd = fig.add_subplot(2, 2, 4, title="Difference (>0 is better)")

    # Rolling mean smooths per-measurement noise in the plotted curves.
    bps.plot(df.block_number, df.bps_x.rolling(3).mean(), label="baseline")
    bps.plot(df.block_number, df.bps_y.rolling(3).mean(), label="contender")

    bpsd.plot(df.block_number, df.bpsd.rolling(3).mean())

    tps.plot(df.block_number, df.tps_x.rolling(3).mean(), label="baseline")
    tps.plot(df.block_number, df.tps_y.rolling(3).mean(), label="contender")

    tpsd.plot(df.block_number, df.tpsd.rolling(3).mean())

    bps.legend()
    tps.legend()

    fig.subplots_adjust(bottom=0.05, right=0.95, top=0.95, left=0.05)
    plt.show()

# Overview table: per-bin means of absolute rates and relative differences.
print(f"{os.path.basename(args.baseline)} vs {os.path.basename(args.contender)}")
print(
    formatBins(df, args.bins)
    .agg(
        dict.fromkeys(
            ["bps_x", "bps_y", "tps_x", "tps_y", "bpsd", "tpsd", "timed"], "mean"
        ),
    )
    .to_string(
        formatters=dict(
            dict.fromkeys(["bpsd", "tpsd", "timed"], "{:,.2%}".format),
            # Include tps_y, which was missing from the fixed-point
            # formatters (all four rate columns are formatted alike now).
            **dict.fromkeys(["bps_x", "bps_y", "tps_x", "tps_y"], "{:,.2f}".format),
        )
    )
)

print(
    f"\nblocks: {df.blocks.sum()}, baseline: {prettySecs(df.time_x.sum())}, contender: {prettySecs(df.time_y.sum())}"
)
print(f"bpsd (mean): {df.bpsd.mean():.2%}")
print(f"tpsd (mean): {df.tpsd.mean():.2%}")
print(
    f"Time (sum): {prettySecs(df.time_y.sum()-df.time_x.sum())}, {df.timed.mean():.2%}"
)

print()
print(
    "bpsd = blocks per sec diff (+), tpsd = txs per sec diff, timed = time to process diff (-)"
)
print("+ = more is better, - = less is better")
|
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/bash

# Create a set of states, each advanced by 100k blocks

# Abort the script on the first failing command.
set -e

# Exit the whole loop (not just the current nimbus run) on Ctrl-C.
trap "exit" INT

# All three positional directory arguments are required.
if [ -z "$3" ]
then
    echo "Syntax: make_states.sh datadir era1dir statsdir"
    exit 1;
fi

counter=0

# Directory containing this script, resolved regardless of invocation cwd.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Timestamp + short git revision uniquely name this run's data dir and CSV.
DATE="$(date -u +%Y%m%d_%H%M)"
REV=$(git rev-parse --short=8 HEAD)

# Repeatedly import 100k blocks, snapshotting the data dir after each batch;
# runs until interrupted (or until nimbus fails, via `set -e`).
while true;
do
    "$SCRIPT_DIR/../build/nimbus" import \
        --data-dir:"$1/${DATE}-${REV}" \
        --era1-dir:"$2" \
        --debug-csv-stats:"$3/stats-${DATE}-${REV}.csv" \
        --max-blocks:100000
    # Keep a numbered copy of the state reached after this batch.
    cp -ar "$1/${DATE}-${REV}" "$1/${DATE}-${REV}"-$(printf "%04d" $counter)
    counter=$((counter+1))
done
|
|
@ -0,0 +1,3 @@
|
||||||
|
pandas
|
||||||
|
matplotlib
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
alabaster==0.7.16
|
||||||
|
attrs==23.2.0
|
||||||
|
Babel==2.15.0
|
||||||
|
cattrs==23.2.3
|
||||||
|
certifi==2024.2.2
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
contourpy==1.2.1
|
||||||
|
cycler==0.12.1
|
||||||
|
docutils==0.20.1
|
||||||
|
esbonio==0.16.4
|
||||||
|
fonttools==4.53.0
|
||||||
|
idna==3.7
|
||||||
|
imagesize==1.4.1
|
||||||
|
Jinja2==3.1.3
|
||||||
|
kiwisolver==1.4.5
|
||||||
|
lsprotocol==2023.0.1
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
matplotlib==3.9.0
|
||||||
|
numpy==1.26.4
|
||||||
|
packaging==24.0
|
||||||
|
pandas==2.2.2
|
||||||
|
pillow==10.3.0
|
||||||
|
platformdirs==4.2.1
|
||||||
|
pygls==1.3.1
|
||||||
|
Pygments==2.18.0
|
||||||
|
pyparsing==3.1.2
|
||||||
|
pyspellchecker==0.8.1
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
pytz==2024.1
|
||||||
|
requests==2.31.0
|
||||||
|
six==1.16.0
|
||||||
|
snowballstemmer==2.2.0
|
||||||
|
Sphinx==7.3.7
|
||||||
|
sphinxcontrib-applehelp==1.0.8
|
||||||
|
sphinxcontrib-devhelp==1.0.6
|
||||||
|
sphinxcontrib-htmlhelp==2.0.5
|
||||||
|
sphinxcontrib-jsmath==1.0.1
|
||||||
|
sphinxcontrib-qthelp==1.0.7
|
||||||
|
sphinxcontrib-serializinghtml==1.1.10
|
||||||
|
tzdata==2024.1
|
||||||
|
urllib3==2.2.1
|
Loading…
Reference in New Issue