nimbus-eth1/nimbus/nimbus_import.nim
Jacek Sieka 0b32078c4b
Consolidate block type for block processing (#2325)
This PR consolidates the split header-body sequences into a single EthBlock
sequence and cleans up the fallout from that which significantly reduces
block processing overhead during import thanks to less garbage collection
and fewer copies of things all around.

Notably, since the number of headers must always match the number of bodies,
we also get rid of a pointless degree of freedom that in the future could
introduce unnecessary bugs.

* only read header and body from era file
* avoid several unnecessary copies along the block processing way
* simplify signatures, cleaning up unused arguemnts and returns
* use `stew/assign2` in a few strategic places where the generated
  nim assignent is slow and add a few `move` to work around poor
  analysis in nim 1.6 (will need to be revisited for 2.0)

```
stats-20240607_2223-a814aa0b.csv vs stats-20240608_0714-21c1d0a9.csv
                       bps_x     bps_y     tps_x        tps_y    bpsd    tpsd    timed
block_number
(498305, 713245]    1,540.52  1,809.73  2,361.58  2775.340189  17.63%  17.63%  -14.92%
(713245, 928185]      730.36    865.26  1,715.90  2028.973852  18.01%  18.01%  -15.21%
(928185, 1143126]     663.03    789.10  2,529.26  3032.490771  19.79%  19.79%  -16.28%
(1143126, 1358066]    393.46    508.05  2,152.50  2777.578119  29.13%  29.13%  -22.50%
(1358066, 1573007]    370.88    440.72  2,351.31  2791.896052  18.81%  18.81%  -15.80%
(1573007, 1787947]    283.65    335.11  2,068.93  2441.373402  17.60%  17.60%  -14.91%
(1787947, 2002888]    287.29    342.11  2,078.39  2474.179448  18.99%  18.99%  -15.91%
(2002888, 2217828]    293.38    343.16  2,208.83   2584.77457  17.16%  17.16%  -14.61%
(2217828, 2432769]    140.09    167.86  1,081.87  1296.336926  18.82%  18.82%  -15.80%

blocks: 1934464, baseline: 3h13m1s, contender: 2h43m47s
bpsd (mean): 19.55%
tpsd (mean): 19.55%
Time (total): -29m13s, -15.14%
```
2024-06-09 16:32:20 +02:00

188 lines
5.2 KiB
Nim

# Nimbus
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
{.push raises: [].}
import
chronicles,
chronos/timer,
std/[strformat, strutils],
stew/io2,
./config,
./common/common,
./core/[block_import, chain],
./db/era1_db,
beacon_chain/era_db
var running {.volatile.} = true
func shortLog(a: timer.Duration, parts = int.high): string {.inline.} =
## Returns string representation of Duration ``a`` as nanoseconds value.
var
res = ""
v = a.nanoseconds()
parts = parts
template f(n: string, T: Duration) =
if v >= T.nanoseconds():
res.add($(uint64(v div T.nanoseconds())))
res.add(n)
v = v mod T.nanoseconds()
dec parts
if v == 0 or parts <= 0:
return res
f("w", Week)
f("d", Day)
f("h", Hour)
f("m", Minute)
f("s", Second)
f("ms", Millisecond)
f("us", Microsecond)
f("ns", Nanosecond)
res
proc importBlocks*(conf: NimbusConf, com: CommonRef) =
proc controlCHandler() {.noconv.} =
when defined(windows):
# workaround for https://github.com/nim-lang/Nim/issues/4057
setupForeignThreadGc()
running = false
setControlCHook(controlCHandler)
let
start =
try:
com.db.getSavedStateBlockNumber().truncate(uint64) + 1
except RlpError as exc:
error "Could not read block number", err = exc.msg
quit(QuitFailure)
chain = com.newChain()
var
imported = 0'u64
gas = 0.u256
txs = 0
time0 = Moment.now()
csv =
if conf.csvStats.isSome:
try:
let f = open(conf.csvStats.get(), fmAppend)
if f.getFileSize() == 0:
f.writeLine("block_number,blocks,txs,gas,time")
f
except IOError as exc:
error "Could not open statistics output file",
file = conf.csvStats, err = exc.msg
quit(QuitFailure)
else:
File(nil)
defer:
if csv != nil:
close(csv)
template blockNumber(): uint64 =
start + imported
if isDir(conf.era1Dir.string):
doAssert conf.networkId == MainNet, "Only mainnet era1 current supported"
const
# TODO the merge block number could be fetched from the era1 file instead,
# specially if the accumulator is added to the chain metadata
lastEra1Block = 15537393
if start <= lastEra1Block:
notice "Importing era1 archive",
start, dataDir = conf.dataDir.string, era1Dir = conf.era1Dir.string
var blocks: seq[EthBlock]
func f(value: float): string =
try:
&"{value:4.3f}"
except ValueError:
raiseAssert "valid fmt string"
template process() =
let
time1 = Moment.now()
statsRes = chain.persistBlocks(blocks)
if statsRes.isErr():
error "Failed to persist blocks", error = statsRes.error
quit(QuitFailure)
txs += statsRes[].txs
gas += uint64 statsRes[].gas
let
time2 = Moment.now()
diff1 = (time2 - time1).nanoseconds().float / 1000000000
diff0 = (time2 - time0).nanoseconds().float / 1000000000
info "Imported blocks",
blockNumber,
blocks = imported,
txs,
gas,
bps = f(blocks.len.float / diff1),
tps = f(statsRes[].txs.float / diff1),
gps = f(statsRes[].gas.float / diff1),
avgBps = f(imported.float / diff0),
avgTps = f(txs.float / diff0),
avgGps = f(gas.truncate(uint64).float / diff0), # TODO fix truncate
elapsed = shortLog(time2 - time0, 3)
if csv != nil:
# In the CSV, we store a line for every chunk of blocks processed so
# that the file can meaningfully be appended to when restarting the
# process - this way, each sample is independent
try:
csv.writeLine(
[
$blockNumber,
$blocks.len,
$statsRes[].txs,
$statsRes[].gas,
$(time2 - time1).nanoseconds(),
].join(",")
)
csv.flushFile()
except IOError as exc:
warn "Could not write csv", err = exc.msg
blocks.setLen(0)
let db =
Era1DbRef.init(conf.era1Dir.string, "mainnet").expect("Era files present")
defer:
db.dispose()
while running and imported < conf.maxBlocks and blockNumber <= lastEra1Block:
var blk = db.getEthBlock(blockNumber).valueOr:
error "Could not load block from era1", blockNumber, error
break
imported += 1
blocks.add move(blk)
if blocks.lenu64 mod conf.chunkSize == 0:
process()
if blocks.len > 0:
process() # last chunk, if any
for blocksFile in conf.blocksFile:
if isFile(string blocksFile):
# success or not, we quit after importing blocks
if not importRlpBlock(string blocksFile, com):
quit(QuitFailure)
else:
quit(QuitSuccess)