Jacek Sieka 0b32078c4b
Consolidate block type for block processing (#2325)
This PR consolidates the split header-body sequences into a single EthBlock
sequence and cleans up the fallout from that which significantly reduces
block processing overhead during import thanks to less garbage collection
and fewer copies of things all around.

Notably, since the number of headers must always match the number of bodies,
we also get rid of a pointless degree of freedom that in the future could
introduce unnecessary bugs.

* only read header and body from era file
* avoid several unnecessary copies along the block processing way
* simplify signatures, cleaning up unused arguments and returns
* use `stew/assign2` in a few strategic places where the generated
  nim assignment is slow and add a few `move` to work around poor
  analysis in nim 1.6 (will need to be revisited for 2.0)

```
stats-20240607_2223-a814aa0b.csv vs stats-20240608_0714-21c1d0a9.csv
                       bps_x     bps_y     tps_x        tps_y    bpsd    tpsd    timed
block_number
(498305, 713245]    1,540.52  1,809.73  2,361.58  2775.340189  17.63%  17.63%  -14.92%
(713245, 928185]      730.36    865.26  1,715.90  2028.973852  18.01%  18.01%  -15.21%
(928185, 1143126]     663.03    789.10  2,529.26  3032.490771  19.79%  19.79%  -16.28%
(1143126, 1358066]    393.46    508.05  2,152.50  2777.578119  29.13%  29.13%  -22.50%
(1358066, 1573007]    370.88    440.72  2,351.31  2791.896052  18.81%  18.81%  -15.80%
(1573007, 1787947]    283.65    335.11  2,068.93  2441.373402  17.60%  17.60%  -14.91%
(1787947, 2002888]    287.29    342.11  2,078.39  2474.179448  18.99%  18.99%  -15.91%
(2002888, 2217828]    293.38    343.16  2,208.83   2584.77457  17.16%  17.16%  -14.61%
(2217828, 2432769]    140.09    167.86  1,081.87  1296.336926  18.82%  18.82%  -15.80%

blocks: 1934464, baseline: 3h13m1s, contender: 2h43m47s
bpsd (mean): 19.55%
tpsd (mean): 19.55%
Time (total): -29m13s, -15.14%
```
2024-06-09 16:32:20 +02:00

102 lines
3.1 KiB
Nim

# Nimbus
# Copyright (c) 2021-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
{.push raises: [].}
import
stew/io2,
std/[os, parseutils, strutils, tables],
results,
eth/common/eth_types,
../../../fluffy/eth_data/era1
export results, eth_types
# TODO this is a "rough copy" of the fluffy DB, minus the accumulator (it goes
# by era number alone instead of rooted name) - eventually the two should
# be merged, when eth1 gains accumulators in its metadata
type
  Era1DbRef* = ref object
    ## The Era1 database manages a collection of era files that together make
    ## up a linear history of pre-merge execution chain data.
    path: string
      ## Directory that the entries of `filenames` are joined with when a
      ## file is opened
    network: string
      ## Network name given to `init`; era files are matched on the
      ## `network-` name prefix
    files: seq[Era1File]
      ## Era files that are currently open - `getEra1File` appends to this
      ## list and `dispose` closes and clears it
    filenames: Table[uint64, string]
      ## Era number -> file name (relative to `path`) as discovered by `init`
proc getEra1File*(db: Era1DbRef, era: Era1): Result[Era1File, string] =
  ## Return the era file covering `era`.
  ##
  ## A file that is already open in `db.files` is returned directly. Otherwise
  ## the file name registered for the era is looked up, the file is opened and
  ## appended to `db.files`. An error string is returned when the era has no
  ## registered file name, when the file is no longer present on disk or when
  ## opening it fails.

  # Fast path: the file for this era is already open
  for cached in db.files:
    if cached.blockIdx.startNumber.era == era:
      return ok(cached)

  let fileName =
    try:
      db.filenames[uint64 era]
    except KeyError:
      return err("Era not covered by existing files: " & $era)
  let fullPath = db.path / fileName

  if not isFile(fullPath):
    return err("Era file no longer available: " & fullPath)

  # TODO: The open call does not do full verification. It is assumed here that
  #       trusted files are used. We might want to add a full validation
  #       option.
  let opened = Era1File.open(fullPath).valueOr:
    return err(error)

  # Bound the number of open files: once more than 16 are held, the entry at
  # the front of the list (the one added first) is closed and dropped.
  # TODO LRU
  if db.files.len > 16:
    close(db.files[0])
    db.files.delete(0)

  db.files.add(opened)
  ok(opened)
proc init*(
    T: type Era1DbRef, path: string, network: string
): Result[Era1DbRef, string] =
  ## Create an era database from the era files found directly in `path`.
  ##
  ## Regular files and links to files whose name starts with `network-` and
  ## whose extension is `.era1` are registered under the era number that
  ## follows the prefix. Files that have no number after the prefix are
  ## skipped. An error string is returned when walking the directory or
  ## parsing a name raises, or when no era file is found at all.
  let prefix = network & "-" # loop-invariant, build it once
  var filenames: Table[uint64, string]

  try:
    for w in path.walkDir(relative = true):
      if w.kind notin {pcFile, pcLinkToFile}:
        continue

      let (_, name, ext) = w.path.splitFile()
      # era files are named network-00era-root.era1 - we don't have the root
      # so do prefix matching instead
      if ext != ".era1" or not name.startsWith(prefix):
        continue

      var era1: uint64
      # `parseBiggestUInt` returns the number of characters it consumed - 0
      # means that no era number follows the prefix. Such a file must not be
      # registered: it would otherwise be recorded as era 0 and could replace
      # the entry of the real era 0 file.
      if parseBiggestUInt(name, era1, start = prefix.len) == 0:
        continue

      filenames[era1] = w.path
  except CatchableError as exc:
    return err "Cannot open era database: " & exc.msg

  if filenames.len == 0:
    return err "No era files found in " & path

  ok Era1DbRef(path: path, network: network, filenames: filenames)
proc getEthBlock*(db: Era1DbRef, blockNumber: uint64): Result[EthBlock, string] =
  ## Look up the era file covering `blockNumber` and read the block from it.
  ## A failure to obtain the era file is passed on to the caller unchanged.
  let eraFile = db.getEra1File(blockNumber.era).valueOr:
    return err(error)

  eraFile.getEthBlock(blockNumber)
proc getBlockTuple*(db: Era1DbRef, blockNumber: uint64): Result[BlockTuple, string] =
  ## Look up the era file covering `blockNumber` and read the block tuple from
  ## it. A failure to obtain the era file is passed on to the caller unchanged.
  let eraFile = db.getEra1File(blockNumber.era).valueOr:
    return err(error)

  eraFile.getBlockTuple(blockNumber)
proc dispose*(db: Era1DbRef) =
  ## Close every non-nil entry of `db.files`, then reset the list so that no
  ## closed file remains referenced by the database.
  for handle in db.files:
    if not handle.isNil:
      close(handle)

  reset(db.files)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------