nimbus-eth1/nimbus/db/era1_db/db_desc.nim

# Nimbus
# Copyright (c) 2021-2024 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.

{.push raises: [].}

import
  stew/io2,
  std/[os, parseutils, strutils, tables],
  results,
  eth/common/eth_types,
  ../../../fluffy/eth_data/era1

export results, eth_types

# TODO this is a "rough copy" of the fluffy DB, minus the accumulator (it goes
#      by era number alone instead of rooted name) - eventually the two should
#      be merged, when eth1 gains accumulators in its metadata

type Era1DbRef* = ref object
  ## The Era1 database manages a collection of era files that together make up
  ## a linear history of pre-merge execution chain data.
  path: string
    ## Directory the era files live in; joined with an entry of `filenames`
    ## to form the path that is opened.
  network: string
    ## Network name as passed to `init`, where it serves as the file name
    ## prefix (`<network>-`) when scanning `path`.
  files: seq[Era1File]
    ## Era files that are currently open, in the order they were opened.
  filenames: Table[uint64, string]
    ## Era number -> file name (relative to `path`), filled in by `init`.

proc getEra1File*(db: Era1DbRef, era: Era1): Result[Era1File, string] =
  ## Return the era file covering `era`, opening it on demand.
  ##
  ## Files that are already open are searched first. Otherwise the file name
  ## recorded for `era` is resolved against the database directory, the file
  ## is opened and the handle is remembered in `db.files` for later lookups.
  ##
  ## Returns an error when no file name is recorded for `era`, when the
  ## resolved path is not a file (any more), or when opening the file fails.

  # Fast path: one of the open files starts in the requested era.
  for opened in db.files:
    if opened.blockIdx.startNumber.era == era:
      return ok(opened)

  let key = uint64 era
  if key notin db.filenames:
    return err("Era not covered by existing files: " & $era)

  # The key is known to be present, so the default is never returned here.
  let fullPath = db.path / db.filenames.getOrDefault(key)
  if not isFile(fullPath):
    return err("Era file no longer available: " & fullPath)

  # TODO: The open call does not do full verification. It is assumed here that
  # trusted files are used. We might want to add a full validation option.
  let fresh = Era1File.open(fullPath).valueOr:
    return err(error)

  # Once more than 16 files are open, close and drop the one opened first
  # before remembering the new handle.
  if db.files.len > 16: # TODO LRU
    db.files[0].close()
    db.files.delete(0)

  db.files.add(fresh)
  ok(fresh)

proc init*(
    T: type Era1DbRef, path: string, network: string
): Result[Era1DbRef, string] =
  ## Create an era1 database from the era files found directly in `path`.
  ##
  ## The directory is walked (not recursively) and every file or link-to-file
  ## named `<network>-<era>...` with extension `.era1` is recorded under its
  ## era number. Files are not opened here; that is done by `getEra1File`.
  ##
  ## Names that carry no digits right after the `<network>-` prefix are
  ## skipped, so that they cannot be mistaken for era 0.
  ##
  ## Returns an error when walking the directory or parsing a name raises, or
  ## when no matching era file was found.
  let prefix = network & "-"
  var filenames: Table[uint64, string]
  try:
    for w in path.walkDir(relative = true):
      if w.kind in {pcFile, pcLinkToFile}:
        let (_, name, ext) = w.path.splitFile()
        # era files are named network-00era-root.era1 - we don't have the root
        # so do prefix matching instead
        if name.startsWith(prefix) and ext == ".era1":
          var era1: uint64
          # `parseBiggestUInt` returns the number of characters it consumed;
          # 0 means there was no number at that position and `era1` was left
          # untouched.
          if parseBiggestUInt(name, era1, start = prefix.len) > 0:
            filenames[era1] = w.path
  except CatchableError as exc:
    return err "Cannot open era database: " & exc.msg
  if filenames.len == 0:
    return err "No era files found in " & path

  ok Era1DbRef(path: path, network: network, filenames: filenames)

proc getEthBlock*(db: Era1DbRef, blockNumber: uint64): Result[EthBlock, string] =
  ## Fetch block `blockNumber` from the era file covering its era.
  ##
  ## An error from locating or opening the era file is returned as is;
  ## otherwise the result of the file-level `getEthBlock` is returned.
  let era1File = db.getEra1File(blockNumber.era).valueOr:
    return err(error)

  era1File.getEthBlock(blockNumber)

proc getBlockTuple*(db: Era1DbRef, blockNumber: uint64): Result[BlockTuple, string] =
  ## Fetch the block tuple for `blockNumber` from the era file covering its
  ## era.
  ##
  ## An error from locating or opening the era file is returned as is;
  ## otherwise the result of the file-level `getBlockTuple` is returned.
  let era1File = db.getEra1File(blockNumber.era).valueOr:
    return err(error)

  era1File.getBlockTuple(blockNumber)

proc dispose*(db: Era1DbRef) =
  ## Close every era file that `db` currently holds open and forget the
  ## handles. The recorded file names are kept.
  for handle in db.files:
    if handle.isNil:
      continue
    handle.close()
  reset(db.files)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00			`# Nimbus`
			`# Copyright (c) 2021-2024 Status Research & Development GmbH`
			`# Licensed under either of`
			`# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or`
			`# http://www.apache.org/licenses/LICENSE-2.0)`
			`# * MIT license ([LICENSE-MIT](LICENSE-MIT) or`
			`# http://opensource.org/licenses/MIT)`
			`# at your option. This file may not be copied, modified, or distributed except`
			`# according to those terms.`

			`{.push raises: [].}`

			`import`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`stew/io2,`
			`std/[os, parseutils, strutils, tables],`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00			`results,`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`eth/common/eth_types,`
fix import path; force refc memory management even with Nim 2.0+ (#2241) 2024-05-29 18:47:06 +00:00			`../../../fluffy/eth_data/era1`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`export results, eth_types`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`# TODO this is a "rough copy" of the fluffy DB, minus the accumulator (it goes`
			`# by era number alone instead of rooted name) - eventually the two should`
			`# be merged, when eth1 gains accumulators in its metadata`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`type Era1DbRef* = ref object`
			`## The Era1 database manages a collection of era files that together make up`
			`## a linear history of pre-merge execution chain data.`
			`path: string`
			`network: string`
			`files: seq[Era1File]`
			`filenames: Table[uint64, string]`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`proc getEra1File*(db: Era1DbRef, era: Era1): Result[Era1File, string] =`
			`for f in db.files:`
			`if f.blockIdx.startNumber.era == era:`
			`return ok(f)`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
			`let`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`name =`
			`try:`
			`db.filenames[uint64 era]`
			`except KeyError:`
			`return err("Era not covered by existing files: " & $era)`
			`path = db.path / name`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`if not isFile(path):`
			`return err("Era file no longer available: " & path)`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`# TODO: The open call does not do full verification. It is assumed here that`
			`# trusted files are used. We might want to add a full validation option.`
			`let f = Era1File.open(path).valueOr:`
			`return err(error)`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`if db.files.len > 16: # TODO LRU`
			`close(db.files[0])`
			`db.files.delete(0)`

			`db.files.add(f)`
			`ok(f)`

			`proc init*(`
			`T: type Era1DbRef, path: string, network: string`
			`): Result[Era1DbRef, string] =`
			`var filenames: Table[uint64, string]`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00			`try:`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`for w in path.walkDir(relative = true):`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00			`if w.kind in {pcFile, pcLinkToFile}:`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`let (_, name, ext) = w.path.splitFile()`
			`# era files are named network-00era-root.era1 - we don't have the root`
			`# so do prefix matching instead`
			`if name.startsWith(network & "-") and ext == ".era1":`
			`var era1: uint64`
			`discard parseBiggestUInt(name, era1, start = network.len + 1)`
			`filenames[era1] = w.path`
			`except CatchableError as exc:`
			`return err "Cannot open era database: " & exc.msg`
			`if filenames.len == 0:`
			`return err "No era files found in " & path`

			`ok Era1DbRef(path: path, network: network, filenames: filenames)`

Consolidate block type for block processing (#2325) This PR consolidates the split header-body sequences into a single EthBlock sequence and cleans up the fallout from that which significantly reduces block processing overhead during import thanks to less garbage collection and fewer copies of things all around. Notably, since the number of headers must always match the number of bodies, we also get rid of a pointless degree of freedom that in the future could introduce unnecessary bugs. * only read header and body from era file * avoid several unnecessary copies along the block processing way * simplify signatures, cleaning up unused arguemnts and returns * use `stew/assign2` in a few strategic places where the generated nim assignent is slow and add a few `move` to work around poor analysis in nim 1.6 (will need to be revisited for 2.0) ``` stats-20240607_2223-a814aa0b.csv vs stats-20240608_0714-21c1d0a9.csv bps_x bps_y tps_x tps_y bpsd tpsd timed block_number (498305, 713245] 1,540.52 1,809.73 2,361.58 2775.340189 17.63% 17.63% -14.92% (713245, 928185] 730.36 865.26 1,715.90 2028.973852 18.01% 18.01% -15.21% (928185, 1143126] 663.03 789.10 2,529.26 3032.490771 19.79% 19.79% -16.28% (1143126, 1358066] 393.46 508.05 2,152.50 2777.578119 29.13% 29.13% -22.50% (1358066, 1573007] 370.88 440.72 2,351.31 2791.896052 18.81% 18.81% -15.80% (1573007, 1787947] 283.65 335.11 2,068.93 2441.373402 17.60% 17.60% -14.91% (1787947, 2002888] 287.29 342.11 2,078.39 2474.179448 18.99% 18.99% -15.91% (2002888, 2217828] 293.38 343.16 2,208.83 2584.77457 17.16% 17.16% -14.61% (2217828, 2432769] 140.09 167.86 1,081.87 1296.336926 18.82% 18.82% -15.80% blocks: 1934464, baseline: 3h13m1s, contender: 2h43m47s bpsd (mean): 19.55% tpsd (mean): 19.55% Time (total): -29m13s, -15.14% ``` 2024-06-09 14:32:20 +00:00			`proc getEthBlock*(db: Era1DbRef, blockNumber: uint64): Result[EthBlock, string] =`
			`let f = ?db.getEra1File(blockNumber.era)`

			`f.getEthBlock(blockNumber)`

era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`proc getBlockTuple*(db: Era1DbRef, blockNumber: uint64): Result[BlockTuple, string] =`
			`let f = ?db.getEra1File(blockNumber.era)`

			`f.getBlockTuple(blockNumber)`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
			`proc dispose*(db: Era1DbRef) =`
era: simplify, instant startup (#2218) This PR exploits structural properties of era files to simplify the implementation and in particular remove the need to load all era file indicies at startup which may be slow (due to archival storage residing on slow drives) 2024-05-26 06:24:13 +00:00			`for w in db.files:`
			`if w != nil:`
			`w.close()`
			`db.files.reset()`
Unified mode for undumping gzip-ed or era1-ed encoded block dumps (#2198) ackn: Built on Daniel's work 2024-05-20 13:59:18 +00:00
			`# ------------------------------------------------------------------------------`
			`# End`
			`# ------------------------------------------------------------------------------`