nimbus-eth1/fluffy/tools/eth_data_exporter.nim

261 lines
8.0 KiB
Nim

# Nimbus
# Copyright (c) 2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Tool to download chain history data from a local node and save it to a JSON
# file or an SQLite database.
# In case of json:
# Block data is stored as it gets transmitted over the wire and as defined here:
# https://github.com/ethereum/portal-network-specs/blob/master/history-network.md#content-keys-and-values
#
# The JSON file has the following format:
# {
#   "hexEncodedBlockHash": {
#     "header": "the RLP encoded block header as a hex string",
#     "body": "the SSZ encoded container of transactions and uncles as a hex string",
#     "receipts": "the SSZ encoded list of the receipts as a hex string",
#     "number": "block number"
#   },
#   ...,
#   ...,
# }
# In case of sqlite:
# Data is saved in a format friendly to the history network, i.e. one table
# with 3 columns: contentid, contentkey, content.
# Such a format enables queries to quickly find content in range of some node,
# which makes it possible to offer content to nodes in bulk.
#
# When using geth as client to download receipts from, be aware that you will
# have to set the number of blocks to maintain the transaction index for to
# unlimited if you want access to all transactions/receipts.
# e.g: `./build/bin/geth --ws --txlookuplimit=0`
#
{.push raises: [Defect].}
import
std/[json, typetraits, strutils, os],
confutils,
stew/[byteutils, io2],
json_serialization,
faststreams, chronicles,
eth/[common, rlp], chronos,
eth/common/eth_types_json_serialization,
json_rpc/rpcclient,
../seed_db,
../../premix/downloader,
../network/history/history_content
# Need to be selective due to the `Block` type conflict from downloader
from ../network/history/history_network import encode
proc defaultDataDir*(): string =
  ## Returns the default data directory: a platform-specific relative path
  ## appended to the home directory reported by `getHomeDir()`.
  let home = getHomeDir()
  when defined(windows):
    result = home / "AppData" / "Roaming" / "EthData"
  elif defined(macosx):
    result = home / "Library" / "Application Support" / "EthData"
  else:
    result = home / ".cache" / "ethData"
const
  # Default base name of the export file, used as the default of the
  # `filename` option.
  defaultFileName = "eth-history-data"
  # Default data directory evaluated at compile time; used as the textual
  # description of the `data-dir` default value.
  defaultDataDirDesc = defaultDataDir()
type
  # Selects the output format of the export.
  StorageMode* = enum
    Json, Db

  # Command line configuration of the exporter. Each field carries the
  # confutils pragmas describing its option name, help text and default.
  ExporterConf* = object
    # Log level applied through `setLogLevel` at start-up.
    logLevel* {.
      desc: "Sets the log level"
      defaultValue: LogLevel.INFO
      defaultValueDesc: $LogLevel.INFO
      name: "log-level" .}: LogLevel

    # First block number of the inclusive range to export.
    initialBlock* {.
      desc: "Number of first block which should be downloaded"
      defaultValue: 0
      name: "initial-block" .}: uint64

    # Last block number of the inclusive range to export.
    endBlock* {.
      desc: "Number of last block which should be downloaded"
      defaultValue: 0
      name: "end-block" .}: uint64

    # Directory that receives the json file or the database.
    dataDir* {.
      desc: "The directory where generated file will be placed"
      defaultValue: defaultDataDir()
      defaultValueDesc: $defaultDataDirDesc
      name: "data-dir" .}: OutDir

    # Base name of the output; the json writer appends ".json" when missing.
    filename* {.
      desc: "File name (minus extension) where history data will be exported to"
      defaultValue: defaultFileName
      defaultValueDesc: $defaultFileName
      name: "filename" .}: string

    # Whether to export to a json file or to a database.
    storageMode* {.
      desc: "Storage mode of data export"
      defaultValue: Json
      name: "storage-mode" .}: StorageMode

  # Value written per block in the json export: the hex encoded header, body
  # and receipts, plus the block number.
  DataRecord = object
    header: string
    body: string
    receipts: string
    number: uint64
proc parseCmdArg*(T: type StorageMode, p: string): T
    {.raises: [Defect, ConfigurationError].} =
  ## Parses the value of the `storage-mode` option.
  ##
  ## Accepts `json` and `db`, compared case-insensitively, and returns the
  ## matching `StorageMode`. Any other value raises `ConfigurationError`
  ## with a message that echoes the rejected input.
  ##
  ## `p` is declared as `string`: `TaintedString` is only a deprecated alias
  ## of `string`, so callers passing either keep working.
  case p.toLowerAscii()
  of "db":
    return Db
  of "json":
    return Json
  else:
    let msg = "Provided mode: " & p & " is not valid. Should be `json` or `db`"
    raise newException(ConfigurationError, msg)
proc completeCmdArg*(T: type StorageMode, val: string): seq[string] =
  ## Returns the completion candidates for the `storage-mode` option: the
  ## accepted values (`json`, `db`) that start with the text typed so far.
  ## An empty `val` yields both values; a non-matching prefix yields none.
  for candidate in ["json", "db"]:
    if candidate.startsWith(val):
      result.add(candidate)
proc writeBlock(writer: var JsonWriter, blck: Block)
    {.raises: [IOError, Defect].} =
  ## Writes one block as a single json field: the key is the 0x-prefixed hex
  ## of the header hash, the value is a `DataRecord` holding the hex encoded
  ## header, body and receipts together with the block number.
  let
    encodedHeader = rlp.encode(blck.header).to0xHex()
    encodedBody = encode(blck.body).to0xHex()
    encodedReceipts = encode(blck.receipts).to0xHex()
    blockNumber = blck.header.blockNumber.truncate(uint64)
    record = DataRecord(
      header: encodedHeader,
      body: encodedBody,
      receipts: encodedReceipts,
      number: blockNumber)
    hashKey = rlpHash(blck.header).data.to0xHex()

  writer.writeField(hashKey, record)
proc downloadBlock(i: uint64, client: RpcClient): Block =
  ## Requests block number `i`, including its receipts, through `client`.
  ## Any catchable error is logged as fatal and terminates the program with
  ## exit code 1.
  let blockNumber = i.u256
  try:
    result = requestBlock(
      blockNumber, flags = {DownloadReceipts}, client = some(client))
  except CatchableError as e:
    fatal "Error while requesting Block", error = e.msg, number = i
    quit 1
proc createAndOpenFile(config: ExporterConf): OutputStreamHandle =
  ## Creates the data directory from `config` and opens the json output file
  ## inside it. If a file already exists at the target path the program is
  ## aborted with a message to the user, to avoid losing data. Failure to
  ## create the directory or to open the file also aborts with exit code 1.

  # Make sure the file name carries the ".json" extension exactly once.
  var jsonName = config.filename
  if not jsonName.endsWith(".json"):
    jsonName.add(".json")

  let targetPath = config.dataDir / jsonName

  if isFile(targetPath):
    fatal "File under provided path already exists and would be overwritten",
      path = targetPath
    quit 1

  let dirResult = createPath(distinctBase(config.dataDir))
  if dirResult.isErr():
    fatal "Error occurred while creating directory", error = dirResult.error
    quit 1

  try:
    # An already existing file at this path was rejected above.
    result = fileOutput(targetPath)
  except IOError as e:
    fatal "Error occurred while opening the file", error = e.msg
    quit 1
proc writeToJson(config: ExporterConf, client: RpcClient) =
  ## Downloads every block in the inclusive range
  ## `config.initialBlock..config.endBlock` and writes them as fields of one
  ## pretty-printed json record into the file opened by `createAndOpenFile`.
  ## IO errors while writing or closing are logged as fatal and terminate the
  ## program with exit code 1.
  let output = createAndOpenFile(config)

  try:
    var jsonWriter = JsonWriter[DefaultFlavor].init(output.s, pretty = true)
    jsonWriter.beginRecord()
    for number in config.initialBlock .. config.endBlock:
      jsonWriter.writeBlock(downloadBlock(number, client))
    jsonWriter.endRecord()
    info "File successfully written"
  except IOError as e:
    fatal "Error occoured while writing to file", error = e.msg
    quit 1
  finally:
    # Closing can fail as well; report it instead of letting it propagate.
    try:
      output.close()
    except IOError as e:
      fatal "Error occoured while closing file", error = e.msg
      quit 1
proc writeToDb(config: ExporterConf, client: RpcClient) =
  ## Downloads every block in the inclusive range
  ## `config.initialBlock..config.endBlock` and stores header, body and
  ## receipts in a `SeedDb`, each under its own encoded content key.
  ## The database is closed when the proc is left.
  let db = SeedDb.new(distinctBase(config.dataDir), config.filename)

  try:
    for number in config.initialBlock .. config.endBlock:
      let
        blck = downloadBlock(number, client)
        # All three content keys of a block share the same block key.
        sharedKey = BlockKey(chainId: 1, blockHash: blck.header.blockHash())
        headerKey = encode(ContentKey(
          contentType: blockHeader, blockHeaderKey: sharedKey))
        bodyKey = encode(ContentKey(
          contentType: blockBody, blockBodyKey: sharedKey))
        receiptsKey = encode(ContentKey(
          contentType: receipts, receiptsKey: sharedKey))

      db.put(
        headerKey.toContentId(), headerKey.asSeq(), rlp.encode(blck.header))

      # No need to seed empty lists into database
      if blck.body.transactions.len > 0 or blck.body.uncles.len > 0:
        db.put(bodyKey.toContentId(), bodyKey.asSeq(), encode(blck.body))

      if blck.receipts.len > 0:
        db.put(
          receiptsKey.toContentId(), receiptsKey.asSeq(),
          encode(blck.receipts))

    info "Data successfuly written to db"
  finally:
    db.close()
proc run(config: ExporterConf, client: RpcClient) =
  ## Runs the export with the writer that matches `config.storageMode`.
  case config.storageMode
  of Db: config.writeToDb(client)
  of Json: config.writeToJson(client)
when isMainModule:
  # The pushed `raises` pragma is popped around the config loading call and
  # pushed again right after it.
  {.pop.}
  let config = ExporterConf.load()
  {.push raises: [Defect].}

  # Reject a block range whose start lies after its end.
  if config.initialBlock > config.endBlock:
    fatal "Initial block number should be smaller than end block number",
      initialBlock = config.initialBlock,
      endBlock = config.endBlock
    quit 1

  setLogLevel(config.logLevel)

  var client: RpcClient

  try:
    let wsClient = newRpcWebSocketClient()
    # TODO Currently hardcoded to default geth ws address, at some point it may
    # be moved to config
    waitFor wsClient.connect("ws://127.0.0.1:8546")
    client = wsClient
  except CatchableError as e:
    fatal "Error while connecting to data provider", error = e.msg
    quit 1

  # Close the client connection once the export has finished or raised.
  try:
    config.run(client)
  finally:
    waitFor client.close()