# Nimbus
# Copyright (c) 2022-2023 Status Research & Development GmbH
# Licensed and distributed under either of
#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# Tool to download chain history data from a local node, and save it to a json
# file or an sqlite database.
# In case of json:
# Block data is stored as it gets transmitted over the wire and as defined here:
#  https://github.com/ethereum/portal-network-specs/blob/master/history-network.md#content-keys-and-values
#
# The json file has the following format:
# {
#   "hexEncodedBlockHash": {
#     "header": "the rlp encoded block header as a hex string",
#     "body": "the SSZ encoded container of transactions and uncles as a hex string",
#     "receipts": "The SSZ encoded list of the receipts as a hex string",
#     "number": "block number"
#   },
#   ...,
#   ...,
# }
# In case of sqlite:
# Data is saved in a format friendly to history network i.e one table with 3
# columns: contentid, contentkey, content.
# Such format enables queries to quickly find content in range of some node
# which makes it possible to offer content to nodes in bulk.
#
# When using geth as client to download receipts from, be aware that you will
# have to set the number of blocks to maintain the transaction index for to
# unlimited if you want access to all transactions/receipts.
# e.g: `./build/bin/geth --ws --txlookuplimit=0`
#

{.push raises: [].}

import
  std/[json, typetraits, strutils, strformat, os, uri],
  confutils,
  stew/[byteutils, io2],
  json_serialization,
  faststreams, chronicles,
  eth/[common, rlp], chronos,
  eth/common/eth_types_json_serialization,
  json_rpc/rpcclient,
  ncli/e2store,
  ../seed_db,
  ../../premix/[downloader, parser],
  ../network/history/[history_content, accumulator],
  ../eth_data/[history_data_json_store, history_data_ssz_e2s]

# Need to be selective due to the `Block` type conflict from downloader
from ../network/history/history_network import encode

chronicles.formatIt(IoErrorCode): $it

proc defaultDataDir*(): string =
  ## Returns the default export directory: a platform specific sub-directory
  ## (selected at compile time for Windows, macOS or anything else) joined
  ## onto the user's home directory.
  let dataDir = when defined(windows):
    "AppData" / "Roaming" / "EthData"
  elif defined(macosx):
    "Library" / "Application Support" / "EthData"
  else:
    ".cache" / "eth-data"

  getHomeDir() / dataDir

type
  # Transport selected from the scheme of the `--web3-url` option.
  Web3UrlKind* = enum
    HttpUrl, WsUrl

  # A JSON-RPC endpoint URL together with the transport derived from it.
  Web3Url* = object
    kind*: Web3UrlKind
    url*: string

const
  defaultDataDirDesc = defaultDataDir()
  defaultBlockFileName = "eth-block-data"
  defaultAccumulatorFileName = "mainnet-master-accumulator.ssz"
  defaultWeb3Url = Web3Url(kind: HttpUrl, url: "http://127.0.0.1:8545")

type
  # Sub-commands of the tool; the string values are the help texts.
  ExporterCmd* = enum
    # TODO: Multiline strings doesn't work here anymore with 1.6, and concat of
    # several lines gives the error: Error: Invalid node kind nnkInfix for macros.`$`
    exportBlockData =
      "Export block data (headers, bodies and receipts) to a json format or a database. Some of this functionality is likely to get deprecated"
    exportEpochHeaders =
      "Export block headers from an Ethereum JSON RPC Execution endpoint to *.e2s files arranged per epoch (8192 blocks)"
    verifyEpochHeaders =
      "Verify *.e2s files containing block headers. Verify currently only means being able to RLP decode the block headers"
    exportAccumulatorData =
      "Build and export the master accumulator and historical epoch accumulators. Requires *.e2s block header files generated with the exportEpochHeaders command up until the merge block"
    printAccumulatorData =
      "Print the root hash of the master accumulator and of all historical epoch accumulators. Requires data generated by exportAccumulatorData command"
    exportHeaderRange =
      "Export block headers from an Ethereum JSON RPC Execution endpoint to *.e2s files (unlimited amount)"
    exportHeadersWithProof =
      "Export block headers with proof from *.e2s headers file and epochAccumulator files"

  # Output backend for the `exportBlockData` command.
  StorageMode* = enum
    Json, Db

  # Command line configuration: three global options followed by the
  # per-command options selected through `cmd`.
  ExporterConf* = object
    logLevel* {.
      defaultValue: LogLevel.INFO
      defaultValueDesc: $LogLevel.INFO
      desc: "Sets the log level"
      name: "log-level" .}: LogLevel
    dataDir* {.
      desc: "The directory where generated data files will be exported to"
      defaultValue: defaultDataDir()
      defaultValueDesc: $defaultDataDirDesc
      name: "data-dir" .}: OutDir
    web3Url* {.
      desc: "Execution layer JSON-RPC API URL"
      defaultValue: defaultWeb3Url
      name: "web3-url" .}: Web3Url
    case cmd* {.
      command
      defaultValue: exportBlockData .}: ExporterCmd
    of exportBlockData:
      startBlock* {.
        desc: "Number of the first block to be exported"
        defaultValue: 0
        name: "start-block" .}: uint64
      endBlock* {.
        desc: "Number of the last block to be exported"
        defaultValue: 0
        name: "end-block" .}: uint64
      fileName* {.
        desc: "File name (minus extension) where block data will be exported to"
        defaultValue: defaultBlockFileName
        defaultValueDesc: $defaultBlockFileName
        name: "file-name" .}: string
      storageMode* {.
        desc: "Storage mode of block data export"
        defaultValue: Json
        name: "storage-mode" .}: StorageMode
      headersOnly* {.
        desc: "Only export the headers instead of full blocks and receipts"
        defaultValue: false
        name: "headers-only" .}: bool
    of exportEpochHeaders:
      startEpoch* {.
        desc: "Number of the first epoch which should be downloaded"
        defaultValue: 0
        name: "start-epoch" .}: uint64
      endEpoch* {.
        desc: "Number of the last epoch which should be downloaded"
        defaultValue: 1896
        name: "end-epoch" .}: uint64
    # TODO:
    # Although options are the same as for exportEpochHeaders, we can't drop
    # them under the same `case of` branch, as confutils does not agree with
    # that.
    of verifyEpochHeaders:
      startEpochVerify* {.
        desc: "Number of the first epoch which should be verified"
        defaultValue: 0
        name: "start-epoch" .}: uint64
      endEpochVerify* {.
        desc: "Number of the last epoch which should be verified"
        defaultValue: 1896
        name: "end-epoch" .}: uint64
    of exportAccumulatorData:
      accumulatorFileName* {.
        desc: "File to which the serialized accumulator is written"
        defaultValue: defaultAccumulatorFileName
        defaultValueDesc: $defaultAccumulatorFileName
        name: "accumulator-file-name" .}: string
      writeEpochAccumulators* {.
        desc: "Write also the SSZ encoded epoch accumulators to specific files"
        defaultValue: false
        name: "write-epoch-accumulators" .}: bool
    of printAccumulatorData:
      accumulatorFileNamePrint* {.
        desc: "File from which the serialized accumulator is read"
        defaultValue: defaultAccumulatorFileName
        defaultValueDesc: $defaultAccumulatorFileName
        name: "accumulator-file-name" .}: string
    of exportHeaderRange:
      startBlockNumber* {.
        desc: "Number of the first block header to be exported"
        name: "start-block" .}: uint64
      endBlockNumber* {.
        desc: "Number of the last block header to be exported"
        name: "end-block" .}: uint64
    of exportHeadersWithProof:
      startBlockNumber2* {.
        desc: "Number of the first block header to be exported"
        name: "start-block" .}: uint64
      endBlockNumber2* {.
        desc: "Number of the last block header to be exported"
        name: "end-block" .}: uint64

proc parseCmdArg*(
    T: type Web3Url, p: string): T {.raises: [ConfigurationError].} =
  ## Parses the `--web3-url` argument. The URL scheme is compared
  ## case-insensitively: `http`/`https` give an `HttpUrl`, `ws`/`wss` a
  ## `WsUrl`. The `url` field keeps the argument exactly as it was passed.
  ##
  ## Raises `ConfigurationError` for any other (or missing) scheme.
  let
    url = parseUri(p)
    normalizedScheme = url.scheme.toLowerAscii()

  if (normalizedScheme == "http" or normalizedScheme == "https"):
    Web3Url(kind: HttpUrl, url: p)
  elif (normalizedScheme == "ws" or normalizedScheme == "wss"):
    Web3Url(kind: WsUrl, url: p)
  else:
    raise newException(
      ConfigurationError,
      "The Web3 URL must specify one of following protocols: http/https/ws/wss"
    )

proc completeCmdArg*(T: type Web3Url, val: string): seq[string] =
  ## No shell completions are offered for Web3 URLs.
  return @[]

proc parseCmdArg*(
    T: type StorageMode, p: string): T {.raises: [ConfigurationError].} =
  ## Parses the `--storage-mode` argument. `json` and `db` are accepted in any
  ## letter case, in line with the case-insensitive scheme handling of the
  ## `Web3Url` parser.
  ##
  ## Raises `ConfigurationError` for any other value; the message quotes the
  ## argument as it was provided.
  case p.toLowerAscii()
  of "db":
    return Db
  of "json":
    return Json
  else:
    let msg = "Provided mode: " & p & " is not a valid. Should be `json` or `db`"
    raise newException(ConfigurationError, msg)

proc completeCmdArg*(T: type StorageMode, val: string): seq[string] =
  ## No shell completions are offered for storage modes.
  return @[]

proc downloadHeader(client: RpcClient, i: uint64): BlockHeader =
  ## Requests the header of block number `i` through `client` and parses the
  ## JSON reply into a `BlockHeader`.
  ##
  ## Any `CatchableError` is logged as fatal and terminates the program with
  ## exit code 1.
  let blockNumber = u256(i)
  try:
    let jsonHeader = requestHeader(blockNumber, some(client))
    parseBlockHeader(jsonHeader)
  except CatchableError as e:
    fatal "Error while requesting BlockHeader", error = e.msg, number = i
    quit 1

proc downloadBlock(i: uint64, client: RpcClient): Block =
  ## Requests block number `i` through `client` with the `DownloadReceipts`
  ## flag set.
  ##
  ## Any `CatchableError` is logged as fatal and terminates the program with
  ## exit code 1.
  let num = u256(i)
  try:
    return requestBlock(num, flags = {DownloadReceipts}, client = some(client))
  except CatchableError as e:
    fatal "Error while requesting Block", error = e.msg, number = i
    quit 1

proc createAndOpenFile(dataDir: string, fileName: string): OutputStreamHandle =
  ## Opens `dataDir / fileName` for writing and returns the output stream
  ## handle. `.json` is appended to `fileName` when it is not already its
  ## suffix, and `dataDir` is created when needed.
  ##
  ## The program is aborted (exit code 1) when the target file already exists,
  ## to avoid losing data, and also when creating the directory or opening the
  ## file fails.
  let fileName: string =
    if not fileName.endsWith(".json"):
      fileName & ".json"
    else:
      fileName

  let filePath = dataDir / fileName

  if isFile(filePath):
    fatal "File under provided path already exists and would be overwritten",
      path = filePath
    quit 1

  let res = createPath(dataDir)
  if res.isErr():
    fatal "Error occurred while creating directory",
      error = ioErrorMsg(res.error)
    quit 1

  try:
    return fileOutput(filePath)
  except IOError as e:
    fatal "Error occurred while opening the file", error = e.msg
    quit 1

proc writeHeadersToJson(config: ExporterConf, client: RpcClient) =
  ## Downloads the headers of blocks `config.startBlock..config.endBlock`
  ## (inclusive) and writes them as one pretty-printed JSON record to the file
  ## opened by `createAndOpenFile`.
  ##
  ## An `IOError` while writing or closing is logged as fatal and terminates
  ## the program with exit code 1.
  let fh = createAndOpenFile(string config.dataDir, string config.fileName)

  try:
    var writer = JsonWriter[DefaultFlavor].init(fh.s, pretty = true)
    writer.beginRecord()
    for i in config.startBlock..config.endBlock:
      let blck = client.downloadHeader(i)
      writer.writeHeaderRecord(blck)
      # Progress report every 8192 headers, skipping the very first one.
      if ((i - config.startBlock) mod 8192) == 0 and i != config.startBlock:
        info "Downloaded 8192 new block headers", currentHeader = i
    writer.endRecord()
    info "File successfully written", path = config.dataDir / config.fileName
  except IOError as e:
    fatal "Error occured while writing to file", error = e.msg
    quit 1
  finally:
    try:
      fh.close()
    except IOError as e:
      fatal "Error occured while closing file", error = e.msg
      quit 1

proc writeBlocksToJson(config: ExporterConf, client: RpcClient) =
  ## Downloads blocks `config.startBlock..config.endBlock` (inclusive) and
  ## writes header, body and receipts of each as one pretty-printed JSON
  ## record to the file opened by `createAndOpenFile`.
  ##
  ## An `IOError` while writing or closing is logged as fatal and terminates
  ## the program with exit code 1.
  let fh = createAndOpenFile(string config.dataDir, string config.fileName)

  try:
    var writer = JsonWriter[DefaultFlavor].init(fh.s, pretty = true)
    writer.beginRecord()
    for i in config.startBlock..config.endBlock:
      let blck = downloadBlock(i, client)
      writer.writeBlockRecord(blck.header, blck.body, blck.receipts)
      # Progress report every 8192 blocks, skipping the very first one.
      if ((i - config.startBlock) mod 8192) == 0 and i != config.startBlock:
        info "Downloaded 8192 new blocks", currentBlock = i
    writer.endRecord()
    info "File successfully written", path = config.dataDir / config.fileName
  except IOError as e:
    fatal "Error occured while writing to file", error = e.msg
    quit 1
  finally:
    try:
      fh.close()
    except IOError as e:
      fatal "Error occured while closing file", error = e.msg
      quit 1

proc writeBlocksToDb(config: ExporterConf, client: RpcClient) =
  ## Downloads blocks `config.startBlock..config.endBlock` (inclusive) and
  ## stores them in a `SeedDb` as (content id, content key, content) entries.
  ## The header is always stored; the body and the receipts are stored only
  ## when they are not empty. The database is closed when the proc exits.
  let db = SeedDb.new(distinctBase(config.dataDir), config.fileName)

  defer:
    db.close()

  for i in config.startBlock..config.endBlock:
    let
      blck = downloadBlock(i, client)
      blockHash = blck.header.blockHash()
      # All three content keys are built from the same block hash.
      contentKeyType = BlockKey(blockHash: blockHash)
      headerKey = encode(ContentKey(
        contentType: blockHeader, blockHeaderKey: contentKeyType))
      bodyKey = encode(ContentKey(
        contentType: blockBody, blockBodyKey: contentKeyType))
      receiptsKey = encode(
        ContentKey(contentType: receipts, receiptsKey: contentKeyType))

    db.put(headerKey.toContentId(), headerKey.asSeq(), rlp.encode(blck.header))

    # No need to seed empty lists into database
    if len(blck.body.transactions) > 0 or len(blck.body.uncles) > 0:
      let body = encode(blck.body)
      db.put(bodyKey.toContentId(), bodyKey.asSeq(), body)

    if len(blck.receipts) > 0:
      let receipts = encode(blck.receipts)
      db.put(receiptsKey.toContentId(), receiptsKey.asSeq(), receipts)

  info "Data successfuly written to db"

proc exportBlocks(config: ExporterConf, client: RpcClient) =
  ## Dispatches the `exportBlockData` command on `config.storageMode` and
  ## `config.headersOnly`. Headers-only export exists for JSON only;
  ## requesting it together with `Db` aborts the program with exit code 1.
  case config.storageMode
  of Json:
    if config.headersOnly:
      writeHeadersToJson(config, client)
    else:
      writeBlocksToJson(config, client)
  of Db:
    if config.headersOnly:
      fatal "Db mode not available for headers only"
      quit 1
    else:
      writeBlocksToDb(config, client)

proc newRpcClient(web3Url: Web3Url): RpcClient =
  ## Creates an HTTP or WebSocket JSON-RPC client according to `web3Url.kind`.
  ## The client is not connected here; see `connectRpcClient`.
  # TODO: I don't like this API. I think the creation of the RPC clients should
  # already include the URL. And then an optional connect may be necessary
  # depending on the protocol.
  let client: RpcClient =
    case web3Url.kind
    of HttpUrl:
      newRpcHttpClient()
    of WsUrl:
      newRpcWebSocketClient()

  client

proc connectRpcClient(
    client: RpcClient, web3Url: Web3Url):
    Future[Result[void, string]] {.async.} =
  ## Connects `client` to `web3Url.url`, converting it to the concrete client
  ## type that matches `web3Url.kind`.
  ##
  ## Returns `ok()` once the connection is established, or `err` carrying the
  ## message of the `CatchableError` raised while connecting.
  try:
    case web3Url.kind
    of HttpUrl:
      await RpcHttpClient(client).connect(web3Url.url)
    of WsUrl:
      await RpcWebSocketClient(client).connect(web3Url.url)
  except CatchableError as e:
    return err(e.msg)

  # Report success explicitly: falling off the end would hand the caller a
  # default-initialized `Result`, which is not an `ok` value, so its
  # `isErr()` check would fire after a successful connect.
  return ok()

when isMainModule:
  {.pop.}
  let config = ExporterConf.load()
  {.push raises: [].}

  setLogLevel(config.logLevel)

  let dataDir = config.dataDir.string
  if not isDir(dataDir):
    let res = createPath(dataDir)
    if res.isErr():
      fatal "Error occurred while creating data directory",
        dir = dataDir, error = ioErrorMsg(res.error)
      quit 1

  case config.cmd
  of ExporterCmd.exportBlockData:
    let client = newRpcClient(config.web3Url)
    let connectRes = waitFor client.connectRpcClient(config.web3Url)
    if connectRes.isErr():
      fatal "Failed connecting to JSON-RPC client", error = connectRes.error
      quit 1

    if (config.endBlock < config.startBlock):
      fatal "Initial block number should be smaller than end block number",
        startBlock = config.startBlock,
        endBlock = config.endBlock
      quit 1

    try:
      exportBlocks(config, client)
    finally:
      waitFor client.close()

  of ExporterCmd.exportEpochHeaders:
    let client = newRpcClient(config.web3Url)
    let connectRes = waitFor client.connectRpcClient(config.web3Url)
    if connectRes.isErr():
      fatal "Failed connecting to JSON-RPC client", error = connectRes.error
      quit 1

    proc exportEpochHeaders(file: string, epoch: uint64): Result[void, string] =
      # Downloading headers from JSON RPC endpoint
      info "Requesting epoch headers", epoch
      var headers: seq[BlockHeader]
      for j in 0..