Adjustments in eth_data_exporter related to accumulator generation (#1261)

- Add exportHeaders to write headers in e2s file per epoch
- Add verifyHeaders to verify decoding the above files
- Adjust exportAccumulatorData to use generated header epoch files
and write accumulator in SSZ (no hex)
- Add printAccumulatorData to print all root hashes
This commit is contained in:
Kim De Mey 2022-10-14 08:57:17 +02:00 committed by GitHub
parent 181243b6c4
commit d667346834
No known key found for this signature in database
1 changed files with 254 additions and 87 deletions

View File

@ -37,7 +37,7 @@
{.push raises: [Defect].}
std/[json, typetraits, strutils, os],
std/[json, typetraits, strutils, strformat, os],
stew/[byteutils, io2],
@ -45,6 +45,7 @@ import
eth/[common, rlp], chronos,
../../premix/[downloader, parser],
../network/history/[history_content, accumulator]
@ -58,7 +59,7 @@ proc defaultDataDir*(): string =
elif defined(macosx):
"Library" / "Application Support" / "EthData"
".cache" / "ethData"
".cache" / "eth-data"
getHomeDir() / dataDir
@ -69,8 +70,30 @@ const
ExporterCmd* = enum
# TODO: Need to use the ugly """ multi-line strings, as string concatenation
# with & doesn't work well together with confutils.
exportBlockData =
Export block data (headers, bodies and receipts) to ajson format or a
database. Some of this functionality is likely to get deprecated"""
exportHeaders =
Export block headers from an Ethereum JSON RPC Execution endpoint to
*.e2s files arranged per epoch (8192 blocks)"""
verifyHeaders =
Verify *.e2s files containing block headers. Verify currently only
means being able to RLP decode the block headers"""
exportAccumulatorData =
Build and export the master accumulator and historical epoch
accumulators. Requires *.e2s block header files generated with the
exportHeaders command up until the merge block"""
printAccumulatorData =
Print the root hash of the master accumulator and of all historical
epoch accumulators. Requires data generated by exportAccumulatorData
StorageMode* = enum
Json, Db
@ -111,12 +134,39 @@ type
desc: "Only export the headers instead of full blocks and receipts"
defaultValue: false
name: "headers-only" .}: bool
of exportHeaders:
startEpoch* {.
desc: "Number of the first epoch which should be downloaded"
defaultValue: 0
name: "start-epoch" .}: uint64
endEpoch* {.
desc: "Number of the last epoch which should be downloaded"
defaultValue: 1896
name: "end-epoch" .}: uint64
# Although the options are the same as for exportHeaders, we can't put them
# under the same `of` branch of the case, as confutils does not agree with that.
of verifyHeaders:
startEpochVerify* {.
desc: "Number of the first epoch which should be downloaded"
defaultValue: 0
name: "start-epoch" .}: uint64
endEpochVerify* {.
desc: "Number of the last epoch which should be downloaded"
defaultValue: 1896
name: "end-epoch" .}: uint64
of exportAccumulatorData:
accumulatorFileName* {.
desc: "File to which the serialized accumulator data is written"
defaultValue: defaultAccumulatorFileName
defaultValueDesc: $defaultAccumulatorFileName
name: "accumulator-file-name" .}: string
of printAccumulatorData:
accumulatorFileNameVerify* {.
desc: "File to which the serialized accumulator data is written"
defaultValue: defaultAccumulatorFileName
defaultValueDesc: $defaultAccumulatorFileName
name: "accumulator-file-name" .}: string
HeaderRecord = object
header: string
@ -128,11 +178,6 @@ type
receipts: string
number: uint64
AccumulatorRecord = object
accumulatorHash: string
maxBlockNumber: uint64
accumulator: string
proc parseCmdArg*(T: type StorageMode, p: TaintedString): T
{.raises: [Defect, ConfigurationError].} =
if p == "db":
@ -172,27 +217,6 @@ proc writeBlockRecord(
writer.writeField(headerHash, dataRecord)
proc writeAccumulatorRecord(
writer: var JsonWriter, accumulator: Accumulator)
{.raises: [IOError, Defect].} =
maxBlockNumber =
accumulator.historicalEpochs.len() * epochSize +
accumulatorHash = hash_tree_root(accumulator).data.to0xHex()
accumulatorRecord = AccumulatorRecord(
accumulatorHash: accumulatorHash,
maxBlockNumber: uint64(maxBlockNumber),
accumulator: SSZ.encode(accumulator).to0xHex())
writer.writeField("accumulator", accumulatorRecord)
proc writeEpochAccumulatorRecord(
writer: var JsonWriter, accumulator: EpochAccumulator)
{.raises: [IOError, Defect].} =
writer.writeField("epochAccumulator", SSZ.encode(accumulator).to0xHex())
proc downloadHeader(client: RpcClient, i: uint64): BlockHeader =
let blockNumber = u256(i)
@ -315,46 +339,6 @@ proc writeBlocksToDb(config: ExporterConf, client: RpcClient) =
info "Data successfuly written to db"
proc writeAccumulatorToJson(
dataDir: string, fileName: string, accumulator: Accumulator) =
let fh = createAndOpenFile(dataDir, fileName)
var writer = JsonWriter[DefaultFlavor].init(fh.s, pretty = true)
info "File successfully written", path = dataDir / fileName
except IOError as e:
fatal "Error occured while writing to file", error = e.msg
quit 1
except IOError as e:
fatal "Error occured while closing file", error = e.msg
quit 1
proc writeEpochAccumulatorToJson(
dataDir: string, fileName: string, accumulator: EpochAccumulator) =
let fh = createAndOpenFile(dataDir, fileName)
var writer = JsonWriter[DefaultFlavor].init(fh.s, pretty = true)
info "File successfully written", path = dataDir / fileName
except IOError as e:
fatal "Error occured while writing to file", error = e.msg
quit 1
except IOError as e:
fatal "Error occured while closing file", error = e.msg
quit 1
proc exportBlocks(config: ExporterConf, client: RpcClient) =
case config.storageMode
of Json:
@ -369,6 +353,23 @@ proc exportBlocks(config: ExporterConf, client: RpcClient) =
writeBlocksToDb(config, client)
# Using the e2s format to store data, but without the specific structure
# like in an era file, as we currently don't really need that.
# See:
# Added two types for now, with numbers not formally specified.
# Might remove the `MasterAccumulatorRecord` as it is a bit silly for just one
# record and could just be stored directly as *.ssz.
# Note:
# Snappy compression for `ExecutionBlockHeaderRecord` only helps for the
# first ~1M (?) block headers, after that there is no gain so we don't do it.
ExecutionBlockHeaderRecord = [byte 0xFF, 0x00]
MasterAccumulatorRecord = [byte 0xFE, 0x00]
proc toString(v: IoErrorCode): string =
  ## Returns the message text that `ioErrorMsg` produces for the error code `v`.
  ## Any exception raised while producing the message is converted into an
  ## assertion failure that carries the exception's message.
  try:
    result = ioErrorMsg(v)
  except Exception as exc:
    raiseAssert exc.msg
when isMainModule:
let config = ExporterConf.load()
@ -382,8 +383,15 @@ when isMainModule:
endBlock = config.endBlock
quit 1
var client: RpcClient
let dataDir = config.dataDir.string
if not isDir(dataDir):
let res = createPath(dataDir)
if res.isErr():
fatal "Error occurred while creating data directory",
dir = dataDir, error = ioErrorMsg(res.error)
quit 1
var client: RpcClient
let c = newRpcWebSocketClient()
# TODO: Hardcoded to the default geth ws address. This should become
@ -401,24 +409,183 @@ when isMainModule:
waitFor client.close()
of ExporterCmd.exportAccumulatorData:
var headers: seq[BlockHeader]
for i in config.initialBlock..config.endBlock:
let header = client.downloadHeader(i)
if ((i - config.initialBlock) mod 8192) == 0 and i != config.initialBlock:
info "Downloaded 8192 new block headers", currentBlock = i
of ExporterCmd.exportHeaders:
proc exportEpochHeaders(file: string, epoch: uint64): Result[void, string] =
# Downloading headers from JSON RPC endpoint
info "Requesting epoch headers", epoch
var headers: seq[BlockHeader]
for j in 0..<8192'u64:
debug "Requesting block", number = j
let header = client.downloadHeader(epoch*8192 + j)
let fh = ? openFile(file, {OpenFlags.Write, OpenFlags.Create}).mapErr(toString)
defer: discard closeFile(fh)
info "Writing headers to file", file
for header in headers:
discard ? fh.appendRecord(ExecutionBlockHeaderRecord, rlp.encode(header))
# TODO: Could make the requests concurrent per epoch.
# Batching would also be nice but our json-rpc does not support that:
for i in config.startEpoch..config.endEpoch:
let file = dataDir / &"mainnet-headers-epoch-{i.uint64:05}.e2s"
if isFile(file):
notice "Skipping epoch headers, file already exists", file
let res = exportEpochHeaders(file, i)
if res.isErr():
error "Failed exporting epoch headers", file, error = res.error
waitFor client.close()
info "Building the accumulator"
let accumulator = buildAccumulator(headers)
string config.dataDir, string config.accumulatorFileName, accumulator)
of ExporterCmd.verifyHeaders:
proc verifyEpochHeaders(file: string, epoch: uint64): Result[void, string] =
let fh = ? openFile(file, {OpenFlags.Read}).mapErr(toString)
defer: discard closeFile(fh)
let epochAccumulators = buildAccumulatorData(headers)
var data: seq[byte]
while true:
let header = readRecord(fh, data).valueOr:
for i, epochAccumulator in epochAccumulators:
string config.dataDir, "eth-epoch-accumulator_" & $i & ".json",
if header.typ == ExecutionBlockHeaderRecord:
blockHeader =
rlp.decode(data, BlockHeader)
except RlpError as e:
return err("Invalid block header: " & e.msg)
headerHash = to0xHex(rlpHash(blockHeader).data)
debug "Header decoded successfully",
hash = headerHash, blockNumber = blockHeader.blockNumber
warn "Skipping record, not a block header", typ = toHex(header.typ)
for i in config.startEpochVerify..config.endEpochVerify:
let file = dataDir / &"mainnet-headers-epoch-{i.uint64:05}.e2s"
let res = verifyEpochHeaders(file, i)
if res.isErr():
error "Failed verifying epoch headers", file, error = res.error
info "Successfully decoded epoch headers", file
of ExporterCmd.exportAccumulatorData:
# Also write epoch accumulators to files. These can be re-used for creation
# of headers with proofs.
# Let's first check whether the accumulator file already exists before
# starting to build it.
let accumulatorFile = dataDir / &"mainnet-master-accumulator.e2s"
if isFile(accumulatorFile):
notice "Not building accumulator, file already exists",
file = accumulatorFile
quit 1
# Let's verify that the necessary files exist before starting to build the
# accumulator.
for i in 0..<preMergeEpochs:
let file = dataDir / &"mainnet-headers-epoch-{i.uint64:05}.e2s"
if not isFile(file):
fatal "Required epoch headers file does not exist", file
quit 1
var accumulator: Accumulator
for i in 0..<preMergeEpochs:
let file = dataDir / &"mainnet-headers-epoch-{i.uint64:05}.e2s"
let fh = openFile(file, {OpenFlags.Read}).expect("Readable file")
defer: discard closeFile(fh)
var data: seq[byte]
var count = 0'u64
while true:
let header = readRecord(fh, data).valueOr:
if header.typ == ExecutionBlockHeaderRecord:
let blockHeader =
rlp.decode(data, BlockHeader)
except RlpError as e:
fatal "Invalid block header", error = e.msg, file = file
quit 1
# Quick sanity check
if blockHeader.blockNumber.truncate(uint64) != i*epochSize + count:
fatal "Incorrect block headers in file", file = file,
blockNumber = blockHeader.blockNumber,
expectedBlockNumber = i*epochSize + count
quit 1
updateAccumulator(accumulator, blockHeader)
if count == epochSize - 1:
info "Updated an epoch", epoch = i
if blockHeader.blockNumber.truncate(uint64) == mergeBlockNumber - 1:
# TODO: Should get in the finishAccumulator but can't right now.
accumulator.currentEpoch = EpochAccumulator.init(@[])
info "Updated last epoch, finished building master accumulator",
epoch = i
warn "Skipping record, not a block header", typ = toHex(header.typ)
let fh = openFile(
accumulatorFile, {OpenFlags.Write, OpenFlags.Create}).expect(
"Permission to write and create file")
defer: discard closeFile(fh)
let res = fh.appendRecord(MasterAccumulatorRecord, SSZ.encode(accumulator))
if res.isErr():
error "Failed writing accumulator to file", file = accumulatorFile, error = res.error
notice "Succesfully wrote master accumulator to file", file = accumulatorFile
of ExporterCmd.printAccumulatorData:
let accumulatorFile = dataDir / &"mainnet-master-accumulator.e2s"
let fh = openFile(accumulatorFile, {OpenFlags.Read}).expect("Readable file")
defer: discard closeFile(fh)
var data: seq[byte]
let header = readRecord(fh, data).valueOr:
fatal "No record found"
quit 1
if header.typ == MasterAccumulatorRecord:
let accumulator =
SSZ.decode(data, Accumulator)
except SszError as e:
fatal "Invalid accumulator", error = e.msg, file = accumulatorFile
quit 1
let accumulatorRoot = hash_tree_root(accumulator)
info "Accumulator decoded successfully",
root = accumulatorRoot
echo "Master Accumulator:"
echo "-------------------"
echo &"Root: {accumulatorRoot}"
echo ""
echo "Historical Epochs:"
echo "------------------"
echo "Epoch Root"
for i, root in accumulator.historicalEpochs:
echo &"{i.uint64:05} 0x{root.toHex()}"
fatal "Record is not an accumulator", typ = toHex(header.typ)
quit 1