Add populate history db option to fluffy (#929)

This commit is contained in:
Kim De Mey 2022-01-18 15:08:02 +01:00 committed by GitHub
parent 81ebfd2b2a
commit 14dd763900
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 2 deletions

View File

@ -37,6 +37,7 @@ const
type
PortalCmd* = enum
noCommand
populateHistoryDb
PortalConf* = object
logLevel* {.
@ -163,6 +164,17 @@ type
defaultValue: noCommand .}: PortalCmd
of noCommand:
discard
of populateHistoryDb:
# Note: we could use the existing data dir here, but that would also require
# properly storing the network key and using the available one by default.
dbDir* {.
desc: "The directory of the fluffy content database"
defaultValue: ""
name: "db-dir" }: OutDir
dataFile* {.
desc: "Specify a json file with a map of k:v pairs representing BlockHash : Rlp encoded block"
defaultValue: ""
name: "data-file" }: InputFile
proc parseCmdArg*(T: type enr.Record, p: TaintedString): T
{.raises: [Defect, ConfigurationError].} =

View File

@ -20,7 +20,7 @@ import
./network/state/[state_network, state_content],
./network/history/[history_network, history_content],
./network/wire/[portal_stream, portal_protocol_config],
./content_db
"."/[content_db, populate_db]
proc initializeBridgeClient(maybeUri: Option[string]): Option[BridgeClient] =
try:
@ -130,4 +130,10 @@ when isMainModule:
setLogLevel(config.logLevel)
case config.cmd
of noCommand: run(config)
of PortalCmd.noCommand:
run(config)
of PortalCmd.populateHistoryDb:
let res = populateHistoryDb(config.dbDir.string, config.dataFile.string)
if res.isErr():
fatal "Failed populating the history content db", error = $res.error
quit 1

65
fluffy/populate_db.nim Normal file
View File

@ -0,0 +1,65 @@
# Nimbus - Portal Network
# Copyright (c) 2022 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
json_serialization, json_serialization/std/tables,
stew/[byteutils, io2, results],
eth/rlp,
./content_db,
./network/history/history_content
# Offline tool to populate the database from the currently existing JSON files
# containing block data. We might move to some other storage format later on;
# https://github.com/status-im/nimbus-eth2/blob/stable/docs/e2store.md could be
# an interesting candidate.
type
  BlockData = object
    ## One value of the JSON data file, as decoded by `populateHistoryDb`.
    rlp: string
      ## Hex string holding the RLP encoded block data.
    number: uint64
      ## Numeric field decoded from the JSON entry; not read by the code in
      ## this module (presumably the block number - confirm against the data
      ## files).

  BlockDataTable = Table[string, BlockData]
    ## Whole data file: `BlockData` entries keyed by hex encoded block hash.
proc populateHistoryDb*(dbDir: string, dataFile: string): Result[void, string] =
  ## Populates the content database in `dbDir` with the block headers found in
  ## the JSON file `dataFile`.
  ##
  ## `dataFile` is decoded as a `BlockDataTable`: a map from hex encoded block
  ## hash to `BlockData`. For every entry, the first item of the RLP list (the
  ## block header) is stored under the content id derived from a `blockHeader`
  ## content key with `chainId` 1 and the entry's block hash.
  ##
  ## Returns `ok()` when all entries were stored. Returns `err` with a
  ## description when the file cannot be read, the JSON cannot be decoded, or
  ## an entry holds an invalid block hash or RLP payload. Processing stops at
  ## the first failing entry; entries stored before it remain in the database.

  # Read and decode the input before opening the database, so that an
  # unreadable or malformed data file does not open (or create) a database.
  let blockData = readAllFile(dataFile)
  if blockData.isErr():
    return err("Failed reading data-file: " & ioErrorMsg(blockData.error))

  let decoded =
    try:
      Json.decode(blockData.get(), BlockDataTable)
    except CatchableError as e:
      return err("Failed decoding json data-file: " & e.msg)

  let db = ContentDB.new(dbDir)

  # This is definitely the slowest part because of the hashing that happens in
  # toContentId()
  for k, v in decoded:
    try:
      var rlp = rlpFromHex(v.rlp)
      if not rlp.enterList():
        # Report malformed entries instead of silently skipping them and
        # returning success for an incompletely populated database.
        return err("Item is not a valid rlp list, block hash: " & k)

      # List that contains 3 items: Block header, body and receipts.
      # Only store block header for now.
      # When we want others, can use `rlp.skipElem()` and `rlp.rawData()`.

      # Prepare content key
      var blockHash: BlockHash
      blockHash.data = hexToByteArray[sizeof(BlockHash)](k)

      let contentKey = ContentKey(
        contentType: blockHeader,
        blockHeaderKey: ContentKeyType(chainId: 1'u16, blockHash: blockHash))

      db.put(contentKey.toContentId(), rlp.rawData())
    except CatchableError as e:
      return err("Failed decoding block hash or data: " & e.msg)

  ok()