nim-codex/tests/integration/multinodes.nim
Marcin Czenko 92a0eda79a
Validator historical state restoration (#922)
* adds a new overload of queryPastEvents allowing to query past events based on timestamp in the past

* adds state restoration to validator

* refactors a bit to get the tests back to work

* replaces deprecated generic methods from Market with methods for specific event types

* Refactors binary search

* adds market tests for querying past SlotFilled events and binary search

* Takes into account that <<earliest>> block available is not necessarily the genesis block

* Adds more logging and makes testing earliest block boundary more reliable

* adds validation tests for historical state restoration

* adds mockprovider to simplify and improve testing of the edge conditions

* adds slot reservation to the new tests after rebasing

* adds validation groups and group index in logs of validator

* adds integration test with two validators

* adds comment on how to enable logging in integration test executable itself

* testIntegration: makes list is running nodes injected and available in the body of the test

* validation: adds integration test for historical state

* adds more logging to validator

* integration test: validator only looks 30 days back for historical state

* adds logging of the slotState when removing slots during validation

* review and refactor validator integration tests

* adds validation to the set of integration tests

* Fixes mistyped name of the mock provider module in testMarket

* Fixes a typo in the name of the validation suite in integration tests

* Makes validation unit test a bit easier to follow

* better use of logScopes to reduce duplication

* improves timing and clarifies the test conditions

* uses http as default RPC provider for nodes running in integration tests as a workaround for dropped subscriptions

* simplifies the validation integration tests by waiting for failed request instead of tracking slots

* adds config option allowing selectively to set different provider url

* Brings back the default settings for RPC provider in integration tests

* use http RPC provider for clients in validation integration tests

* fine-tune the tests

* Makes validator integration test more robust - adds extra tracking

* brings tracking of marketplace event back to validator integration test

* refactors integration tests

* deletes tmp file

* adds <<return>> after forcing integration test to fail preliminarily

* re-enables all integration tests and matrix

* stops debug output in CI

* allows to choose a different RPC provider for a given integration test suite

* fixes signature of <<getBlock>> method in mockProvider

* adds missing import which seem to be braking integration tests on windows

* makes sure that clients, SPs, and validators use the same provider url

* makes validator integration tests using http at 127.0.0.1:8545

* testvalidator: stop resubscribing as we are now using http polling as rpc provider

* applying review comments

* groups queryPastStorage overrides together (review comment)

* groups the historical validation tests into a sub suite

* removes the temporary extensions in marketplacesuite and multinodesuite allowing to specify provider url

* simplifies validation integration tests

* Removes debug logs when waiting for request to fail

* Renaming waitForRequestFailed => waitForRequestToFail

* renames blockNumberForBlocksAgo to pastBlockTag and makes it private

* removes redundant debugging logs

* refines logging in validation

* removes dev logging from mockmarket

* improves exception handling in provider helper procs and prepares for extraction to a separate module

* Uses chronos instead of std/times for Duration

* extracts provider and binary search helpers to a separate module

* removes redundant log entry params from validator

* unifies the notation to consistently use method call syntax

* reuses ProviderError from nim-ethers in the provider extension

* clarifies the comment in multinodesuite

* uses == operator to check the predefined tags and raises exception when `BlockTag.pending` is requested.

* when waiting for request to fail, we break on any request state that is not Started

* removes tests that were moved to testProvider from testMarket

* extracts tests that use MockProvider to a separate async suite

* improves performance of the historical state restoration

* removing redundant log messages in validator (groupIndex and groups)

* adds testProvider to testContracts group

* removes unused import in testMarket
2024-12-14 05:07:55 +00:00

346 lines
12 KiB
Nim

import std/os
import std/sequtils
import std/strutils
import std/sugar
import std/times
import pkg/codex/conf
import pkg/codex/logutils
import pkg/chronos/transports/stream
import pkg/ethers
import pkg/questionable
import ./codexconfig
import ./codexprocess
import ./hardhatconfig
import ./hardhatprocess
import ./nodeconfigs
import ../asynctest
import ../checktest
export asynctest
export ethers except `%`
export hardhatprocess
export codexprocess
export hardhatconfig
export codexconfig
type
RunningNode* = ref object
role*: Role
node*: NodeProcess
Role* {.pure.} = enum
Client,
Provider,
Validator,
Hardhat
MultiNodeSuiteError = object of CatchableError
proc raiseMultiNodeSuiteError(msg: string) =
raise newException(MultiNodeSuiteError, msg)
proc nextFreePort(startPort: int): Future[int] {.async.} =
proc client(server: StreamServer, transp: StreamTransport) {.async.} =
await transp.closeWait()
var port = startPort
while true:
trace "checking if port is free", port
try:
let host = initTAddress("127.0.0.1", port)
# We use ReuseAddr here only to be able to reuse the same IP/Port when
# there's a TIME_WAIT socket. It's useful when running the test multiple
# times or if a test ran previously using the same port.
var server = createStreamServer(host, client, {ReuseAddr})
trace "port is free", port
await server.closeWait()
return port
except TransportOsError:
trace "port is not free", port
inc port
template multinodesuite*(name: string, body: untyped) =
asyncchecksuite name:
# Following the problem described here:
# https://github.com/NomicFoundation/hardhat/issues/2053
# It may be desirable to use http RPC provider.
# This turns out to be equally important in tests where
# subscriptions get wiped out after 5mins even when
# a new block is mined.
# For this reason, we are using http provider here as the default.
# To use a different provider in your test, you may use
# multinodesuiteWithProviderUrl template in your tests.
# If you want to use a different provider url in the nodes, you can
# use withEthProvider config modifier in the node config
# to set the desired provider url. E.g.:
# NodeConfigs(
# hardhat:
# HardhatConfig.none,
# clients:
# CodexConfigs.init(nodes=1)
# .withEthProvider("ws://localhost:8545")
# .some,
# ...
let jsonRpcProviderUrl = "http://127.0.0.1:8545"
var running {.inject, used.}: seq[RunningNode]
var bootstrap: string
let starttime = now().format("yyyy-MM-dd'_'HH:mm:ss")
var currentTestName = ""
var nodeConfigs: NodeConfigs
var ethProvider {.inject, used.}: JsonRpcProvider
var accounts {.inject, used.}: seq[Address]
var snapshot: JsonNode
template test(tname, startNodeConfigs, tbody) =
currentTestName = tname
nodeConfigs = startNodeConfigs
test tname:
tbody
proc sanitize(pathSegment: string): string =
var sanitized = pathSegment
for invalid in invalidFilenameChars.items:
sanitized = sanitized.replace(invalid, '_')
.replace(' ', '_')
sanitized
proc getLogFile(role: Role, index: ?int): string =
# create log file path, format:
# tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
var logDir = currentSourcePath.parentDir() /
"logs" /
sanitize($starttime & "__" & name) /
sanitize($currentTestName)
createDir(logDir)
var fn = $role
if idx =? index:
fn &= "_" & $idx
fn &= ".log"
let fileName = logDir / fn
return fileName
proc newHardhatProcess(
config: HardhatConfig,
role: Role
): Future[NodeProcess] {.async.} =
var args: seq[string] = @[]
if config.logFile:
let updatedLogFile = getLogFile(role, none int)
args.add "--log-file=" & updatedLogFile
let node = await HardhatProcess.startNode(args, config.debugEnabled, "hardhat")
try:
await node.waitUntilStarted()
except NodeProcessError as e:
raiseMultiNodeSuiteError "hardhat node not started: " & e.msg
trace "hardhat node started"
return node
proc newCodexProcess(roleIdx: int,
conf: CodexConfig,
role: Role
): Future[NodeProcess] {.async.} =
let nodeIdx = running.len
var config = conf
if nodeIdx > accounts.len - 1:
raiseMultiNodeSuiteError "Cannot start node at nodeIdx " & $nodeIdx &
", not enough eth accounts."
let datadir = getTempDir() / "Codex" /
sanitize($starttime) /
sanitize($role & "_" & $roleIdx)
try:
if config.logFile.isSome:
let updatedLogFile = getLogFile(role, some roleIdx)
config.withLogFile(updatedLogFile)
config.addCliOption("--api-port", $ await nextFreePort(8080 + nodeIdx))
config.addCliOption("--data-dir", datadir)
config.addCliOption("--nat", "127.0.0.1")
config.addCliOption("--listen-addrs", "/ip4/127.0.0.1/tcp/0")
config.addCliOption("--disc-ip", "127.0.0.1")
config.addCliOption("--disc-port", $ await nextFreePort(8090 + nodeIdx))
except CodexConfigError as e:
raiseMultiNodeSuiteError "invalid cli option, error: " & e.msg
let node = await CodexProcess.startNode(
config.cliArgs,
config.debugEnabled,
$role & $roleIdx
)
try:
await node.waitUntilStarted()
trace "node started", nodeName = $role & $roleIdx
except NodeProcessError as e:
raiseMultiNodeSuiteError "node not started, error: " & e.msg
return node
proc hardhat: HardhatProcess =
for r in running:
if r.role == Role.Hardhat:
return HardhatProcess(r.node)
return nil
proc clients: seq[CodexProcess] {.used.} =
return collect:
for r in running:
if r.role == Role.Client:
CodexProcess(r.node)
proc providers: seq[CodexProcess] {.used.} =
return collect:
for r in running:
if r.role == Role.Provider:
CodexProcess(r.node)
proc validators: seq[CodexProcess] {.used.} =
return collect:
for r in running:
if r.role == Role.Validator:
CodexProcess(r.node)
proc startHardhatNode(config: HardhatConfig): Future[NodeProcess] {.async.} =
return await newHardhatProcess(config, Role.Hardhat)
proc startClientNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
let clientIdx = clients().len
var config = conf
config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl)
config.addCliOption(StartUpCmd.persistence, "--eth-account", $accounts[running.len])
return await newCodexProcess(clientIdx, config, Role.Client)
proc startProviderNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
let providerIdx = providers().len
var config = conf
config.addCliOption("--bootstrap-node", bootstrap)
config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl)
config.addCliOption(StartUpCmd.persistence, "--eth-account", $accounts[running.len])
config.addCliOption(PersistenceCmd.prover, "--circom-r1cs",
"vendor/codex-contracts-eth/verifier/networks/hardhat/proof_main.r1cs")
config.addCliOption(PersistenceCmd.prover, "--circom-wasm",
"vendor/codex-contracts-eth/verifier/networks/hardhat/proof_main.wasm")
config.addCliOption(PersistenceCmd.prover, "--circom-zkey",
"vendor/codex-contracts-eth/verifier/networks/hardhat/proof_main.zkey")
return await newCodexProcess(providerIdx, config, Role.Provider)
proc startValidatorNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
let validatorIdx = validators().len
var config = conf
config.addCliOption("--bootstrap-node", bootstrap)
config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl)
config.addCliOption(StartUpCmd.persistence, "--eth-account", $accounts[running.len])
config.addCliOption(StartUpCmd.persistence, "--validator")
return await newCodexProcess(validatorIdx, config, Role.Validator)
proc teardownImpl() {.async.} =
for nodes in @[validators(), clients(), providers()]:
for node in nodes:
await node.stop() # also stops rest client
node.removeDataDir()
# if hardhat was started in the test, kill the node
# otherwise revert the snapshot taken in the test setup
let hardhat = hardhat()
if not hardhat.isNil:
await hardhat.stop()
else:
discard await send(ethProvider, "evm_revert", @[snapshot])
running = @[]
template failAndTeardownOnError(message: string, tryBody: untyped) =
try:
tryBody
except CatchableError as er:
fatal message, error=er.msg
echo "[FATAL] ", message, ": ", er.msg
await teardownImpl()
when declared(teardownAllIMPL):
teardownAllIMPL()
fail()
quit(1)
setup:
if var conf =? nodeConfigs.hardhat:
try:
let node = await startHardhatNode(conf)
running.add RunningNode(role: Role.Hardhat, node: node)
except CatchableError as e:
echo "failed to start hardhat node"
fail()
quit(1)
try:
# Workaround for https://github.com/NomicFoundation/hardhat/issues/2053
# Do not use websockets, but use http and polling to stop subscriptions
# from being removed after 5 minutes
ethProvider = JsonRpcProvider.new(
jsonRpcProviderUrl,
pollingInterval = chronos.milliseconds(100)
)
# if hardhat was NOT started by the test, take a snapshot so it can be
# reverted in the test teardown
if nodeConfigs.hardhat.isNone:
snapshot = await send(ethProvider, "evm_snapshot")
accounts = await ethProvider.listAccounts()
except CatchableError as e:
echo "Hardhat not running. Run hardhat manually " &
"before executing tests, or include a " &
"HardhatConfig in the test setup."
fail()
quit(1)
if var clients =? nodeConfigs.clients:
failAndTeardownOnError "failed to start client nodes":
for config in clients.configs:
let node = await startClientNode(config)
running.add RunningNode(
role: Role.Client,
node: node
)
if clients().len == 1:
without ninfo =? CodexProcess(node).client.info():
# raise CatchableError instead of Defect (with .get or !) so we
# can gracefully shutdown and prevent zombies
raiseMultiNodeSuiteError "Failed to get node info"
bootstrap = ninfo["spr"].getStr()
if var providers =? nodeConfigs.providers:
failAndTeardownOnError "failed to start provider nodes":
for config in providers.configs.mitems:
let node = await startProviderNode(config)
running.add RunningNode(
role: Role.Provider,
node: node
)
if var validators =? nodeConfigs.validators:
failAndTeardownOnError "failed to start validator nodes":
for config in validators.configs.mitems:
let node = await startValidatorNode(config)
running.add RunningNode(
role: Role.Validator,
node: node
)
# ensure that we have a recent block with a fresh timestamp
discard await send(ethProvider, "evm_mine")
teardown:
await teardownImpl()
body