graceful shutdowns
Where possible, do not call `raiseAssert`, as other nodes in the test may already be running. Instead, raise catchable exceptions, catch them in multinodes.nim, and attempt a teardown of all running nodes before failing the test. `abortOnError` is set to true so that `fail()` quits immediately, after teardown has been run.
This commit is contained in:
parent
de4b3bebf7
commit
858b6ae339
|
@ -32,6 +32,10 @@ type
|
||||||
Provider,
|
Provider,
|
||||||
Validator,
|
Validator,
|
||||||
Hardhat
|
Hardhat
|
||||||
|
MultiNodeSuiteError = object of CatchableError
|
||||||
|
|
||||||
|
proc raiseMultiNodeSuiteError(msg: string) =
|
||||||
|
raise newException(MultiNodeSuiteError, msg)
|
||||||
|
|
||||||
proc nextFreePort(startPort: int): Future[int] {.async.} =
|
proc nextFreePort(startPort: int): Future[int] {.async.} =
|
||||||
|
|
||||||
|
@ -67,8 +71,6 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
var accounts {.inject, used.}: seq[Address]
|
var accounts {.inject, used.}: seq[Address]
|
||||||
var snapshot: JsonNode
|
var snapshot: JsonNode
|
||||||
|
|
||||||
proc teardownImpl(): Future[void] {.gcsafe.}
|
|
||||||
|
|
||||||
template test(tname, startNodeConfigs, tbody) =
|
template test(tname, startNodeConfigs, tbody) =
|
||||||
currentTestName = tname
|
currentTestName = tname
|
||||||
nodeConfigs = startNodeConfigs
|
nodeConfigs = startNodeConfigs
|
||||||
|
@ -111,7 +113,10 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
args.add "--log-file=" & updatedLogFile
|
args.add "--log-file=" & updatedLogFile
|
||||||
|
|
||||||
let node = await HardhatProcess.startNode(args, config.debugEnabled, "hardhat")
|
let node = await HardhatProcess.startNode(args, config.debugEnabled, "hardhat")
|
||||||
|
try:
|
||||||
await node.waitUntilStarted()
|
await node.waitUntilStarted()
|
||||||
|
except NodeProcessError as e:
|
||||||
|
raiseMultiNodeSuiteError "hardhat node not started: " & e.msg
|
||||||
|
|
||||||
trace "hardhat node started"
|
trace "hardhat node started"
|
||||||
return node
|
return node
|
||||||
|
@ -125,9 +130,8 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
var config = conf
|
var config = conf
|
||||||
|
|
||||||
if nodeIdx > accounts.len - 1:
|
if nodeIdx > accounts.len - 1:
|
||||||
await teardownImpl()
|
raiseMultiNodeSuiteError "Cannot start node at nodeIdx " & $nodeIdx &
|
||||||
raiseAssert("Cannot start node at nodeIdx " & $nodeIdx &
|
", not enough eth accounts."
|
||||||
", not enough eth accounts.")
|
|
||||||
|
|
||||||
let datadir = getTempDir() / "Codex" /
|
let datadir = getTempDir() / "Codex" /
|
||||||
sanitize($starttime) /
|
sanitize($starttime) /
|
||||||
|
@ -146,19 +150,19 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
config.addCliOption("--disc-port", $ await nextFreePort(8090 + nodeIdx))
|
config.addCliOption("--disc-port", $ await nextFreePort(8090 + nodeIdx))
|
||||||
|
|
||||||
except CodexConfigError as e:
|
except CodexConfigError as e:
|
||||||
fatal "invalid cli option", error = e.msg
|
raiseMultiNodeSuiteError "invalid cli option, error: " & e.msg
|
||||||
echo "[FATAL] invalid cli option ", e.msg
|
|
||||||
await teardownImpl()
|
|
||||||
fail()
|
|
||||||
return
|
|
||||||
|
|
||||||
let node = await CodexProcess.startNode(
|
let node = await CodexProcess.startNode(
|
||||||
config.cliArgs,
|
config.cliArgs,
|
||||||
config.debugEnabled,
|
config.debugEnabled,
|
||||||
$role & $roleIdx
|
$role & $roleIdx
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
await node.waitUntilStarted()
|
await node.waitUntilStarted()
|
||||||
trace "node started", nodeName = $role & $roleIdx
|
trace "node started", nodeName = $role & $roleIdx
|
||||||
|
except NodeProcessError as e:
|
||||||
|
raiseMultiNodeSuiteError "node not started, error: " & e.msg
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
@ -215,7 +219,7 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
|
|
||||||
return await newCodexProcess(validatorIdx, config, Role.Validator)
|
return await newCodexProcess(validatorIdx, config, Role.Validator)
|
||||||
|
|
||||||
proc teardownImpl {.async.} =
|
proc teardownImpl() {.async.} =
|
||||||
for nodes in @[validators(), clients(), providers()]:
|
for nodes in @[validators(), clients(), providers()]:
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
await node.stop() # also stops rest client
|
await node.stop() # also stops rest client
|
||||||
|
@ -231,10 +235,27 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
|
|
||||||
running = @[]
|
running = @[]
|
||||||
|
|
||||||
|
template failAndTeardownOnError(message: string, tryBody: untyped) =
|
||||||
|
try:
|
||||||
|
tryBody
|
||||||
|
except CatchableError as er:
|
||||||
|
fatal message, error=er.msg
|
||||||
|
echo "[FATAL] ", message, ": ", er.msg
|
||||||
|
await teardownImpl()
|
||||||
|
when declared(teardownAllIMPL):
|
||||||
|
teardownAllIMPL()
|
||||||
|
fail()
|
||||||
|
quit(1)
|
||||||
|
|
||||||
setup:
|
setup:
|
||||||
if var conf =? nodeConfigs.hardhat:
|
if var conf =? nodeConfigs.hardhat:
|
||||||
|
try:
|
||||||
let node = await startHardhatNode(conf)
|
let node = await startHardhatNode(conf)
|
||||||
running.add RunningNode(role: Role.Hardhat, node: node)
|
running.add RunningNode(role: Role.Hardhat, node: node)
|
||||||
|
except CatchableError as e:
|
||||||
|
echo "failed to start hardhat node"
|
||||||
|
fail()
|
||||||
|
quit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ethProvider = JsonRpcProvider.new("ws://localhost:8545")
|
ethProvider = JsonRpcProvider.new("ws://localhost:8545")
|
||||||
|
@ -244,13 +265,14 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
snapshot = await send(ethProvider, "evm_snapshot")
|
snapshot = await send(ethProvider, "evm_snapshot")
|
||||||
accounts = await ethProvider.listAccounts()
|
accounts = await ethProvider.listAccounts()
|
||||||
except CatchableError as e:
|
except CatchableError as e:
|
||||||
fatal "failed to connect to hardhat", error = e.msg
|
echo "Hardhat not running. Run hardhat manually " &
|
||||||
echo "[FATAL] Hardhat not running. Run hardhat manually before executing tests, or include a HardhatConfig in the test setup."
|
"before executing tests, or include a " &
|
||||||
await teardownImpl()
|
"HardhatConfig in the test setup."
|
||||||
fail()
|
fail()
|
||||||
return
|
quit(1)
|
||||||
|
|
||||||
if var clients =? nodeConfigs.clients:
|
if var clients =? nodeConfigs.clients:
|
||||||
|
failAndTeardownOnError "failed to start client nodes":
|
||||||
for config in clients.configs:
|
for config in clients.configs:
|
||||||
let node = await startClientNode(config)
|
let node = await startClientNode(config)
|
||||||
running.add RunningNode(
|
running.add RunningNode(
|
||||||
|
@ -261,6 +283,7 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
bootstrap = CodexProcess(node).client.info()["spr"].getStr()
|
bootstrap = CodexProcess(node).client.info()["spr"].getStr()
|
||||||
|
|
||||||
if var providers =? nodeConfigs.providers:
|
if var providers =? nodeConfigs.providers:
|
||||||
|
failAndTeardownOnError "failed to start provider nodes":
|
||||||
for config in providers.configs.mitems:
|
for config in providers.configs.mitems:
|
||||||
let node = await startProviderNode(config)
|
let node = await startProviderNode(config)
|
||||||
running.add RunningNode(
|
running.add RunningNode(
|
||||||
|
@ -269,6 +292,7 @@ template multinodesuite*(name: string, body: untyped) =
|
||||||
)
|
)
|
||||||
|
|
||||||
if var validators =? nodeConfigs.validators:
|
if var validators =? nodeConfigs.validators:
|
||||||
|
failAndTeardownOnError "failed to start validator nodes":
|
||||||
for config in validators.configs.mitems:
|
for config in validators.configs.mitems:
|
||||||
let node = await startValidatorNode(config)
|
let node = await startValidatorNode(config)
|
||||||
running.add RunningNode(
|
running.add RunningNode(
|
||||||
|
|
|
@ -23,6 +23,7 @@ type
|
||||||
debug: bool
|
debug: bool
|
||||||
trackedFutures*: TrackedFutures
|
trackedFutures*: TrackedFutures
|
||||||
name*: string
|
name*: string
|
||||||
|
NodeProcessError* = object of CatchableError
|
||||||
|
|
||||||
method workingDir(node: NodeProcess): string {.base.} =
|
method workingDir(node: NodeProcess): string {.base.} =
|
||||||
raiseAssert "not implemented"
|
raiseAssert "not implemented"
|
||||||
|
@ -152,12 +153,14 @@ proc waitUntilStarted*(node: NodeProcess) {.async.} =
|
||||||
try:
|
try:
|
||||||
discard node.captureOutput(node.startedOutput, started).track(node)
|
discard node.captureOutput(node.startedOutput, started).track(node)
|
||||||
await started.wait(35.seconds) # allow enough time for proof generation
|
await started.wait(35.seconds) # allow enough time for proof generation
|
||||||
except AsyncTimeoutError as e:
|
except AsyncTimeoutError:
|
||||||
# attempt graceful shutdown in case node was partially started, prevent
|
# attempt graceful shutdown in case node was partially started, prevent
|
||||||
# zombies
|
# zombies
|
||||||
# TODO: raise error here so that all nodes can be shutdown gracefully
|
|
||||||
await node.stop()
|
await node.stop()
|
||||||
raiseAssert "node did not output '" & node.startedOutput & "'"
|
# raise error here so that all nodes (not just this one) can be
|
||||||
|
# shutdown gracefully
|
||||||
|
raise newException(NodeProcessError, "node did not output '" &
|
||||||
|
node.startedOutput & "'")
|
||||||
|
|
||||||
proc restart*(node: NodeProcess) {.async.} =
|
proc restart*(node: NodeProcess) {.async.} =
|
||||||
await node.stop()
|
await node.stop()
|
||||||
|
|
Loading…
Reference in New Issue