nim-dagger/tests/integration/nodeprocess.nim

166 lines
4.3 KiB
Nim
Raw Normal View History

refactor: multinode integration test refactor (#662) * refactor multi node test suite Refactor the multinode test suite into the marketplace test suite. - Arbitrary number of nodes can be started with each test: clients, providers, validators - Hardhat can also be started locally with each test, usually for the purpose of saving and inspecting its log file. - Log files for all nodes can be persisted on disk, with configuration at the test-level - Log files, if persisted (as specified in the test), will be persisted to a CI artifact - Node config is specified at the test-level instead of the suite-level - Node/Hardhat process starting/stopping is now async, and runs much faster - Per-node config includes: - simulating proof failures - logging to file - log level - log topics - storage quota - debug (print logs to stdout) - Tests find next available ports when starting nodes, as closing ports on Windows can lag - Hardhat is no longer required to be running prior to starting the integration tests (as long as Hardhat is configured to run in the tests). - If Hardhat is already running, a snapshot will be taken and reverted before and after each test, respectively. - If Hardhat is not already running and configured to run at the test-level, a Hardhat process will be spawned and torn down before and after each test, respectively. * additional logging for debug purposes * address PR feedback - fix spelling - revert change from catching ProviderError to SignerError -- this should be handled more consistently in the Market abstraction, and will be handled in another PR. - remove method label from raiseAssert - remove unused import * Use API instead of command exec to test for free port Use chronos `createStreamServer` API to test for free port by binding localhost address and port. Use `ServerFlags.ReuseAddr` to enable reuse of same IP/Port on multiple test runs. 
* clean up * remove upraises annotations from tests * Update tests to work with updated erasure coding slot sizes * update dataset size, nodes, tolerance to match valid ec params Integration tests now have valid dataset sizes (blocks), tolerances, and number of nodes, to work with valid ec params. These values are validated when requesting storage. Print the rest api failure message (via doAssert) when a rest api call fails (e.g. the rest api may validate some ec params). All integration tests pass when the async `clock.now` changes are reverted. * don't use async clock for now * fix workflow * move integration logs upload to reusable --------- Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
2024-02-19 04:55:39 +00:00
import pkg/questionable
import pkg/questionable/results
import pkg/confutils
import pkg/chronicles
import pkg/libp2p
import std/os
import std/strutils
import codex/conf
import codex/utils/exceptions
import codex/utils/trackedfutures
import ./codexclient
export codexclient
export chronicles
# Module-wide chronicles scope: sets the `topics` property for the log
# statements in this file.
logScope:
  topics = "integration testing node process"
type
  NodeProcess* = ref object of RootObj
    ## Base type for an external process managed by the integration tests.
    ## The `{.base.}` methods declared for this type (executable, working
    ## directory, process options, ...) raise "not implemented" here.
    process*: AsyncProcessRef
      ## Handle assigned by `start`; reset to nil by `stop`.
    arguments*: seq[string]
      ## Arguments handed to the executable when the process is started.
    debug: bool
      ## When true, every captured output line is echoed to stdout.
    trackedFutures*: TrackedFutures
      ## Futures tracked for this node; cancelled at the beginning of `stop`.
    name*: string
      ## Label attached to this node's log statements as `nodeName`.
method workingDir(node: NodeProcess): string {.base.} =
  ## Working directory handed to `startProcess` when the node is started.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method executable(node: NodeProcess): string {.base.} =
  ## Command handed to `startProcess` as the program to run.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method startedOutput(node: NodeProcess): string {.base.} =
  ## Text that `waitUntilStarted` looks for in the process output to decide
  ## that the node has started.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method processOptions(node: NodeProcess): set[AsyncProcessOption] {.base.} =
  ## Process options used by `start`, which adds `StdErrToStdOut` to
  ## whatever set is returned here.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method outputLineEndings(node: NodeProcess): string {.base.} =
  ## Line separator passed to `readLine` when the process output is
  ## captured.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method onOutputLineCaptured(node: NodeProcess, line: string) {.base.} =
  ## Hook invoked by the output capture loop for every non-empty line read
  ## from the process.
  ##
  ## This base version is only a placeholder: it always fails with an
  ## assertion, so concrete node types have to override it.
  raiseAssert("not implemented")
method start*(node: NodeProcess) {.base, async.} =
  ## Launches the node's executable as a child process and stores the
  ## resulting handle in `node.process`.
  ##
  ## The process is started with `node.processOptions` plus
  ## `StdErrToStdOut`, and with stdout connected to a pipe.
  ##
  ## Launch failures are logged rather than raised, in which case
  ## `node.process` is left untouched. Cancellation is the exception: a
  ## `CancelledError` is re-raised so that cancelling this future is not
  ## silently swallowed by the generic error handler.
  logScope:
    nodeName = node.name

  let poptions = node.processOptions + {AsyncProcessOption.StdErrToStdOut}
  trace "starting node",
    args = node.arguments,
    executable = node.executable,
    workingDir = node.workingDir,
    processOptions = poptions

  try:
    node.process = await startProcess(
      node.executable,
      node.workingDir,
      node.arguments,
      options = poptions,
      stdoutHandle = AsyncProcess.Pipe
    )
  except CancelledError as e:
    # CancelledError is a CatchableError; handle it first so cancellation
    # propagates to the caller instead of being logged and dropped.
    raise e
  except CatchableError as e:
    error "failed to start node process", error = e.msg
proc captureOutput(
    node: NodeProcess,
    output: string,
    started: Future[void]
) {.async.} =
  ## Reads the process' stdout line by line for as long as the process is
  ## reported as running.
  ##
  ## Each non-empty line is echoed when `node.debug` is set and then passed
  ## to `node.onOutputLineCaptured`. The first line containing `output`
  ## completes `started`, provided that future is non-nil and unfinished.
  ## Stream read errors are logged and end the capture.
  logScope:
    nodeName = node.name

  trace "waiting for output", output

  let stdoutReader = node.process.stdoutStream

  try:
    while node.process.running.option == some true:
      while true:
        let captured =
          await stdoutReader.readLine(0, node.outputLineEndings)
        # An empty read ends this batch; the outer loop then re-checks
        # whether the process is still running.
        if captured == "":
          break

        if node.debug:
          # would be nice if chronicles could parse and display with colors
          echo captured

        let signalStarted =
          not started.isNil and
          not started.finished and
          captured.contains(output)
        if signalStarted:
          started.complete()

        node.onOutputLineCaptured(captured)

        await sleepAsync(1.millis)
      await sleepAsync(1.millis)
  except AsyncStreamReadError as e:
    error "error reading output stream", error = e.msgDetail
proc startNode*[T: NodeProcess](
    _: type T,
    args: seq[string],
    debug: string | bool = false,
    name: string
): Future[T] {.async.} =
  ## Creates a node of type `T` with the given arguments and name, starts
  ## it, and returns it.
  ##
  ## `debug` may be a bool or a string; any value whose string form is not
  ## "false" makes the node echo its captured output.
  let
    echoOutput = $debug != "false"
    spawned = T(
      arguments: @args,
      debug: echoOutput,
      trackedFutures: TrackedFutures.new(),
      name: name
    )

  await spawned.start()
  return spawned
method stop*(node: NodeProcess) {.base, async.} =
  ## Cancels the node's tracked futures and, if a process handle is
  ## present, terminates the process, waits up to 3 seconds for it to exit
  ## and closes its streams.
  ##
  ## Errors raised while stopping are logged rather than raised, and
  ## `node.process` is reset to nil in every case. Cancellation is the
  ## exception: a `CancelledError` is re-raised (after the reset) so that
  ## cancelling this future is not silently swallowed.
  logScope:
    nodeName = node.name

  await node.trackedFutures.cancelTracked()
  if node.process != nil:
    try:
      trace "terminating node process..."
      if errCode =? node.process.terminate().errorOption:
        error "failed to terminate process", errCode

      trace "waiting for node process to exit"
      let exitCode = await node.process.waitForExit(3.seconds)
      if exitCode > 0:
        error "failed to exit process, check for zombies", exitCode

      trace "closing node process' streams"
      await node.process.closeWait()
    except CancelledError as e:
      # CancelledError is a CatchableError; handle it first so cancellation
      # propagates to the caller instead of being logged and dropped.
      raise e
    except CatchableError as e:
      error "error stopping node process", error = e.msg
    finally:
      node.process = nil

    trace "node stopped"
proc waitUntilStarted*(node: NodeProcess) {.async.} =
  ## Waits up to 5 seconds for the node to print `node.startedOutput`.
  ##
  ## Output capture is started as a tracked future, so it is cancelled
  ## when the node is stopped. On timeout the node is stopped and an
  ## assertion failure is raised. An assertion failure is also raised
  ## when there is no process handle, which is the case when `start`
  ## logged a launch failure instead of raising it.
  logScope:
    nodeName = node.name

  trace "waiting until node started"

  # `start` swallows launch errors, so the handle may be missing; fail
  # with a clear message instead of dereferencing nil in captureOutput.
  if node.process.isNil:
    raiseAssert "node process was not started"

  let started = newFuture[void]()
  try:
    discard node.captureOutput(node.startedOutput, started).track(node)
    await started.wait(5.seconds)
  except AsyncTimeoutError:
    # attempt graceful shutdown in case node was partially started, prevent
    # zombies
    await node.stop()
    raiseAssert "node did not output '" & node.startedOutput & "'"
proc restart*(node: NodeProcess) {.async.} =
  ## Stops the node, starts it again, and waits until it has printed its
  ## started output.
  await node.stop()
  await node.start()
  await node.waitUntilStarted()
method removeDataDir*(node: NodeProcess) {.base.} =
  ## Placeholder in the base type: always fails with an assertion, so
  ## concrete node types have to override it.
  # NOTE(review): presumably overrides delete the node's data directory;
  # nothing in this file shows that — confirm against the subclasses.
  raiseAssert("[removeDataDir] not implemented")