From 3b520fc0c6cd62392b186bfea76eef64ea884e61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Uhl=C3=AD=C5=99?= Date: Wed, 27 Sep 2023 15:57:41 +0200 Subject: [PATCH] fix: unknown state goes to payout when slot state is finished (#555) Support logging to file Log the entire config and fix build error Process slot queue on reservation callback onMarkUnused Add Reservation object, rename reserve > create refactor Reservations api to include Reservation CRUD All tests that use the Reservation module updated - add requestId and slotIndex to Reservation (hopefully these will prove to be useful when we persist Reservations until request are completed, to add back bytes to Availability) - add querying of all reservations, with accompanying tests - change from find to findAvailabilities - move onCleanUp from SalesContext to SalesAgent as it was getting overwritten for each slot processed - remove sales agent AFTER deleting reservation, as this was causing some SIGSEGVs - retrofit testsales and testslotqueue to match updated Reservations module API Add deletion of inactive reservations on sales load clean up add exception message detail util Apply to onStore errors as we are seeing undetailed errors in the dist tests logs add missing file change slotsize to reflect current implemenation Fix slotSize to reduce by one block Revert change to slotSize that reduces it by one block Add additional test check for querying reservations/availabilities filter past requests based on availability Because availability filtering on push was removed, when availability is added and past storage request events are queried, those requests need to be filtered by availability before being added to the queue. Revert "filter past requests based on availability" This reverts commit 0c2362658b523e0de425794b1fa30ebd53bd30ae. Add debugging for dist tests Add cancel on error during filling state When calling fillSlot, any transaction errors that occur (possibly during estimate gas) will cause that tx to be replaced with a cancellation transaction (a 0-valued tx to ourselves). more debug logging fix build wait for fillSlot to be mined (so that a revert error can be extracted) fix confirmation of fillSlot add more debugging moar debugging debugging: change echo to trace bump ethers to add chronicles fix contracts tests switch to earlier nim-ethers commit bump json-rpc and nim-ethers bump nim-ethers to prevent parsing newHeads log from crashing sales state machine moar debugging moar debugging moar debugging bump ethers to fix "key not found: data" error ethers debug logging bump ethers - better Transaction object deserialization bump ethers to replay tx with past tx format bump ethers to add serialization for PastTransaction ethers bump: better logging, separate logic for revert reason string fix build ethers: update revert reason retreival remove unneeded confirm ethers: try replay without decrementing blockNumber ethers: include type and chainId in replayed txs ethers: add gas into and remove type from replayed txs ensure gas is being serialized in Transaction ethers: fix build error bump ethers: rebased on top of cancel tx due to estimateGas error PR Fix proving with latest ethers bump (tested on master) Update multinode suite for better simulateFailedProofs enabling, add proofs test Improve multinode suite for better debug options, including logging to file There is a 503 "sales unavailable" error improve multinode test suite, add logging to file --- .gitignore | 1 + codex/contracts/market.nim | 23 +- codex/contracts/marketplace.nim | 2 +- codex/market.nim | 14 + codex/sales/states/filling.nim | 1 + codex/sales/states/proving.nim | 17 +- codex/sales/states/provingsimulated.nim | 7 +- tests/codex/helpers/mockmarket.nim | 3 + tests/contracts/testContracts.nim | 4 +- tests/integration/codexclient.nim | 2 +- tests/integration/multinodes.nim | 276 +++++++++++----- tests/integration/nodes.nim | 6 + tests/integration/testproofs.nim | 413 ++++++++++++++++-------- vendor/nim-json-rpc | 2 +- 14 files changed, 524 insertions(+), 247 deletions(-) diff --git a/.gitignore b/.gitignore index c85aa931..c1866c8c 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ docker/hostdatadir docker/prometheus-data .DS_Store nim.cfg +tests/integration/logs diff --git a/codex/contracts/market.nim b/codex/contracts/market.nim index c8ed9ffb..3fc07c9a 100644 --- a/codex/contracts/market.nim +++ b/codex/contracts/market.nim @@ -2,12 +2,13 @@ import std/sequtils import std/strutils import std/sugar import pkg/chronicles -import pkg/ethers +import pkg/ethers except `%` import pkg/ethers/testing import pkg/upraises import pkg/questionable +import ../utils/json import ../market -import ./marketplace +import ./marketplace except `%` export market @@ -22,6 +23,7 @@ type EventSubscription = ethers.Subscription OnChainMarketSubscription = ref object of MarketSubscription eventSubscription: EventSubscription + OnChainMarketError = object of CatchableError func new*(_: type OnChainMarket, contract: Marketplace): OnChainMarket = without signer =? contract.signer: @@ -31,7 +33,7 @@ func new*(_: type OnChainMarket, contract: Marketplace): OnChainMarket = signer: signer, ) -proc approveFunds(market: OnChainMarket, amount: UInt256) {.async.} = +proc approveFunds(market: OnChainMarket, amount: UInt256, waitForConfirmations = 0) {.async.} = debug "Approving tokens", amount let tokenAddress = await market.contract.token() let token = Erc20Token.new(tokenAddress, market.signer) @@ -110,13 +112,22 @@ method getActiveSlot*(market: OnChainMarket, return none Slot raise e -method fillSlot(market: OnChainMarket, +proc cancelTransaction(market: OnChainMarket, nonce: UInt256) {.async.} = + let address = await market.getSigner() + let cancelTx = Transaction(to: address, value: 0.u256, nonce: some nonce) + let populated = await market.signer.populateTransaction(cancelTx) + trace "cancelling transaction to prevent stuck transactions", nonce + discard market.signer.sendTransaction(populated) + +method fillSlot*(market: OnChainMarket, requestId: RequestId, slotIndex: UInt256, proof: seq[byte], collateral: UInt256) {.async.} = - await market.approveFunds(collateral) - await market.contract.fillSlot(requestId, slotIndex, proof) + + await market.approveFunds(collateral, 1) + trace "calling contract fillSlot", slotIndex, requestId + discard await market.contract.fillSlot(requestId, slotIndex, proof).confirm(1) method freeSlot*(market: OnChainMarket, slotId: SlotId) {.async.} = await market.contract.freeSlot(slotId) diff --git a/codex/contracts/marketplace.nim b/codex/contracts/marketplace.nim index f09e8720..47b1a3d2 100644 --- a/codex/contracts/marketplace.nim +++ b/codex/contracts/marketplace.nim @@ -43,7 +43,7 @@ proc slashPercentage*(marketplace: Marketplace): UInt256 {.contract, view.} proc minCollateralThreshold*(marketplace: Marketplace): UInt256 {.contract, view.} proc requestStorage*(marketplace: Marketplace, request: StorageRequest) {.contract.} -proc fillSlot*(marketplace: Marketplace, requestId: RequestId, slotIndex: UInt256, proof: seq[byte]) {.contract.} +proc fillSlot*(marketplace: Marketplace, requestId: RequestId, slotIndex: UInt256, proof: seq[byte]): ?TransactionResponse {.contract.} proc withdrawFunds*(marketplace: Marketplace, requestId: RequestId) {.contract.} proc freeSlot*(marketplace: Marketplace, id: SlotId) {.contract.} proc getRequest*(marketplace: Marketplace, id: RequestId): StorageRequest {.contract, view.} diff --git a/codex/market.nim b/codex/market.nim index be0d06fc..be10a118 100644 --- a/codex/market.nim +++ b/codex/market.nim @@ -5,6 +5,7 @@ import pkg/ethers/erc20 import ./contracts/requests import ./clock import ./periods +import ./utils/exceptions export chronos export questionable @@ -76,6 +77,19 @@ method getActiveSlot*( raiseAssert("not implemented") +method cancelTransaction(market: Market, nonce: UInt256) {.base, async.} = + raiseAssert("not implemented") + +template cancelOnError*(market: Market, body) = + try: + body + except JsonRpcProviderError as e: + trace "error encountered, cancelling tx if there's a nonce present", nonce = e.nonce, error = e.msgDetail + writeStackTrace() + if e.nonce.isSome: + # send a 0-valued transaction with the errored nonce to prevent stuck txs + await market.cancelTransaction(!e.nonce) + method fillSlot*(market: Market, requestId: RequestId, slotIndex: UInt256, diff --git a/codex/sales/states/filling.nim b/codex/sales/states/filling.nim index b1e8471c..b058fa54 100644 --- a/codex/sales/states/filling.nim +++ b/codex/sales/states/filling.nim @@ -34,3 +34,4 @@ method run(state: SaleFilling, machine: Machine): Future[?State] {.async.} = debug "Filling slot", requestId = $data.requestId, slotIndex = $data.slotIndex await market.fillSlot(data.requestId, data.slotIndex, state.proof, collateral) + debug "Waiting for slot filled event...", requestId = $data.requestId, slotIndex = $data.slotIndex diff --git a/codex/sales/states/proving.nim b/codex/sales/states/proving.nim index e4059f6d..1c39b910 100644 --- a/codex/sales/states/proving.nim +++ b/codex/sales/states/proving.nim @@ -1,6 +1,7 @@ import std/options import pkg/chronicles import ../../clock +import ../../utils/exceptions import ../statemachine import ../salesagent import ../salescontext @@ -29,8 +30,10 @@ method prove*( let proof = await onProve(slot) debug "Submitting proof", currentPeriod = currentPeriod, slotId = $slot.id await market.submitProof(slot.id, proof) + except CancelledError: + discard except CatchableError as e: - error "Submitting proof failed", msg = e.msg + error "Submitting proof failed", msg = e.msgDetail proc proveLoop( state: SaleProving, @@ -119,12 +122,12 @@ method run*(state: SaleProving, machine: Machine): Future[?State] {.async.} = debug "Stopping proving.", requestId = $data.requestId, slotIndex = $data.slotIndex if not state.loop.isNil: - if not state.loop.finished: - try: - await state.loop.cancelAndWait() - except CatchableError as e: - error "Error during cancelation of prooving loop", msg = e.msg + if not state.loop.finished: + try: + await state.loop.cancelAndWait() + except CatchableError as e: + error "Error during cancellation of proving loop", msg = e.msg - state.loop = nil + state.loop = nil return some State(SalePayout()) diff --git a/codex/sales/states/provingsimulated.nim b/codex/sales/states/provingsimulated.nim index 4ed9bba1..501987b5 100644 --- a/codex/sales/states/provingsimulated.nim +++ b/codex/sales/states/provingsimulated.nim @@ -8,6 +8,7 @@ when codex_enable_proof_failures: import ../../contracts/requests import ../../market + import ../../utils/exceptions import ../salescontext import ./proving @@ -20,7 +21,7 @@ when codex_enable_proof_failures: proofCount: int proc onSubmitProofError(error: ref CatchableError, period: UInt256, slotId: SlotId) = - error "Submitting invalid proof failed", period = period, slotId = $slotId, msg = error.msg + error "Submitting invalid proof failed", period = period, slotId = $slotId, msg = error.msgDetail method prove*(state: SaleProvingSimulated, slot: Slot, onProve: OnProve, market: Market, currentPeriod: Period) {.async.} = trace "Processing proving in simulated mode" @@ -32,8 +33,8 @@ when codex_enable_proof_failures: try: warn "Submitting INVALID proof", period = currentPeriod, slotId = slot.id await market.submitProof(slot.id, newSeq[byte](0)) - except ProviderError as e: - if not e.revertReason.contains("Invalid proof"): + except SignerError as e: + if not e.msgDetail.contains("Invalid proof"): onSubmitProofError(e, currentPeriod, slot.id) except CatchableError as e: onSubmitProofError(e, currentPeriod, slot.id) diff --git a/tests/codex/helpers/mockmarket.nim b/tests/codex/helpers/mockmarket.nim index 867a3ef5..38d93a96 100644 --- a/tests/codex/helpers/mockmarket.nim +++ b/tests/codex/helpers/mockmarket.nim @@ -205,6 +205,9 @@ proc emitRequestFailed*(market: MockMarket, requestId: RequestId) = subscription.requestId.isNone: subscription.callback(requestId) +proc cancelTransaction(market: Market, nonce: UInt256) {.async.} = + discard + proc fillSlot*(market: MockMarket, requestId: RequestId, slotIndex: UInt256, diff --git a/tests/contracts/testContracts.nim b/tests/contracts/testContracts.nim index 176e5d25..8dfa54f2 100644 --- a/tests/contracts/testContracts.nim +++ b/tests/contracts/testContracts.nim @@ -42,7 +42,7 @@ ethersuite "Marketplace contracts": await marketplace.requestStorage(request) switchAccount(host) discard await token.approve(marketplace.address, request.ask.collateral) - await marketplace.fillSlot(request.id, 0.u256, proof) + discard await marketplace.fillSlot(request.id, 0.u256, proof) slotId = request.slotId(0.u256) proc waitUntilProofRequired(slotId: SlotId) {.async.} = @@ -57,7 +57,7 @@ ethersuite "Marketplace contracts": proc startContract() {.async.} = for slotIndex in 1.. 0: + res &= "=" & option.value + return res proc new*(_: type RunningNode, role: Role, @@ -44,99 +61,113 @@ proc new*(_: type RunningNode, datadir: datadir, ethAccount: ethAccount) -proc init*(_: type StartNodes, - clients, providers, validators: uint): StartNodes = - StartNodes(clients: clients, providers: providers, validators: validators) +proc nodes*(config: StartNodeConfig, numNodes: int): StartNodeConfig = + if numNodes < 0: + raise newException(ValueError, "numNodes must be >= 0") -proc init*(_: type DebugNodes, - client, provider, validator: bool, - topics: string = "validator,proving,market"): DebugNodes = - DebugNodes(client: client, provider: provider, validator: validator, - topics: topics) + var startConfig = config + startConfig.numNodes = numNodes + return startConfig + +proc simulateProofFailuresFor*( + config: StartNodeConfig, + providerIdx: int, + failEveryNProofs: int +): StartNodeConfig = + + if providerIdx > config.numNodes - 1: + raise newException(ValueError, "provider index out of bounds") + + var startConfig = config + startConfig.cliOptions.add( + CliOption( + nodeIdx: some providerIdx, + key: "--simulate-proof-failures", + value: $failEveryNProofs + ) + ) + return startConfig + +proc debug*(config: StartNodeConfig, enabled = true): StartNodeConfig = + ## output log in stdout + var startConfig = config + startConfig.debugEnabled = enabled + return startConfig + +# proc withLogFile*( +# config: StartNodeConfig, +# file: bool | string +# ): StartNodeConfig = + +# var startConfig = config +# when file is bool: +# if not file: startConfig.logFile = none string +# else: startConfig.logFile = +# some currentSourcePath.parentDir() / "codex" & $index & ".log" +# else: +# if file.len <= 0: +# raise newException(ValueError, "file path length must be > 0") +# startConfig.logFile = some file + +# return startConfig + +proc withLogTopics*( + config: StartNodeConfig, + topics: varargs[string] +): StartNodeConfig = + + var startConfig = config + startConfig.logTopics = startConfig.logTopics.concat(@topics) + return startConfig + +proc withLogFile*( + config: StartNodeConfig, + logToFile: bool = true +): StartNodeConfig = + + var startConfig = config + var logDir = currentSourcePath.parentDir() / "logs" / "{starttime}" + createDir(logDir) + startConfig.logFile = logToFile + return startConfig template multinodesuite*(name: string, - startNodes: StartNodes, debugNodes: DebugNodes, body: untyped) = - - if (debugNodes.client or debugNodes.provider) and - (enabledLogLevel > LogLevel.TRACE or - enabledLogLevel == LogLevel.NONE): - echo "" - echo "More test debug logging is available by running the tests with " & - "'-d:chronicles_log_level=TRACE " & - "-d:chronicles_disabled_topics=websock " & - "-d:chronicles_default_output_device=stdout " & - "-d:chronicles_sinks=textlines'" - echo "" + startNodes: StartNodes, body: untyped) = ethersuite name: var running: seq[RunningNode] var bootstrap: string + let starttime = now().format("yyyy-MM-dd'_'HH:mm:ss") proc newNodeProcess(index: int, - addlOptions: seq[string], - debug: bool): (NodeProcess, string, Address) = + config: StartNodeConfig + ): (NodeProcess, string, Address) = if index > accounts.len - 1: raiseAssert("Cannot start node at index " & $index & ", not enough eth accounts.") let datadir = getTempDir() / "Codex" & $index - var options = @[ - "--api-port=" & $(8080 + index), - "--data-dir=" & datadir, - "--nat=127.0.0.1", - "--listen-addrs=/ip4/127.0.0.1/tcp/0", - "--disc-ip=127.0.0.1", - "--disc-port=" & $(8090 + index), - "--eth-account=" & $accounts[index]] - .concat(addlOptions) - if debug: options.add "--log-level=INFO;TRACE: " & debugNodes.topics - let node = startNode(options, debug = debug) + # let logdir = currentSourcePath.parentDir() + var options = config.cliOptions.map(o => $o) + .concat(@[ + "--api-port=" & $(8080 + index), + "--data-dir=" & datadir, + "--nat=127.0.0.1", + "--listen-addrs=/ip4/127.0.0.1/tcp/0", + "--disc-ip=127.0.0.1", + "--disc-port=" & $(8090 + index), + "--eth-account=" & $accounts[index]]) + # if logFile =? config.logFile: + # options.add "--log-file=" & logFile + if config.logTopics.len > 0: + options.add "--log-level=INFO;TRACE: " & config.logTopics.join(",") + + let node = startNode(options, config.debugEnabled) node.waitUntilStarted() (node, datadir, accounts[index]) - proc newCodexClient(index: int): CodexClient = - CodexClient.new("http://localhost:" & $(8080 + index) & "/api/codex/v1") - - proc startClientNode() = - let index = running.len - let (node, datadir, account) = newNodeProcess( - index, @["--persistence"], debugNodes.client) - let restClient = newCodexClient(index) - running.add RunningNode.new(Role.Client, node, restClient, datadir, - account) - if debugNodes.client: - debug "started new client node and codex client", - restApiPort = 8080 + index, discPort = 8090 + index, account - - proc startProviderNode(failEveryNProofs: uint = 0) = - let index = running.len - let (node, datadir, account) = newNodeProcess(index, @[ - "--bootstrap-node=" & bootstrap, - "--persistence", - "--simulate-proof-failures=" & $failEveryNProofs], - debugNodes.provider) - let restClient = newCodexClient(index) - running.add RunningNode.new(Role.Provider, node, restClient, datadir, - account) - if debugNodes.provider: - debug "started new provider node and codex client", - restApiPort = 8080 + index, discPort = 8090 + index, account - - proc startValidatorNode() = - let index = running.len - let (node, datadir, account) = newNodeProcess(index, @[ - "--bootstrap-node=" & bootstrap, - "--validator"], - debugNodes.validator) - let restClient = newCodexClient(index) - running.add RunningNode.new(Role.Validator, node, restClient, datadir, - account) - if debugNodes.validator: - debug "started new validator node and codex client", - restApiPort = 8080 + index, discPort = 8090 + index, account - proc clients(): seq[RunningNode] {.used.} = running.filter(proc(r: RunningNode): bool = r.role == Role.Client) @@ -146,16 +177,85 @@ template multinodesuite*(name: string, proc validators(): seq[RunningNode] {.used.} = running.filter(proc(r: RunningNode): bool = r.role == Role.Validator) + proc newCodexClient(index: int): CodexClient = + CodexClient.new("http://localhost:" & $(8080 + index) & "/api/codex/v1") + + proc getLogFile(role: Role, index: int): string = + var logDir = currentSourcePath.parentDir() / "logs" / $starttime + createDir(logDir) + let fn = $role & "_" & $index & ".log" + let fileName = logDir / fn + echo ">>> replace log file name: ", fileName + return fileName + + proc startClientNode() = + let index = running.len + let clientIdx = clients().len + var config = startNodes.clients + config.cliOptions.add CliOption(key: "--persistence") + if config.logFile: + let updatedLogFile = getLogFile(Role.Client, clientIdx) + config.cliOptions.add CliOption(key: "--log-file", value: updatedLogFile) + let (node, datadir, account) = newNodeProcess(index, config) + let restClient = newCodexClient(index) + running.add RunningNode.new(Role.Client, node, restClient, datadir, + account) + if config.debugEnabled: + debug "started new client node and codex client", + restApiPort = 8080 + index, discPort = 8090 + index, account + + proc startProviderNode(cliOptions: seq[CliOption] = @[]) = + let index = running.len + let providerIdx = providers().len + var config = startNodes.providers + config.cliOptions = config.cliOptions.concat(cliOptions) + if config.logFile: + let updatedLogFile = getLogFile(Role.Provider, providerIdx) + config.cliOptions.add CliOption(key: "--log-file", value: updatedLogFile) + config.cliOptions.add CliOption(key: "--bootstrap-node", value: bootstrap) + config.cliOptions.add CliOption(key: "--persistence") + + config.cliOptions = config.cliOptions.filter( + o => (let idx = o.nodeIdx |? providerIdx; echo "idx: ", idx, ", index: ", index; idx == providerIdx) + ) + + let (node, datadir, account) = newNodeProcess(index, config) + let restClient = newCodexClient(index) + running.add RunningNode.new(Role.Provider, node, restClient, datadir, + account) + if config.debugEnabled: + debug "started new provider node and codex client", + restApiPort = 8080 + index, discPort = 8090 + index, account, + cliOptions = config.cliOptions.join(",") + + proc startValidatorNode() = + let index = running.len + let validatorIdx = providers().len + var config = startNodes.validators + if config.logFile: + let updatedLogFile = getLogFile(Role.Validator, validatorIdx) + config.cliOptions.add CliOption(key: "--log-file", value: updatedLogFile) + config.cliOptions.add CliOption(key: "--bootstrap-node", value: bootstrap) + config.cliOptions.add CliOption(key: "--validator") + + let (node, datadir, account) = newNodeProcess(index, config) + let restClient = newCodexClient(index) + running.add RunningNode.new(Role.Validator, node, restClient, datadir, + account) + if config.debugEnabled: + debug "started new validator node and codex client", + restApiPort = 8080 + index, discPort = 8090 + index, account + setup: - for i in 0..