From 97e96842bffe41c2350e884166d553afffb8079c Mon Sep 17 00:00:00 2001 From: thatben Date: Thu, 24 Apr 2025 12:49:23 +0200 Subject: [PATCH] extra logging in sales --- codex/sales.nim | 11 ++++++----- codex/sales/salesagent.nim | 2 +- codex/sales/states/cancelled.nim | 2 +- codex/sales/states/downloading.nim | 1 + codex/sales/states/errored.nim | 2 +- codex/sales/states/filled.nim | 3 +++ codex/sales/states/filling.nim | 2 ++ codex/sales/states/finished.nim | 2 +- codex/sales/states/ignored.nim | 2 +- codex/sales/states/initialproving.nim | 4 ++-- codex/sales/states/preparing.nim | 3 +++ codex/sales/states/proving.nim | 1 + 12 files changed, 23 insertions(+), 12 deletions(-) diff --git a/codex/sales.nim b/codex/sales.nim index 37e2c06a..4cda24b6 100644 --- a/codex/sales.nim +++ b/codex/sales.nim @@ -112,6 +112,7 @@ proc remove(sales: Sales, agent: SalesAgent) {.async.} = proc cleanUp( sales: Sales, + reason: string, agent: SalesAgent, reprocessSlot: bool, returnedCollateral: ?UInt256, @@ -126,7 +127,7 @@ proc cleanUp( reservationId = data.reservation .? id |? ReservationId.default availabilityId = data.reservation .? availabilityId |? AvailabilityId.default - trace "cleaning up sales agent" + trace "cleaning up sales agent", reason # if reservation for the SalesAgent was not created, then it means # that the cleanUp was called before the sales process really started, so @@ -201,10 +202,10 @@ proc processSlot(sales: Sales, item: SlotQueueItem, done: Future[void]) = let agent = newSalesAgent(sales.context, item.requestId, item.slotIndex, none StorageRequest) - agent.onCleanUp = proc( + agent.onCleanUp = proc(reason: string, reprocessSlot = false, returnedCollateral = UInt256.none ) {.async.} = - await sales.cleanUp(agent, reprocessSlot, returnedCollateral, done) + await sales.cleanUp(reason, agent, reprocessSlot, returnedCollateral, done) agent.onFilled = some proc(request: StorageRequest, slotIndex: uint64) = sales.filled(request, slotIndex, done) @@ -269,13 +270,13 @@ proc load*(sales: Sales) {.async.} = let agent = newSalesAgent(sales.context, slot.request.id, slot.slotIndex, some slot.request) - agent.onCleanUp = proc( + agent.onCleanUp = proc(reason: string, reprocessSlot = false, returnedCollateral = UInt256.none ) {.async.} = # since workers are not being dispatched, this future has not been created # by a worker. Create a dummy one here so we can call sales.cleanUp let done: Future[void] = nil - await sales.cleanUp(agent, reprocessSlot, returnedCollateral, done) + await sales.cleanUp(reason, agent, reprocessSlot, returnedCollateral, done) # There is no need to assign agent.onFilled as slots loaded from `mySlots` # are inherently already filled and so assigning agent.onFilled would be diff --git a/codex/sales/salesagent.nim b/codex/sales/salesagent.nim index 61f3a9d3..0c9c8778 100644 --- a/codex/sales/salesagent.nim +++ b/codex/sales/salesagent.nim @@ -26,7 +26,7 @@ type onCleanUp*: OnCleanUp onFilled*: ?OnFilled - OnCleanUp* = proc( + OnCleanUp* = proc(reason: string, reprocessSlot = false, returnedCollateral = UInt256.none ): Future[void] {.gcsafe, upraises: [].} OnFilled* = proc(request: StorageRequest, slotIndex: uint64) {.gcsafe, upraises: [].} diff --git a/codex/sales/states/cancelled.nim b/codex/sales/states/cancelled.nim index f3c755a3..6d7ec9bc 100644 --- a/codex/sales/states/cancelled.nim +++ b/codex/sales/states/cancelled.nim @@ -51,7 +51,7 @@ method run*( onClear(request, data.slotIndex) if onCleanUp =? agent.onCleanUp: - await onCleanUp(reprocessSlot = false, returnedCollateral = returnedCollateral) + await onCleanUp("cancelled due to timeout", reprocessSlot = false, returnedCollateral = returnedCollateral) warn "Sale cancelled due to timeout", requestId = data.requestId, slotIndex = data.slotIndex diff --git a/codex/sales/states/downloading.nim b/codex/sales/states/downloading.nim index 7cf304d3..f0413fec 100644 --- a/codex/sales/states/downloading.nim +++ b/codex/sales/states/downloading.nim @@ -72,6 +72,7 @@ method run*( trace "Starting download" if err =? (await onStore(request, data.slotIndex, onBlocks, isRepairing)).errorOption: + debug "Failed to download" return some State(SaleErrored(error: err, reprocessSlot: false)) trace "Download complete" diff --git a/codex/sales/states/errored.nim b/codex/sales/states/errored.nim index 95848fd3..884f133f 100644 --- a/codex/sales/states/errored.nim +++ b/codex/sales/states/errored.nim @@ -34,7 +34,7 @@ method run*( onClear(request, data.slotIndex) if onCleanUp =? agent.onCleanUp: - await onCleanUp(reprocessSlot = state.reprocessSlot) + await onCleanUp("sale error: " & state.error.msgDetail, reprocessSlot = state.reprocessSlot) except CancelledError as e: trace "SaleErrored.run was cancelled", error = e.msgDetail except CatchableError as e: diff --git a/codex/sales/states/filled.nim b/codex/sales/states/filled.nim index b0fc65c9..1fdef0a8 100644 --- a/codex/sales/states/filled.nim +++ b/codex/sales/states/filled.nim @@ -57,6 +57,7 @@ method run*( let requestEnd = await market.getRequestEnd(data.requestId) if err =? (await onExpiryUpdate(request.content.cid, requestEnd)).errorOption: + debug "failed to update expiry" return some State(SaleErrored(error: err)) when codex_enable_proof_failures: @@ -66,9 +67,11 @@ method run*( SaleProvingSimulated(failEveryNProofs: context.simulateProofFailures) ) + debug "filled, going to proving" return some State(SaleProving()) else: let error = newException(HostMismatchError, "Slot filled by other host") + debug "slot filled by another host" return some State(SaleErrored(error: error)) except CancelledError as e: trace "SaleFilled.run was cancelled", error = e.msgDetail diff --git a/codex/sales/states/filling.nim b/codex/sales/states/filling.nim index 1b76150a..9544a09f 100644 --- a/codex/sales/states/filling.nim +++ b/codex/sales/states/filling.nim @@ -52,9 +52,11 @@ method run*( debug "Slot is already filled, ignoring slot" return some State(SaleIgnored(reprocessSlot: false)) except MarketError as e: + debug "market error", err = e.msg return some State(SaleErrored(error: e)) # other CatchableErrors are handled "automatically" by the SaleState + debug "going to filled" return some State(SaleFilled()) except CancelledError as e: trace "SaleFilling.run was cancelled", error = e.msgDetail diff --git a/codex/sales/states/finished.nim b/codex/sales/states/finished.nim index 16e66d27..217c6b5a 100644 --- a/codex/sales/states/finished.nim +++ b/codex/sales/states/finished.nim @@ -40,7 +40,7 @@ method run*( onClear(request, data.slotIndex) if onCleanUp =? agent.onCleanUp: - await onCleanUp(returnedCollateral = state.returnedCollateral) + await onCleanUp("finished contract slot", returnedCollateral = state.returnedCollateral) except CancelledError as e: trace "SaleFilled.run onCleanUp was cancelled", error = e.msgDetail except CatchableError as e: diff --git a/codex/sales/states/ignored.nim b/codex/sales/states/ignored.nim index 7f2ae5b1..14e761ed 100644 --- a/codex/sales/states/ignored.nim +++ b/codex/sales/states/ignored.nim @@ -25,7 +25,7 @@ method run*( try: if onCleanUp =? agent.onCleanUp: - await onCleanUp(reprocessSlot = state.reprocessSlot) + await onCleanUp("sale ignored", reprocessSlot = state.reprocessSlot) except CancelledError as e: trace "SaleIgnored.run was cancelled", error = e.msgDetail except CatchableError as e: diff --git a/codex/sales/states/initialproving.nim b/codex/sales/states/initialproving.nim index 57e8cc2c..53914c99 100644 --- a/codex/sales/states/initialproving.nim +++ b/codex/sales/states/initialproving.nim @@ -55,13 +55,13 @@ method run*( let slot = Slot(request: request, slotIndex: data.slotIndex) await waitForStableChallenge(market, clock, slot.id) - debug "Generating initial proof", requestId = data.requestId + debug "Generating initial proof", requestId = data.requestId, slotIndex = data.slotIndex let challenge = await context.market.getChallenge(slot.id) without proof =? (await onProve(slot, challenge)), err: error "Failed to generate initial proof", error = err.msg return some State(SaleErrored(error: err)) - debug "Finished proof calculation", requestId = data.requestId + debug "Finished proof calculation", requestId = data.requestId, slotIndex = data.slotIndex return some State(SaleFilling(proof: proof)) except CancelledError as e: diff --git a/codex/sales/states/preparing.nim b/codex/sales/states/preparing.nim index a3aee4c9..aa648499 100644 --- a/codex/sales/states/preparing.nim +++ b/codex/sales/states/preparing.nim @@ -56,6 +56,7 @@ method run*( let slotId = slotId(data.requestId, data.slotIndex) let state = await market.slotState(slotId) if state != SlotState.Free and state != SlotState.Repair: + debug "Slot is not free and not repair, ignoring." return some State(SaleIgnored(reprocessSlot: false)) # TODO: Once implemented, check to ensure the host is allowed to fill the slot, @@ -93,8 +94,10 @@ method run*( if error of BytesOutOfBoundsError: # Lets monitor how often this happen and if it is often we can make it more inteligent to handle it codex_reservations_availability_mismatch.inc() + debug "Bytes out of bounds error, ignoring" return some State(SaleIgnored(reprocessSlot: true)) + debug "other error, going to error" return some State(SaleErrored(error: error)) trace "Reservation created successfully" diff --git a/codex/sales/states/proving.nim b/codex/sales/states/proving.nim index 690e9136..d116c76d 100644 --- a/codex/sales/states/proving.nim +++ b/codex/sales/states/proving.nim @@ -158,6 +158,7 @@ method run*( state.loop = nil + debug "Proveloop finished" return some State(SalePayout()) except CancelledError as e: trace "SaleProving.run onCleanUp was cancelled", error = e.msgDetail