mirror of
https://github.com/codex-storage/nim-codex.git
synced 2025-01-24 19:59:51 +00:00
e6a387e8e8
* add seen flag * Add MockSlotQueueItem and better prioritisation tests * Update seen priority, and include in SlotQueueItem.init * Re-add processed slots to queue Re-add processed slots to queue if the sale was ignored or errored * add pausing of queue - when processing slots in queue, pause queue if item was marked seen - if availability size is increased, trigger onAvailabilityAdded callback - in sales, on availability added, clear 'seen' flags, then unpause the queue - when items pushed to the queue, unpause the queue * remove unused NoMatchingAvailabilityError from slotqueue The slot queue should also have nothing to do with availabilities * when all availabilities are empty, pause the queue An empty availability is defined as size < DefaultBlockSize as this means even the smallest possible request could not be served. However, this is up for discussion. * remove availability from onAvailabilitiesEmptied callback * refactor onAvailabilityAdded and onAvailabilitiesEmptied onAvailabilityAdded and onAvailabilitiesEmptied are now only called from reservations.update (and eventually reservations.delete once implemented). - Add empty routine for Availability and Reservation - Add allEmpty routine for Availability and Reservation, which returns true when all all Availability or Reservation objects in the datastore are empty. * SlotQueue test support updates * Sales module test support updates * Reservations module tests for queue pausing * Sales module tests for queue pausing Includes tests for sales states cancelled, errored, ignored to ensure onCleanUp is called with correct parameters * SlotQueue module tests for queue pausing * fix existing sales test * PR feedback - indent `self.unpause` - update comment for `clearSeenFlags` * reprocessSlot in SaleErrored only when coming from downloading * remove pausing of queue when availabilities are "emptied" Queue pausing when all availiabilies are "emptied" is not necessary, given that the node would not be able to service slots once all its availabilities' freeSize are too small for the slots in the queue, and would then be paused anyway. Add test that asserts the queue is paused once the freeSpace of availabilities drops too low to fill slots in the queue. * Update clearing of seen flags The asyncheapqueue update overload would need to check index bounds and ultimately a different solution was found using the mitems iterator. * fix test request.id was different before updating request.ask.slots, and that id was used to set the state in mockmarket. * Change filled/cleanup future to nil, so no await is needed * add wait to allow items to be added to queue * do not unpause queue when seen items are pushed * re-add seen item back to queue once paused Previously, when a seen item was processed, it was first popped off the queue, then the queue was paused waiting to process that item once the queue was unpaused. Now, when a seen item is processed, it is popped off the queue, the queue is paused, then the item is re-added to the queue and the queue will wait until unpaused before it will continue popping items off the queue. If the item was not re-added to the queue, it would have been processed immediately once unpaused, however there may have been other items with higher priority pushed to the queue in the meantime. The queue would not be unpaused if those added items were already seen. In particular, this may happen when ignored items due to lack of availability are re-added to a paused queue. Those ignored items will likely have a higher priority than the item that was just seen (due to it having been processed first), causing the queue to the be paused. * address PR comments
459 lines
14 KiB
Nim
459 lines
14 KiB
Nim
import std/sequtils
|
|
import std/tables
|
|
import pkg/chronos
|
|
import pkg/questionable
|
|
import pkg/questionable/results
|
|
import pkg/upraises
|
|
import ../errors
|
|
import ../logutils
|
|
import ../rng
|
|
import ../utils
|
|
import ../contracts/requests
|
|
import ../utils/asyncheapqueue
|
|
import ../utils/then
|
|
import ../utils/trackedfutures
|
|
|
|
logScope:
|
|
topics = "marketplace slotqueue"
|
|
|
|
type
|
|
OnProcessSlot* =
|
|
proc(item: SlotQueueItem, done: Future[void]): Future[void] {.gcsafe, upraises:[].}
|
|
|
|
# Non-ref obj copies value when assigned, preventing accidental modification
|
|
# of values which could cause an incorrect order (eg
|
|
# ``slotQueue[1].collateral = 1`` would cause ``collateral`` to be updated,
|
|
# but the heap invariant would no longer be honoured. When non-ref, the
|
|
# compiler can ensure that statement will fail).
|
|
SlotQueueWorker = object
|
|
doneProcessing*: Future[void]
|
|
|
|
SlotQueueItem* = object
|
|
requestId: RequestId
|
|
slotIndex: uint16
|
|
slotSize: UInt256
|
|
duration: UInt256
|
|
reward: UInt256
|
|
collateral: UInt256
|
|
expiry: UInt256
|
|
seen: bool
|
|
|
|
# don't need to -1 to prevent overflow when adding 1 (to always allow push)
|
|
# because AsyncHeapQueue size is of type `int`, which is larger than `uint16`
|
|
SlotQueueSize = range[1'u16..uint16.high]
|
|
|
|
SlotQueue* = ref object
|
|
maxWorkers: int
|
|
onProcessSlot: ?OnProcessSlot
|
|
queue: AsyncHeapQueue[SlotQueueItem]
|
|
running: bool
|
|
workers: AsyncQueue[SlotQueueWorker]
|
|
trackedFutures: TrackedFutures
|
|
unpaused: AsyncEvent
|
|
|
|
SlotQueueError = object of CodexError
|
|
SlotQueueItemExistsError* = object of SlotQueueError
|
|
SlotQueueItemNotExistsError* = object of SlotQueueError
|
|
SlotsOutOfRangeError* = object of SlotQueueError
|
|
QueueNotRunningError* = object of SlotQueueError
|
|
|
|
# Number of concurrent workers used for processing SlotQueueItems
|
|
const DefaultMaxWorkers = 3
|
|
|
|
# Cap slot queue size to prevent unbounded growth and make sifting more
|
|
# efficient. Max size is not equivalent to the number of slots a host can
|
|
# service, which is limited by host availabilities and new requests circulating
|
|
# the network. Additionally, each new request/slot in the network will be
|
|
# included in the queue if it is higher priority than any of the exisiting
|
|
# items. Older slots should be unfillable over time as other hosts fill the
|
|
# slots.
|
|
const DefaultMaxSize = 128'u16
|
|
|
|
proc profitability(item: SlotQueueItem): UInt256 =
|
|
StorageAsk(collateral: item.collateral,
|
|
duration: item.duration,
|
|
reward: item.reward,
|
|
slotSize: item.slotSize).pricePerSlot
|
|
|
|
proc `<`*(a, b: SlotQueueItem): bool =
|
|
# for A to have a higher priority than B (in a min queue), A must be less than
|
|
# B.
|
|
var scoreA: uint8 = 0
|
|
var scoreB: uint8 = 0
|
|
|
|
proc addIf(score: var uint8, condition: bool, addition: int) =
|
|
if condition:
|
|
score += 1'u8 shl addition
|
|
|
|
scoreA.addIf(a.seen < b.seen, 4)
|
|
scoreB.addIf(a.seen > b.seen, 4)
|
|
|
|
scoreA.addIf(a.profitability > b.profitability, 3)
|
|
scoreB.addIf(a.profitability < b.profitability, 3)
|
|
|
|
scoreA.addIf(a.collateral < b.collateral, 2)
|
|
scoreB.addIf(a.collateral > b.collateral, 2)
|
|
|
|
scoreA.addIf(a.expiry > b.expiry, 1)
|
|
scoreB.addIf(a.expiry < b.expiry, 1)
|
|
|
|
scoreA.addIf(a.slotSize < b.slotSize, 0)
|
|
scoreB.addIf(a.slotSize > b.slotSize, 0)
|
|
|
|
return scoreA > scoreB
|
|
|
|
proc `==`*(a, b: SlotQueueItem): bool =
|
|
a.requestId == b.requestId and
|
|
a.slotIndex == b.slotIndex
|
|
|
|
proc new*(_: type SlotQueue,
|
|
maxWorkers = DefaultMaxWorkers,
|
|
maxSize: SlotQueueSize = DefaultMaxSize): SlotQueue =
|
|
|
|
if maxWorkers <= 0:
|
|
raise newException(ValueError, "maxWorkers must be positive")
|
|
if maxWorkers.uint16 > maxSize:
|
|
raise newException(ValueError, "maxWorkers must be less than maxSize")
|
|
|
|
SlotQueue(
|
|
maxWorkers: maxWorkers,
|
|
# Add 1 to always allow for an extra item to be pushed onto the queue
|
|
# temporarily. After push (and sort), the bottom-most item will be deleted
|
|
queue: newAsyncHeapQueue[SlotQueueItem](maxSize.int + 1),
|
|
running: false,
|
|
trackedFutures: TrackedFutures.new(),
|
|
unpaused: newAsyncEvent()
|
|
)
|
|
# avoid instantiating `workers` in constructor to avoid side effects in
|
|
# `newAsyncQueue` procedure
|
|
|
|
proc init(_: type SlotQueueWorker): SlotQueueWorker =
|
|
SlotQueueWorker(
|
|
doneProcessing: newFuture[void]("slotqueue.worker.processing")
|
|
)
|
|
|
|
proc init*(_: type SlotQueueItem,
|
|
requestId: RequestId,
|
|
slotIndex: uint16,
|
|
ask: StorageAsk,
|
|
expiry: UInt256,
|
|
seen = false): SlotQueueItem =
|
|
|
|
SlotQueueItem(
|
|
requestId: requestId,
|
|
slotIndex: slotIndex,
|
|
slotSize: ask.slotSize,
|
|
duration: ask.duration,
|
|
reward: ask.reward,
|
|
collateral: ask.collateral,
|
|
expiry: expiry,
|
|
seen: seen
|
|
)
|
|
|
|
proc init*(_: type SlotQueueItem,
|
|
request: StorageRequest,
|
|
slotIndex: uint16): SlotQueueItem =
|
|
|
|
SlotQueueItem.init(request.id,
|
|
slotIndex,
|
|
request.ask,
|
|
request.expiry)
|
|
|
|
proc init*(_: type SlotQueueItem,
|
|
requestId: RequestId,
|
|
ask: StorageAsk,
|
|
expiry: UInt256): seq[SlotQueueItem] =
|
|
|
|
if not ask.slots.inRange:
|
|
raise newException(SlotsOutOfRangeError, "Too many slots")
|
|
|
|
var i = 0'u16
|
|
proc initSlotQueueItem: SlotQueueItem =
|
|
let item = SlotQueueItem.init(requestId, i, ask, expiry)
|
|
inc i
|
|
return item
|
|
|
|
var items = newSeqWith(ask.slots.int, initSlotQueueItem())
|
|
Rng.instance.shuffle(items)
|
|
return items
|
|
|
|
proc init*(_: type SlotQueueItem,
|
|
request: StorageRequest): seq[SlotQueueItem] =
|
|
|
|
return SlotQueueItem.init(request.id, request.ask, request.expiry)
|
|
|
|
proc inRange*(val: SomeUnsignedInt): bool =
|
|
val.uint16 in SlotQueueSize.low..SlotQueueSize.high
|
|
|
|
proc requestId*(self: SlotQueueItem): RequestId = self.requestId
|
|
proc slotIndex*(self: SlotQueueItem): uint16 = self.slotIndex
|
|
proc slotSize*(self: SlotQueueItem): UInt256 = self.slotSize
|
|
proc duration*(self: SlotQueueItem): UInt256 = self.duration
|
|
proc reward*(self: SlotQueueItem): UInt256 = self.reward
|
|
proc collateral*(self: SlotQueueItem): UInt256 = self.collateral
|
|
proc seen*(self: SlotQueueItem): bool = self.seen
|
|
|
|
proc running*(self: SlotQueue): bool = self.running
|
|
|
|
proc len*(self: SlotQueue): int = self.queue.len
|
|
|
|
proc size*(self: SlotQueue): int = self.queue.size - 1
|
|
|
|
proc paused*(self: SlotQueue): bool = not self.unpaused.isSet
|
|
|
|
proc `$`*(self: SlotQueue): string = $self.queue
|
|
|
|
proc `onProcessSlot=`*(self: SlotQueue, onProcessSlot: OnProcessSlot) =
|
|
self.onProcessSlot = some onProcessSlot
|
|
|
|
proc activeWorkers*(self: SlotQueue): int =
|
|
if not self.running: return 0
|
|
|
|
# active = capacity - available
|
|
self.maxWorkers - self.workers.len
|
|
|
|
proc contains*(self: SlotQueue, item: SlotQueueItem): bool =
|
|
self.queue.contains(item)
|
|
|
|
proc pause*(self: SlotQueue) =
|
|
# set unpaused flag to false -- coroutines will block on unpaused.wait()
|
|
self.unpaused.clear()
|
|
|
|
proc unpause*(self: SlotQueue) =
|
|
# set unpaused flag to true -- unblocks coroutines waiting on unpaused.wait()
|
|
self.unpaused.fire()
|
|
|
|
proc populateItem*(self: SlotQueue,
|
|
requestId: RequestId,
|
|
slotIndex: uint16): ?SlotQueueItem =
|
|
|
|
trace "populate item, items in queue", len = self.queue.len
|
|
for item in self.queue.items:
|
|
trace "populate item search", itemRequestId = item.requestId, requestId
|
|
if item.requestId == requestId:
|
|
return some SlotQueueItem(
|
|
requestId: requestId,
|
|
slotIndex: slotIndex,
|
|
slotSize: item.slotSize,
|
|
duration: item.duration,
|
|
reward: item.reward,
|
|
collateral: item.collateral,
|
|
expiry: item.expiry
|
|
)
|
|
return none SlotQueueItem
|
|
|
|
proc push*(self: SlotQueue, item: SlotQueueItem): ?!void =
|
|
|
|
logScope:
|
|
requestId = item.requestId
|
|
slotIndex = item.slotIndex
|
|
seen = item.seen
|
|
|
|
trace "pushing item to queue"
|
|
|
|
if not self.running:
|
|
let err = newException(QueueNotRunningError, "queue not running")
|
|
return failure(err)
|
|
|
|
if self.contains(item):
|
|
let err = newException(SlotQueueItemExistsError, "item already exists")
|
|
return failure(err)
|
|
|
|
if err =? self.queue.pushNoWait(item).mapFailure.errorOption:
|
|
return failure(err)
|
|
|
|
if self.queue.full():
|
|
# delete the last item
|
|
self.queue.del(self.queue.size - 1)
|
|
|
|
doAssert self.queue.len <= self.queue.size - 1
|
|
|
|
# when slots are pushed to the queue, the queue should be unpaused if it was
|
|
# paused
|
|
if self.paused and not item.seen:
|
|
trace "unpausing queue after new slot pushed"
|
|
self.unpause()
|
|
|
|
return success()
|
|
|
|
proc push*(self: SlotQueue, items: seq[SlotQueueItem]): ?!void =
|
|
for item in items:
|
|
if err =? self.push(item).errorOption:
|
|
return failure(err)
|
|
|
|
return success()
|
|
|
|
proc findByRequest(self: SlotQueue, requestId: RequestId): seq[SlotQueueItem] =
|
|
var items: seq[SlotQueueItem] = @[]
|
|
for item in self.queue.items:
|
|
if item.requestId == requestId:
|
|
items.add item
|
|
return items
|
|
|
|
proc delete*(self: SlotQueue, item: SlotQueueItem) =
|
|
logScope:
|
|
requestId = item.requestId
|
|
slotIndex = item.slotIndex
|
|
|
|
trace "removing item from queue"
|
|
|
|
if not self.running:
|
|
trace "cannot delete item from queue, queue not running"
|
|
return
|
|
|
|
self.queue.delete(item)
|
|
|
|
proc delete*(self: SlotQueue, requestId: RequestId, slotIndex: uint16) =
|
|
let item = SlotQueueItem(requestId: requestId, slotIndex: slotIndex)
|
|
self.delete(item)
|
|
|
|
proc delete*(self: SlotQueue, requestId: RequestId) =
|
|
let items = self.findByRequest(requestId)
|
|
for item in items:
|
|
self.delete(item)
|
|
|
|
proc `[]`*(self: SlotQueue, i: Natural): SlotQueueItem =
|
|
self.queue[i]
|
|
|
|
proc addWorker(self: SlotQueue): ?!void =
|
|
if not self.running:
|
|
let err = newException(QueueNotRunningError, "queue must be running")
|
|
return failure(err)
|
|
|
|
trace "adding new worker to worker queue"
|
|
|
|
let worker = SlotQueueWorker.init()
|
|
try:
|
|
discard worker.doneProcessing.track(self)
|
|
self.workers.addLastNoWait(worker)
|
|
except AsyncQueueFullError:
|
|
return failure("failed to add worker, worker queue full")
|
|
|
|
return success()
|
|
|
|
proc dispatch(self: SlotQueue,
|
|
worker: SlotQueueWorker,
|
|
item: SlotQueueItem) {.async.} =
|
|
logScope:
|
|
requestId = item.requestId
|
|
slotIndex = item.slotIndex
|
|
|
|
if not self.running:
|
|
warn "Could not dispatch worker because queue is not running"
|
|
return
|
|
|
|
if onProcessSlot =? self.onProcessSlot:
|
|
try:
|
|
discard worker.doneProcessing.track(self)
|
|
await onProcessSlot(item, worker.doneProcessing)
|
|
await worker.doneProcessing
|
|
|
|
if err =? self.addWorker().errorOption:
|
|
raise err # catch below
|
|
|
|
except QueueNotRunningError as e:
|
|
info "could not re-add worker to worker queue, queue not running",
|
|
error = e.msg
|
|
except CancelledError:
|
|
# do not bubble exception up as it is called with `asyncSpawn` which would
|
|
# convert the exception into a `FutureDefect`
|
|
discard
|
|
except CatchableError as e:
|
|
# we don't have any insight into types of errors that `onProcessSlot` can
|
|
# throw because it is caller-defined
|
|
warn "Unknown error processing slot in worker", error = e.msg
|
|
|
|
proc clearSeenFlags*(self: SlotQueue) =
|
|
# Enumerate all items in the queue, overwriting each item with `seen = false`.
|
|
# To avoid issues with new queue items being pushed to the queue while all
|
|
# items are being iterated (eg if a new storage request comes in and pushes
|
|
# new slots to the queue), this routine must remain synchronous.
|
|
|
|
if self.queue.empty:
|
|
return
|
|
|
|
for item in self.queue.mitems:
|
|
item.seen = false # does not maintain the heap invariant
|
|
|
|
# force heap reshuffling to maintain the heap invariant
|
|
doAssert self.queue.update(self.queue[0]), "slot queue failed to reshuffle"
|
|
|
|
trace "all 'seen' flags cleared"
|
|
|
|
proc start*(self: SlotQueue) {.async.} =
|
|
if self.running:
|
|
return
|
|
|
|
trace "starting slot queue"
|
|
|
|
self.running = true
|
|
|
|
# must be called in `start` to avoid sideeffects in `new`
|
|
self.workers = newAsyncQueue[SlotQueueWorker](self.maxWorkers)
|
|
|
|
# Add initial workers to the `AsyncHeapQueue`. Once a worker has completed its
|
|
# task, a new worker will be pushed to the queue
|
|
for i in 0..<self.maxWorkers:
|
|
if err =? self.addWorker().errorOption:
|
|
error "start: error adding new worker", error = err.msg
|
|
|
|
while self.running:
|
|
try:
|
|
if self.paused:
|
|
trace "Queue is paused, waiting for new slots or availabilities to be modified/added"
|
|
|
|
# block until unpaused is true/fired, ie wait for queue to be unpaused
|
|
await self.unpaused.wait()
|
|
|
|
let worker = await self.workers.popFirst().track(self) # if workers saturated, wait here for new workers
|
|
let item = await self.queue.pop().track(self) # if queue empty, wait here for new items
|
|
|
|
logScope:
|
|
reqId = item.requestId
|
|
slotIdx = item.slotIndex
|
|
seen = item.seen
|
|
|
|
if not self.running: # may have changed after waiting for pop
|
|
trace "not running, exiting"
|
|
break
|
|
|
|
# If, upon processing a slot, the slot item already has a `seen` flag set,
|
|
# the queue should be paused.
|
|
if item.seen:
|
|
trace "processing already seen item, pausing queue",
|
|
reqId = item.requestId, slotIdx = item.slotIndex
|
|
self.pause()
|
|
# put item back in queue so that if other items are pushed while paused,
|
|
# it will be sorted accordingly. Otherwise, this item would be processed
|
|
# immediately (with priority over other items) once unpaused
|
|
trace "readding seen item back into the queue"
|
|
discard self.push(item) # on error, drop the item and continue
|
|
worker.doneProcessing.complete()
|
|
await sleepAsync(1.millis) # poll
|
|
continue
|
|
|
|
trace "processing item"
|
|
|
|
self.dispatch(worker, item)
|
|
.track(self)
|
|
.catch(proc (e: ref CatchableError) =
|
|
error "Unknown error dispatching worker", error = e.msg
|
|
)
|
|
|
|
await sleepAsync(1.millis) # poll
|
|
except CancelledError:
|
|
trace "slot queue cancelled"
|
|
return
|
|
except CatchableError as e: # raised from self.queue.pop() or self.workers.pop()
|
|
warn "slot queue error encountered during processing", error = e.msg
|
|
|
|
proc stop*(self: SlotQueue) {.async.} =
|
|
if not self.running:
|
|
return
|
|
|
|
trace "stopping slot queue"
|
|
|
|
self.running = false
|
|
|
|
await self.trackedFutures.cancelTracked()
|