nim-ffi/ffi/ffi_request_queue.nim
Ivan FB 5d49ee6b08
feat(ffi): sharded MPSC request ingress (alternative to lock-free #98) (#101)
Co-authored-by: Gabriel Cruz <8129788+gmelodie@users.noreply.github.com>
2026-06-27 19:08:10 +02:00

110 lines
4.3 KiB
Nim

## Sharded, mutex-guarded MPSC ingress for `ptr FFIThreadRequest`: foreign
## threads enqueue without serialising against each other.
##
## Why sharded: one shared queue funnels all producers through a single cache
## line, capping submit throughput. N independent queues (one per producer)
## remove that hotspot — producers contend only when two pick the same queue.
##
## Each queue is an intrusive FIFO under its own `Lock`: race-free under TSAN, and
## the request is its own node (intrusive `next`), so enqueue never allocates nor
## touches a Nim GC heap (the cross-thread `MemRegion` hazard).
##
## FIFO holds per queue, not globally. Unbounded by design: submit never blocks
## or rejects; completion comes via each request's callback.
import std/[atomics, locks]
import ./ffi_thread_request
const RequestQueueCount* = 16
  ## Number of independent ingress queues. Keeping it at or above the expected
  ## number of concurrent producers keeps queue collisions (and therefore lock
  ## contention) near zero.

const QueuePadBytes = 192
  ## Padding appended to every queue, well past a cache line (128B on Apple
  ## silicon), so the hot fields of neighbouring queues never false-share —
  ## false sharing would re-serialise exactly what the sharding spreads out.
static:
  # Compile-time sanity checks on `RequestQueueCount`.
  #
  # Zero satisfies the `n and (n - 1) == 0` test below, yet it would produce an
  # empty queue array and an index mask of -1, so positivity is asserted
  # separately.
  doAssert RequestQueueCount > 0, "RequestQueueCount must be positive"
  # `myQueueIndex` maps threads to queues with an `and` mask, so the count must
  # be a power of two — otherwise the distribution silently skews onto a subset.
  doAssert (RequestQueueCount and (RequestQueueCount - 1)) == 0,
    "RequestQueueCount must be a power of two"
type RequestQueue = object
  ## One ingress shard: an intrusive singly linked FIFO of requests guarded by
  ## its own lock.
  lock: Lock
  head: ptr FFIThreadRequest
    ## Oldest queued request; the consumer takes the chain from here.
  tail: ptr FFIThreadRequest
    ## Newest queued request; producers on this queue append after it.
  pad: array[QueuePadBytes, byte]
    ## Spacer that keeps adjacent queues' hot fields apart.

type RequestQueueBank* = object
  ## The full set of `RequestQueueCount` ingress shards.
  queues: array[RequestQueueCount, RequestQueue]
var
  gRequestQueue {.threadvar.}: int
    ## Per-thread ticket drawn from `gRequestQueueCounter`; `myQueueIndex`
    ## masks it down to a queue index.
  gRequestQueueAssigned {.threadvar.}: bool
    ## Per-thread flag: true once this thread has drawn its ticket.
  gRequestQueueCounter: Atomic[int]
    ## Shared ticket counter. Hands each producer thread a distinct queue
    ## round-robin on first use, so queues fill evenly regardless of OS
    ## thread-id distribution.
proc myQueueIndex(): int {.raises: [].} =
  ## Returns the queue index owned by the calling thread. The first call on a
  ## thread draws a ticket from the shared counter and caches it thread-locally;
  ## later calls reuse the cached ticket.
  const queueMask = RequestQueueCount - 1 # valid: the count is a power of two
  if not gRequestQueueAssigned:
    gRequestQueueAssigned = true
    gRequestQueue = fetchAdd(gRequestQueueCounter, 1)
  result = gRequestQueue and queueMask
proc initRequestQueue*(bank: var RequestQueueBank) {.raises: [].} =
  ## Prepares every queue in `bank` for use: initialises its lock and marks the
  ## queue empty (nil head and tail).
  for i in low(bank.queues) .. high(bank.queues):
    initLock(bank.queues[i].lock)
    bank.queues[i].head = nil
    bank.queues[i].tail = nil
proc deinitRequestQueue*(bank: var RequestQueueBank) {.raises: [].} =
  ## Tears the bank down. Precondition: all producers and the consumer have
  ## stopped — the queues are walked here without taking their locks.
  ##
  ## Any request still linked into a queue (for instance one a producer raced
  ## in after the FFI thread's final drain) is released via `deleteRequest`, so
  ## a teardown race leaks nothing instead of leaving requests dangling.
  for shard in mitems(bank.queues):
    var pending = shard.head
    while pending != nil:
      # Capture the successor first: the node is gone after `deleteRequest`.
      let successor = pending[].next
      deleteRequest(pending)
      pending = successor
    shard.head = nil
    shard.tail = nil
    deinitLock(shard.lock)
proc pushRequest*(
    bank: var RequestQueueBank, request: ptr FFIThreadRequest
): bool {.raises: [].} =
  ## Appends `request` to the calling producer thread's queue, which takes
  ## ownership of it.
  ##
  ## Returns true only if that queue was empty before the append. The consumer
  ## sleeps on an empty queue, so this is the one push that must wake it; a
  ## missed wake just waits the 100ms poll.
  let shard = addr bank.queues[myQueueIndex()]
  request[].next = nil # the new node becomes the end of the chain
  withLock shard[].lock:
    let previousTail = shard[].tail
    result = previousTail.isNil()
    if result:
      # Empty queue: the request is both head and tail.
      shard[].head = request
    else:
      previousTail[].next = request
    shard[].tail = request
proc mergeQueues*(bank: var RequestQueueBank): ptr FFIThreadRequest {.raises: [].} =
  ## Single-consumer drain: visits the queues in index order, links each
  ## non-empty queue's chain onto one combined chain, and resets that queue to
  ## empty. Each queue's lock is held only while that queue is being spliced.
  ##
  ## Returns the head of the combined chain, or nil when every queue was empty.
  ## The caller owns the returned chain and must read each request's `next`
  ## before dispatching it (dispatch frees the request).
  result = nil
  var chainEnd: ptr FFIThreadRequest = nil
  for shard in mitems(bank.queues):
    withLock shard.lock:
      let first = shard.head
      if first != nil:
        if result.isNil():
          result = first # first non-empty queue starts the chain
        else:
          chainEnd[].next = first
        chainEnd = shard.tail
        shard.head = nil
        shard.tail = nil