## Sharded, mutex-guarded MPSC ingress for `ptr FFIThreadRequest`: foreign
## threads enqueue without serialising against each other.
##
## Why sharded: one shared queue funnels all producers through a single cache
## line, capping submit throughput. N independent queues (one per producer)
## remove that hotspot — producers contend only when two pick the same queue.
##
## Each queue is an intrusive FIFO under its own `Lock`: race-free under TSAN, and
## the request is its own node (intrusive `next`), so enqueue never allocates nor
## touches a Nim GC heap (the cross-thread `MemRegion` hazard).
##
## FIFO holds per queue, not globally. Unbounded by design: submit never blocks
## or rejects; completion comes via each request's callback.

import std/[atomics, locks]
import ./ffi_thread_request

const
  RequestQueueCount* = 16
    ## Number of independent ingress queues in a `RequestQueueBank`. Keeping it
    ## ≥ the expected concurrent producer count keeps queue collisions (hence
    ## lock contention) near zero. Must be a power of two: queue selection
    ## masks with `RequestQueueCount - 1`.
  QueuePadBytes = 192
    ## Size of the trailing `pad` field in each `RequestQueue`. Pads each queue
    ## well past a cache line (128B on Apple silicon) so adjacent queues' hot
    ## fields never false-share — false sharing would re-serialise exactly what
    ## the sharding is meant to spread out.

static:
  # `myQueueIndex` maps threads to queues with an `and` mask of
  # `RequestQueueCount - 1`, so the count must be a power of two — otherwise the
  # distribution silently skews onto a subset. Zero has to be rejected
  # separately: it passes the `n and (n - 1)` test, yet would leave the bank
  # with no queue to index and turn the mask into -1.
  doAssert RequestQueueCount > 0 and
    (RequestQueueCount and (RequestQueueCount - 1)) == 0,
    "RequestQueueCount must be a power of two"

type
  RequestQueue = object
    ## One ingress shard: an intrusive singly linked FIFO of requests, chained
    ## through each request's `next` field. `head` and `tail` are both nil when
    ## the shard is empty.
    lock: Lock ## held by `pushRequest` and `mergeQueues` while touching `head`/`tail`
    head: ptr FFIThreadRequest ## consumer pops here (oldest)
    tail: ptr FFIThreadRequest ## producers on this queue append here (newest)
    pad: array[QueuePadBytes, byte] ## never read or written; spacing only (see `QueuePadBytes`)

  RequestQueueBank* = object
    ## The full set of `RequestQueueCount` shards. Set up with
    ## `initRequestQueue` and torn down with `deinitRequestQueue`.
    queues: array[RequestQueueCount, RequestQueue]

var gRequestQueue {.threadvar.}: int
  ## This thread's raw ticket drawn from `gRequestQueueCounter`. `myQueueIndex`
  ## masks it down to a queue index on every call. Meaningful only once
  ## `gRequestQueueAssigned` is true.
var gRequestQueueAssigned {.threadvar.}: bool
  ## Set by `myQueueIndex` once this thread has drawn its ticket.
var gRequestQueueCounter: Atomic[int]
  ## Shared ticket dispenser: each producer thread takes one ticket on first
  ## use, and tickets map to queues round-robin, so queues fill evenly
  ## regardless of OS thread-id distribution.

proc myQueueIndex(): int {.raises: [].} =
  ## Index of the queue the calling thread pushes to. The first call on a
  ## thread draws a ticket from the shared counter and caches it thread-locally;
  ## every later call on that thread reuses the cached ticket.
  const indexMask = RequestQueueCount - 1 # usable as a mask: count is a power of two
  if not gRequestQueueAssigned:
    # First use on this thread: claim the next ticket.
    gRequestQueue = fetchAdd(gRequestQueueCounter, 1)
    gRequestQueueAssigned = true
  result = gRequestQueue and indexMask

proc initRequestQueue*(bank: var RequestQueueBank) {.raises: [].} =
  ## Prepare every queue in `bank` for use: initialise its lock and mark it
  ## empty (nil `head` and `tail`).
  for slot in low(bank.queues) .. high(bank.queues):
    initLock(bank.queues[slot].lock)
    bank.queues[slot].head = nil
    bank.queues[slot].tail = nil

proc deinitRequestQueue*(bank: var RequestQueueBank) {.raises: [].} =
  ## Tear down every queue in `bank`. Callers must guarantee that all producers
  ## and the consumer have already stopped: the queues are walked without taking
  ## their locks. Any request still linked into a queue — e.g. one a producer
  ## raced in after the FFI thread's final drain — is freed here, so a teardown
  ## race leaks nothing instead of leaving requests dangling.
  for slot in low(bank.queues) .. high(bank.queues):
    var pending = bank.queues[slot].head
    while not pending.isNil():
      # Grab the successor first: the node is gone once it has been deleted.
      let following = pending[].next
      deleteRequest(pending)
      pending = following
    bank.queues[slot].head = nil
    bank.queues[slot].tail = nil
    deinitLock(bank.queues[slot].lock)

proc pushRequest*(
    bank: var RequestQueueBank, request: ptr FFIThreadRequest
): bool {.raises: [].} =
  ## Append `request` to the calling thread's queue; the bank takes ownership.
  ##
  ## Returns true only when that queue was empty before the push. The consumer
  ## sleeps on an empty queue, so that's the one push that must wake it; a
  ## missed wake just waits the 100ms poll.
  let slot = myQueueIndex()
  request[].next = nil # the new node becomes the end of the chain
  withLock bank.queues[slot].lock:
    let previousTail = bank.queues[slot].tail
    result = previousTail.isNil()
    if result:
      # Empty queue: the request is both the oldest and the newest entry.
      bank.queues[slot].head = request
    else:
      previousTail[].next = request
    bank.queues[slot].tail = request

proc mergeQueues*(bank: var RequestQueueBank): ptr FFIThreadRequest {.raises: [].} =
  ## Single-consumer drain: detach the contents of every queue and link them,
  ## in queue-index order, into one chain, leaving each queue empty.
  ##
  ## Returns the first request of the chain, or nil when all queues were empty.
  ## The caller then owns the chain and must read each request's `next` before
  ## dispatching it (dispatch frees the request).
  result = nil
  var chainEnd: ptr FFIThreadRequest = nil
  for slot in low(bank.queues) .. high(bank.queues):
    withLock bank.queues[slot].lock:
      let detached = bank.queues[slot].head
      if not detached.isNil():
        if result.isNil():
          result = detached # first non-empty queue starts the chain
        else:
          chainEnd[].next = detached # splice onto what was gathered so far
        chainEnd = bank.queues[slot].tail
        bank.queues[slot].head = nil
        bank.queues[slot].tail = nil