nim-sds/library/sds_thread/sds_thread.nim

{.pragma: exported, exportc, cdecl, raises: [].}
{.pragma: callback, cdecl, raises: [], gcsafe.}
{.passc: "-fPIC".}

import std/[options, atomics, os, net, locks]
import chronicles, chronos, chronos/threadsync, taskpools/channels_spsc_single, results
import ../ffi_types, ./inter_thread_communication/sds_thread_request, sds/sds_utils

type SdsContext* = object
  ## State shared between the requester side and the SDS worker thread.
  ## In this file it is allocated with `createShared` in `createSdsThread`
  ## and released with `freeShared` in `destroySdsThread`.
  thread: Thread[(ptr SdsContext)]
    # worker thread handle; started in createSdsThread, joined in destroySdsThread
  lock: Lock
    # held for the whole body of sendRequestToSdsThread so that only one
    # request at a time goes through the channel + signal handshake
  reqChannel: ChannelSPSCSingle[ptr SdsThreadRequest]
    # carries the pointer to the pending request from the requester to the worker
  reqSignal: ThreadSignalPtr
    # to inform The SDS Thread (a.k.a TST) that a new request is sent;
    # also fired by destroySdsThread to wake the worker so it can exit
  reqReceivedSignal: ThreadSignalPtr
    # to inform the main thread that the request is rx by TST;
    # the requester blocks on it (waitSync) after firing reqSignal
  # The exported pointer fields below are not read or written in this file.
  # NOTE(review): presumably they are set and consumed by the FFI layer —
  # confirm against the callers.
  userData*: pointer
  eventCallback*: pointer
  eventUserdata*: pointer
  retrievalHintProvider*: pointer
  retrievalHintUserData*: pointer
  running: Atomic[bool] # To control when the thread is running
    # stored `true` before the thread is created and `false` by
    # destroySdsThread; the worker checks it after every reqSignal wake-up

proc runSds(ctx: ptr SdsContext) {.async.} =
  ## Worker loop of the SDS thread. Owns the `ReliabilityManager` instance
  ## and serves the requests handed over through `ctx.reqChannel`, one at
  ## a time, until `ctx.running` is observed as `false`.

  var rm: ReliabilityManager

  while true:
    # Sleep until somebody fires reqSignal (new request or shutdown).
    await ctx.reqSignal.wait()

    if not ctx.running.load():
      break

    ## Pick up the request left in the channel by the requester thread
    var pending: ptr SdsThreadRequest
    if not ctx.reqChannel.tryRecv(pending):
      error "sds thread could not receive a request"
      continue

    ## Acknowledge receipt first: the requester only blocks until the
    ## request has been received, not until it has been processed. A
    ## failed acknowledgement is logged and processing still goes ahead.
    let ackRes = ctx.reqReceivedSignal.fireSync()
    if ackRes.isErr():
      error "could not fireSync back to requester thread", error = ackRes.error

    ## Processing is awaited in place, so the next request is not picked
    ## up before this one has finished.
    await SdsThreadRequest.process(pending, addr rm)

proc run(ctx: ptr SdsContext) {.thread.} =
  ## Thread entry point: drives the async worker loop to completion and
  ## returns when `runSds` finishes.
  let worker = runSds(ctx)
  waitFor worker

proc createSdsThread*(): Result[ptr SdsContext, string] =
  ## Called from the main thread: allocates the shared `SdsContext`,
  ## creates both thread signals, initialises the lock and starts the
  ## SDS worker thread.
  ##
  ## Returns the context pointer on success. On failure an error string
  ## is returned and everything acquired up to that point (signals, lock,
  ## shared allocation) is released again, so nothing leaks.
  let ctx = createShared(SdsContext, 1)

  ctx.reqSignal = ThreadSignalPtr.new().valueOr:
    freeShared(ctx)
    return err("couldn't create reqSignal ThreadSignalPtr")
  ctx.reqReceivedSignal = ThreadSignalPtr.new().valueOr:
    # Best-effort cleanup: the creation error is the one worth reporting.
    discard ctx.reqSignal.close()
    freeShared(ctx)
    return err("couldn't create reqReceivedSignal ThreadSignalPtr")
  ctx.lock.initLock()

  # Must be set before the thread starts: the worker checks it on wake-up.
  ctx.running.store(true)

  try:
    createThread(ctx.thread, run, ctx)
  except ValueError, ResourceExhaustedError:
    # Capture the message first, then undo every acquisition made above.
    let reason = getCurrentExceptionMsg()
    ctx.lock.deinitLock()
    discard ctx.reqSignal.close()
    discard ctx.reqReceivedSignal.close()
    # and freeShared for typed allocations!
    freeShared(ctx)

    return err("failed to create the SDS thread: " & reason)

  return ok(ctx)

proc destroySdsThread*(ctx: ptr SdsContext): Result[void, string] =
  ## Stops the SDS worker thread and releases the context.
  ##
  ## Clears `running`, wakes the worker through `reqSignal`, joins the
  ## thread and then releases the lock, both signals and the shared
  ## allocation. `ctx` must not be used after this proc got past the join.
  ##
  ## If the worker cannot be signalled, an error is returned early and
  ## nothing is released: the thread has not been joined at that point,
  ## so the context it was started with is left in place.
  ctx.running.store(false)

  let signaledOnTime = ctx.reqSignal.fireSync().valueOr:
    return err("error in destroySdsThread: " & $error)
  if not signaledOnTime:
    return err("failed to signal reqSignal on time in destroySdsThread")

  joinThread(ctx.thread)
  ctx.lock.deinitLock()

  # Close BOTH signals and free the context unconditionally; only then
  # report a close failure. Returning on the first failed close would
  # leave the other signal open and the context allocated.
  let reqSignalClosed = ctx.reqSignal.close()
  let reqReceivedSignalClosed = ctx.reqReceivedSignal.close()
  freeShared(ctx)

  ?reqSignalClosed
  ?reqReceivedSignalClosed

  return ok()

proc sendRequestToSdsThread*(
    ctx: ptr SdsContext,
    reqType: RequestType,
    reqContent: pointer,
    callback: SdsCallBack,
    userData: pointer,
): Result[void, string] =
  ## Hands one request over to the SDS worker thread and blocks until the
  ## worker acknowledges that it has received it (not that it has
  ## processed it).
  ##
  ## Returns `ok()` once the acknowledgement arrived; from then on the
  ## request belongs to the SDS thread. Returns an error string when the
  ## request could not be enqueued, the worker could not be signalled, or
  ## waiting for the acknowledgement failed; in those cases the request is
  ## released here.
  let req = SdsThreadRequest.createShared(reqType, reqContent, callback, userData)

  # This lock is only necessary while we use a SP Channel and while the signalling
  # between threads assumes that there aren't concurrent requests.
  # Rearchitecting the signaling + migrating to a MP Channel will allow us to receive
  # requests concurrently and spare us the need of locks
  ctx.lock.acquire()
  defer:
    ctx.lock.release()

  ## Sending the request
  if not ctx.reqChannel.trySend(req):
    # Render the request BEFORE releasing it: dereferencing `req` after
    # deallocShared would read freed memory.
    let failure = "Couldn't send a request to the sds thread: " & $req[]
    deallocShared(req)
    return err(failure)

  # NOTE(review): from here on the pointer already sits in reqChannel.
  # The frees below assume the worker will not dequeue it afterwards —
  # confirm, otherwise the worker could observe a dangling pointer.
  let fireSyncRes = ctx.reqSignal.fireSync()
  if fireSyncRes.isErr():
    deallocShared(req)
    return err("failed fireSync: " & $fireSyncRes.error)

  if not fireSyncRes.get():
    deallocShared(req)
    return err("Couldn't fireSync in time")

  ## wait until the SDS Thread properly received the request
  let res = ctx.reqReceivedSignal.waitSync()
  if res.isErr():
    deallocShared(req)
    return err("Couldn't receive reqReceivedSignal signal")

  ## Notice that in case of "ok", the deallocShared(req) is performed by the SDS Thread in the
  ## process proc.
  ok()
feat: extensive set of initial features. 2025-05-29 16:48:53 +05:30			`{.pragma: exported, exportc, cdecl, raises: [].}`
			`{.pragma: callback, cdecl, raises: [], gcsafe.}`
			`{.passc: "-fPIC".}`

			`import std/[options, atomics, os, net, locks]`
			`import chronicles, chronos, chronos/threadsync, taskpools/channels_spsc_single, results`
feat: make Persistence interface async (#69) * feat: make Persistence interface async The 14 Persistence proc fields now return Future[...] with {.async: (raises: []), gcsafe.}, allowing real I/O backends (SQLite, encrypted file, network) to suspend rather than block the Chronos event loop the manager runs on. Propagates through: - ReliabilityManager.lock: system.Lock -> chronos.AsyncLock. Acquired across awaits cleanly; matches the single-threaded Chronos worker the FFI uses. Multi-OS-thread use is now explicitly the caller's responsibility. - sds_utils + sds.nim public API procs (wrapOutgoingMessage, unwrapReceivedMessage, markDependenciesMet, setCallbacks, resetReliabilityManager, cleanup, ensureChannel, removeChannel, the getter snapshots, etc.) are now async. - FFI request handlers in library/sds_thread/... await the new API. - Tests converted via an asyncTest template that wraps each test body in an async proc; setup/teardown use waitFor for their single async call (ensureChannel / cleanup). Lock scope is preserved exactly: the same call sites that held the kernel Lock today hold AsyncLock now -- no new locking added. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * refactor: drop asyncSpawn, add asyncSetup/asyncTeardown Three asyncSpawn usages removed: - sds.nim startPeriodicTasks: stored the periodic-task futures on ReliabilityManager (new field `periodicTasks: seq[FutureBase]`) so cleanup can cancel them on shutdown instead of leaking the loops against a cleared manager. - library/sds_thread/sds_thread.nim: fireSync moved BEFORE processing, then `await SdsThreadRequest.process(...)` instead of asyncSpawn'ing it. Aligns the worker with the SP-channel + lock assumption that there are no concurrent requests; caller throughput is unchanged because the caller only waits for receipt (fireSync), not processing. - tests TestBus repair callback: replaced asyncSpawn(deliverExcept...) with an explicit pending-delivery queue drained by `bus.drain()`. 
Integration tests no longer rely on `sleepAsync(10ms)` to let spawned deliveries finish — they await drain instead. Tests also pick up an asyncSetup/asyncTeardown pair (tests/async_unittest.nim) so suite fixtures can `await` directly. All `waitFor` in setup/teardown blocks is gone; only the top-level asyncTest wrapper still uses waitFor (once, to drive the async proc to completion). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * Correctly propagate error hidden by new async move * Correctly handle future cancellation exceptions, +some housekeeping * Apply suggestion from @Ivansete-status Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> * Stylistics, async default implication addressed, nph style run * Remove leaking CancelledFuture from public facing + as a consequence it is tuneled into handling CatchableError everywhere --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com> Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> 2026-05-25 22:30:15 +02:00			`import ../ffi_types, ./inter_thread_communication/sds_thread_request, sds/sds_utils`
feat: extensive set of initial features. 2025-05-29 16:48:53 +05:30
			`type SdsContext* = object`
			`thread: Thread[(ptr SdsContext)]`
			`lock: Lock`
			`reqChannel: ChannelSPSCSingle[ptr SdsThreadRequest]`
			`reqSignal: ThreadSignalPtr`
			`# to inform The SDS Thread (a.k.a TST) that a new request is sent`
			`reqReceivedSignal: ThreadSignalPtr`
			`# to inform the main thread that the request is rx by TST`
			`userData*: pointer`
			`eventCallback*: pointer`
			`eventUserdata*: pointer`
feat: support retrieval hints for efficient message retrieval from store nodes (#18) * feat: updates for retrieval hint * use HistoryEntry for deps * chore: rearrange helper funcs * chore: address review comments * fix: simplify with mapIt 2026-01-29 15:22:40 +05:30			`retrievalHintProvider*: pointer`
			`retrievalHintUserData*: pointer`
feat: extensive set of initial features. 2025-05-29 16:48:53 +05:30			`running: Atomic[bool] # To control when the thread is running`

			`proc runSds(ctx: ptr SdsContext) {.async.} =`
			`## This is the worker body. This runs the SDS instance`
			`## and attends library user requests (stop, connect_to, etc.)`

			`var rm: ReliabilityManager`

			`while true:`
			`await ctx.reqSignal.wait()`

			`if ctx.running.load == false:`
			`break`

			`## Trying to get a request from the libsds requestor thread`
			`var request: ptr SdsThreadRequest`
			`let recvOk = ctx.reqChannel.tryRecv(request)`
			`if not recvOk:`
			`error "sds thread could not receive a request"`
			`continue`

feat: make Persistence interface async (#69) * feat: make Persistence interface async The 14 Persistence proc fields now return Future[...] with {.async: (raises: []), gcsafe.}, allowing real I/O backends (SQLite, encrypted file, network) to suspend rather than block the Chronos event loop the manager runs on. Propagates through: - ReliabilityManager.lock: system.Lock -> chronos.AsyncLock. Acquired across awaits cleanly; matches the single-threaded Chronos worker the FFI uses. Multi-OS-thread use is now explicitly the caller's responsibility. - sds_utils + sds.nim public API procs (wrapOutgoingMessage, unwrapReceivedMessage, markDependenciesMet, setCallbacks, resetReliabilityManager, cleanup, ensureChannel, removeChannel, the getter snapshots, etc.) are now async. - FFI request handlers in library/sds_thread/... await the new API. - Tests converted via an asyncTest template that wraps each test body in an async proc; setup/teardown use waitFor for their single async call (ensureChannel / cleanup). Lock scope is preserved exactly: the same call sites that held the kernel Lock today hold AsyncLock now -- no new locking added. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * refactor: drop asyncSpawn, add asyncSetup/asyncTeardown Three asyncSpawn usages removed: - sds.nim startPeriodicTasks: stored the periodic-task futures on ReliabilityManager (new field `periodicTasks: seq[FutureBase]`) so cleanup can cancel them on shutdown instead of leaking the loops against a cleared manager. - library/sds_thread/sds_thread.nim: fireSync moved BEFORE processing, then `await SdsThreadRequest.process(...)` instead of asyncSpawn'ing it. Aligns the worker with the SP-channel + lock assumption that there are no concurrent requests; caller throughput is unchanged because the caller only waits for receipt (fireSync), not processing. - tests TestBus repair callback: replaced asyncSpawn(deliverExcept...) with an explicit pending-delivery queue drained by `bus.drain()`. 
Integration tests no longer rely on `sleepAsync(10ms)` to let spawned deliveries finish — they await drain instead. Tests also pick up an asyncSetup/asyncTeardown pair (tests/async_unittest.nim) so suite fixtures can `await` directly. All `waitFor` in setup/teardown blocks is gone; only the top-level asyncTest wrapper still uses waitFor (once, to drive the async proc to completion). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * Correctly propagate error hidden by new async move * Correctly handle future cancellation exceptions, +some housekeeping * Apply suggestion from @Ivansete-status Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> * Stylistics, async default implication addressed, nph style run * Remove leaking CancelledFuture from public facing + as a consequence it is tuneled into handling CatchableError everywhere --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com> Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> 2026-05-25 22:30:15 +02:00			`## Ack receipt to the requester thread BEFORE processing — it only`
			`## waits for "received", not "processed", so the caller's throughput`
			`## doesn't change. Processing is then awaited (was: asyncSpawn'd),`
			`## which serializes requests on this worker. The SP channel + lock`
			`## above already assume no concurrent requests, so awaiting here`
			`## aligns the processing side with that assumption.`
feat: extensive set of initial features. 2025-05-29 16:48:53 +05:30			`let fireRes = ctx.reqReceivedSignal.fireSync()`
			`if fireRes.isErr():`
			`error "could not fireSync back to requester thread", error = fireRes.error`

feat: make Persistence interface async (#69) * feat: make Persistence interface async The 14 Persistence proc fields now return Future[...] with {.async: (raises: []), gcsafe.}, allowing real I/O backends (SQLite, encrypted file, network) to suspend rather than block the Chronos event loop the manager runs on. Propagates through: - ReliabilityManager.lock: system.Lock -> chronos.AsyncLock. Acquired across awaits cleanly; matches the single-threaded Chronos worker the FFI uses. Multi-OS-thread use is now explicitly the caller's responsibility. - sds_utils + sds.nim public API procs (wrapOutgoingMessage, unwrapReceivedMessage, markDependenciesMet, setCallbacks, resetReliabilityManager, cleanup, ensureChannel, removeChannel, the getter snapshots, etc.) are now async. - FFI request handlers in library/sds_thread/... await the new API. - Tests converted via an asyncTest template that wraps each test body in an async proc; setup/teardown use waitFor for their single async call (ensureChannel / cleanup). Lock scope is preserved exactly: the same call sites that held the kernel Lock today hold AsyncLock now -- no new locking added. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * refactor: drop asyncSpawn, add asyncSetup/asyncTeardown Three asyncSpawn usages removed: - sds.nim startPeriodicTasks: stored the periodic-task futures on ReliabilityManager (new field `periodicTasks: seq[FutureBase]`) so cleanup can cancel them on shutdown instead of leaking the loops against a cleared manager. - library/sds_thread/sds_thread.nim: fireSync moved BEFORE processing, then `await SdsThreadRequest.process(...)` instead of asyncSpawn'ing it. Aligns the worker with the SP-channel + lock assumption that there are no concurrent requests; caller throughput is unchanged because the caller only waits for receipt (fireSync), not processing. - tests TestBus repair callback: replaced asyncSpawn(deliverExcept...) with an explicit pending-delivery queue drained by `bus.drain()`. 
Integration tests no longer rely on `sleepAsync(10ms)` to let spawned deliveries finish — they await drain instead. Tests also pick up an asyncSetup/asyncTeardown pair (tests/async_unittest.nim) so suite fixtures can `await` directly. All `waitFor` in setup/teardown blocks is gone; only the top-level asyncTest wrapper still uses waitFor (once, to drive the async proc to completion). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * Correctly propagate error hidden by new async move * Correctly handle future cancellation exceptions, +some housekeeping * Apply suggestion from @Ivansete-status Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> * Stylistics, async default implication addressed, nph style run * Remove leaking CancelledFuture from public facing + as a consequence it is tuneled into handling CatchableError everywhere --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com> Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> 2026-05-25 22:30:15 +02:00			`await SdsThreadRequest.process(request, addr rm)`

feat: extensive set of initial features. 2025-05-29 16:48:53 +05:30			`proc run(ctx: ptr SdsContext) {.thread.} =`
			`## Launch sds worker`
			`waitFor runSds(ctx)`

			`proc createSdsThread*(): Result[ptr SdsContext, string] =`
			`## This proc is called from the main thread and it creates`
			`## the SDS working thread.`
			`var ctx = createShared(SdsContext, 1)`
			`ctx.reqSignal = ThreadSignalPtr.new().valueOr:`
			`return err("couldn't create reqSignal ThreadSignalPtr")`
			`ctx.reqReceivedSignal = ThreadSignalPtr.new().valueOr:`
			`return err("couldn't create reqReceivedSignal ThreadSignalPtr")`
			`ctx.lock.initLock()`

			`ctx.running.store(true)`

			`try:`
			`createThread(ctx.thread, run, ctx)`
			`except ValueError, ResourceExhaustedError:`
			`# and freeShared for typed allocations!`
			`freeShared(ctx)`

			`return err("failed to create the SDS thread: " & getCurrentExceptionMsg())`

			`return ok(ctx)`

			`proc destroySdsThread*(ctx: ptr SdsContext): Result[void, string] =`
			`ctx.running.store(false)`

			`let signaledOnTime = ctx.reqSignal.fireSync().valueOr:`
			`return err("error in destroySdsThread: " & $error)`
			`if not signaledOnTime:`
			`return err("failed to signal reqSignal on time in destroySdsThread")`

			`joinThread(ctx.thread)`
			`ctx.lock.deinitLock()`
			`?ctx.reqSignal.close()`
			`?ctx.reqReceivedSignal.close()`
			`freeShared(ctx)`

			`return ok()`

			`proc sendRequestToSdsThread*(`
			`ctx: ptr SdsContext,`
			`reqType: RequestType,`
			`reqContent: pointer,`
			`callback: SdsCallBack,`
			`userData: pointer,`
			`): Result[void, string] =`
			`let req = SdsThreadRequest.createShared(reqType, reqContent, callback, userData)`

			`# This lock is only necessary while we use a SP Channel and while the signalling`
			`# between threads assumes that there aren't concurrent requests.`
			`# Rearchitecting the signaling + migrating to a MP Channel will allow us to receive`
			`# requests concurrently and spare us the need of locks`
			`ctx.lock.acquire()`
			`defer:`
			`ctx.lock.release()`
			`## Sending the request`
			`let sentOk = ctx.reqChannel.trySend(req)`
			`if not sentOk:`
			`deallocShared(req)`
			`return err("Couldn't send a request to the sds thread: " & $req[])`

			`let fireSyncRes = ctx.reqSignal.fireSync()`
			`if fireSyncRes.isErr():`
			`deallocShared(req)`
			`return err("failed fireSync: " & $fireSyncRes.error)`

			`if fireSyncRes.get() == false:`
			`deallocShared(req)`
			`return err("Couldn't fireSync in time")`

			`## wait until the SDS Thread properly received the request`
			`let res = ctx.reqReceivedSignal.waitSync()`
			`if res.isErr():`
			`deallocShared(req)`
			`return err("Couldn't receive reqReceivedSignal signal")`

			`## Notice that in case of "ok", the deallocShared(req) is performed by the SDS Thread in the`
			`## process proc.`
			`ok()`