From 64b628599f8794374e6d242499a22cee831f4d6e Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Tue, 2 Jun 2026 22:53:47 +0200
Subject: [PATCH 01/10] create v0.1.4

---
 ffi.nimble | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ffi.nimble b/ffi.nimble
index 12c9f3a..27465ce 100644
--- a/ffi.nimble
+++ b/ffi.nimble
@@ -1,6 +1,6 @@
 # ffi.nimble
 
-version = "0.2.0"
+version = "0.1.4"
 author = "Institute of Free Technology"
 description = "FFI framework with custom header generation"
 license = "MIT or Apache License 2.0"

From c2ea4f3f98a5d6e02123b4bfcb56f24cb683ad6b Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Tue, 2 Jun 2026 23:07:06 +0200
Subject: [PATCH 02/10] add changelog for 0.1.4

---
 CHANGELOG.md | 61 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 805b8fa..c2bd1e6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,15 @@ All notable changes to this project are documented in this file.
   runs on the event thread. The FFI thread advances an atomic heartbeat
   each loop iteration; if it stalls for more than 1s past the start-up
   grace window, the event thread emits the `not_responding` event.
+- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
+  authoring entry point
+  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
+- Generated C++ avoids move constructors and assignment operators
+  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
+  throws exceptions across the binding boundary
+  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
+- Removed the wildcard event listener; event dispatch is now strictly
+  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
 
 ### Added
 - Queue-overflow handling: when the bounded event queue is full, the
@@ -21,15 +30,6 @@ All notable changes to this project are documented in this file.
   `not_responding` from the event thread, and rejects subsequent
   `sendRequestToFFIThread` calls with `event queue stuck - library
   cannot accept new requests`.
-
-## [0.2.0] - 2026-06-04
-
-Major release introducing the CBOR-based wire format, CBOR-backed FFI events
-with a multi-listener registry, multi-language binding generation (C++, Rust,
-CDDL), CI hardening with sanitizers, and several robustness fixes around
-context lifetime and memory safety.
-
-### Added
 - **CBOR serialization** as the FFI wire format, replacing the previous
   JSON/string-based `serial.nim`
   ([#23](https://github.com/logos-messaging/nim-ffi/pull/23)).
@@ -63,17 +63,6 @@ context lifetime and memory safety.
 - CBOR type-coverage tests
   ([#41](https://github.com/logos-messaging/nim-ffi/pull/41)).
 
-### Changed
-- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
-  authoring entry point
-  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
-- Generated C++ avoids move constructors and assignment operators
-  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
-  throws exceptions across the binding boundary
-  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
-- Removed the wildcard event listener; event dispatch is now strictly
-  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
-
 ### Fixed
 - Use-after-free in the event/context lifetime path
   ([#47](https://github.com/logos-messaging/nim-ffi/pull/47)).
@@ -132,3 +121,35 @@ Initial tagged release.
   watchdog with configurable timeout
   ([#7](https://github.com/logos-messaging/nim-ffi/pull/7)).
 - License files updated to comply with Logos licensing requirements.
+# Changelog
+
+## [0.1.4] - 2026-06-02
+
+[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.3...v0.1.4)
+
+### Added
+
+- Simplified FFI authoring with auto-generated C++ and Rust language bindings,
+  including new `ffi/codegen/cpp.nim`, `ffi/codegen/rust.nim` and shared
+  `ffi/codegen/meta.nim` helpers (#15).
+- Rust example bindings and clients under `examples/nim_timer/` (`rust_bindings`
+  and `rust_client`, the latter with a Tokio async variant) (#15).
+- CBOR serialization support via `ffi/serial.nim`, with `tests/test_serial.nim`
+  coverage.
+- FFI context pool (`ffi/ffi_context_pool.nim`) using a fixed array of contexts.
+- Test suite expansion: `test_alloc.nim`, `test_ctx_validation.nim`,
+  `test_ffi_context.nim`, `test_gc_compat.nim`.
+- Continuous integration pipeline (#12).
+
+### Fixed
+
+- Context buffer overflow (#21).
+- Use a fixed array of contexts to avoid consuming all file descriptors (#14).
+- Memory leaks (#11).
+- Add `install_name` for macOS shared libraries (#8).
+
+### Changed
+
+- Run tests with the `refc` garbage collector (#20).
+- Remove `CatchableError` usage (#19).
+- Update license files to comply with Logos licensing requirements.

From 24cf796fcdf98860b58d7650acc325dab3d46e20 Mon Sep 17 00:00:00 2001
From: Ivan FB <128452529+Ivansete-status@users.noreply.github.com>
Date: Mon, 8 Jun 2026 13:36:14 +0200
Subject: [PATCH 03/10] fix(pool): recycle contexts to stop per-cycle fd leak
 (#74)

---
 .github/workflows/ci.yml   |   4 +-
 ffi/ffi_context.nim        | 354 ++++++++++++++-
 ffi/ffi_context_pool.nim   |  74 +--
 ffi/internal/ffi_macro.nim |  31 +-
 tests/test_ffi_context.nim | 892 +++++++++++++++++++++++++++++++++++++
 5 files changed, 1293 insertions(+), 62 deletions(-)
 create mode 100644 tests/test_ffi_context.nim

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8c39cdf..2b55175 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: CI
 
 on:
   push:
-    branches: [master, main]
+    branches: [master, main, 'release/*']
   pull_request:
-    branches: [master, main]
+    branches: [master, main, 'release/*']
 
 jobs:
   # Single source of truth for Nim / Nimble versions used by every job and
diff --git a/ffi/ffi_context.nim b/ffi/ffi_context.nim
index 3fa50cf..48e5e1b 100644
--- a/ffi/ffi_context.nim
+++ b/ffi/ffi_context.nim
@@ -6,11 +6,25 @@
 
 {.passc: "-fPIC".}
 
-import std/[atomics, locks, options, tables]
+import std/[atomics, locks, options, tables, sequtils]
 import chronicles, chronos, chronos/threadsync, taskpools/channels_spsc_single, results
-import ./ffi_types, ./ffi_events, ./ffi_thread_request, ./logging, ./cbor_serial
+import ./ffi_types, ./ffi_thread_request, ./internal/ffi_macro, ./logging
 
-export ffi_events
+type FFICallbackState* = object
+  ## Holds the C event callback and its associated user-data pointer.
+  ## Embedded in FFIContext and referenced from the FFI thread via a thread-local.
+  callback*: pointer
+  userData*: pointer
+
+type CtxLifecycle {.pure.} = enum
+  ## State machine guarding a pooled FFI context, held as an Atomic on FFIContext.
+  ## Transitions:
+  ##   Active         -> RecyclePending   when ffiDtor is invoked
+  ##   RecyclePending -> Recycling        The process completed the in-flight processes and is ready for lib cleanup and release
+  ##   Recycling      -> Active           When the FFI thread is ready again to attend to requests
+  Active ## accepting and serving requests
+  RecyclePending ## recycle requested; FFI thread loop hasn't claimed it yet
+  Recycling ## FFI loop draining handlers, then frees lib + returns to pool
 
 type FFIContext*[T] = object
   myLib*: ptr T # main library object (Waku, LibP2P, SDS, …)
@@ -32,6 +46,15 @@ type FFIContext*[T] = object
     # advanced each FFI-thread loop; event thread reads for liveness
   eventQueueStuck*: Atomic[bool] # sticky overflow flag
   running: Atomic[bool] # To control when the threads are running
+  lifecycle: Atomic[CtxLifecycle]
+  recycleCallback: FFICallBack
+    # The destructor's callback, fired by the recycle handler with the outcome:
+    # RET_OK once drained, RET_ERR if it timed out. Set by requestRecycle.
+  recycleUserData: pointer
+  inUse: Atomic[bool]
+    # Whether the context is claimed. createFFIContext claims it (false -> true); the
+    # recycle handler clears it once drained. On the context so the owning thread can
+    # release it without reaching into the pool.
   registeredRequests: ptr Table[cstring, FFIRequestProc]
 
 var onFFIThread* {.threadvar.}: bool
@@ -55,6 +78,285 @@ template closeAndNil(field: untyped) =
 proc deinitContextResources*[T](ctx: ptr FFIContext[T]): Result[void, string] =
   ## Mirror of `initContextResources`. Threads MUST be joined first;
   ## fields are nil'd after close so re-init on the same slot is safe.
+template callEventCallback*(ctx: ptr FFIContext, eventName: string, body: untyped) =
+  if isNil(ctx[].callbackState.callback):
+    chronicles.error eventName & " - eventCallback is nil"
+    return
+
+  foreignThreadGc:
+    try:
+      let event = body
+      cast[FFICallBack](ctx[].callbackState.callback)(
+        RET_OK,
+        unsafeAddr event[0],
+        cast[csize_t](len(event)),
+        ctx[].callbackState.userData,
+      )
+    except Exception, CatchableError:
+      let msg =
+        "Exception " & eventName & " when calling 'eventCallBack': " &
+        getCurrentExceptionMsg()
+      cast[FFICallBack](ctx[].callbackState.callback)(
+        RET_ERR,
+        unsafeAddr msg[0],
+        cast[csize_t](len(msg)),
+        ctx[].callbackState.userData,
+      )
+
+template dispatchFfiEvent*(eventName: string, body: untyped) =
+  ## Dispatches an FFI event to the callback registered via `{libName}_set_event_callback`.
+  ## `body` is evaluated lazily — only when a callback is registered.
+  ## Valid only on the FFI thread (i.e., inside {.ffi.} proc bodies and their async closures).
+  let ffiState = ffiCurrentCallbackState
+  if isNil(ffiState) or isNil(ffiState[].callback):
+    chronicles.error eventName & " - event callback not set"
+    return
+  foreignThreadGc:
+    try:
+      let event = body
+      cast[FFICallBack](ffiState[].callback)(
+        RET_OK, unsafeAddr event[0], cast[csize_t](len(event)), ffiState[].userData
+      )
+    except Exception, CatchableError:
+      let msg = "Exception dispatching " & eventName & ": " & getCurrentExceptionMsg()
+      cast[FFICallBack](ffiState[].callback)(
+        RET_ERR, unsafeAddr msg[0], cast[csize_t](len(msg)), ffiState[].userData
+      )
+
+proc sendRequestToFFIThread*(
+    ctx: ptr FFIContext, ffiRequest: ptr FFIThreadRequest, timeout = InfiniteDuration
+): Result[void, string] =
+  ctx.lock.acquire()
+  defer:
+    ctx.lock.release()
+
+  if ctx.lifecycle.load() != CtxLifecycle.Active:
+    deleteRequest(ffiRequest)
+    return err("FFI context is not accepting requests (being recycled)")
+
+  ## Sending the request to the FFI thread
+  let sentOk = ctx.reqChannel.trySend(ffiRequest)
+  if not sentOk:
+    deleteRequest(ffiRequest)
+    return err("Couldn't send a request to the ffi thread")
+
+  let fireSyncRes = ctx.reqSignal.fireSync()
+  if fireSyncRes.isErr():
+    deleteRequest(ffiRequest)
+    return err("failed fireSync: " & $fireSyncRes.error)
+
+  if fireSyncRes.get() == false:
+    deleteRequest(ffiRequest)
+    return err("Couldn't fireSync in time")
+
+  ## wait until the FFI working thread properly received the request
+  let res = ctx.reqReceivedSignal.waitSync(timeout)
+  if res.isErr():
+    return err("Couldn't receive reqReceivedSignal signal")
+
+  return ok()
+
+type Foo = object
+registerReqFFI(WatchdogReq, foo: ptr Foo):
+  proc(): Future[Result[string, string]] {.async.} =
+    return ok("FFI thread is not blocked")
+
+type JsonNotRespondingEvent = object
+  eventType: string
+
+proc init(T: type JsonNotRespondingEvent): T =
+  return JsonNotRespondingEvent(eventType: "not_responding")
+
+proc `$`(event: JsonNotRespondingEvent): string =
+  $(%*event)
+
+proc onNotResponding*(ctx: ptr FFIContext) =
+  callEventCallback(ctx, "onNotResponding"):
+    $JsonNotRespondingEvent.init()
+
+proc watchdogThreadBody(ctx: ptr FFIContext) {.thread.} =
+  ## Watchdog thread that monitors the FFI thread and notifies the library user if it hangs.
+  ## This thread never blocks.
+
+  let watchdogRun = proc(ctx: ptr FFIContext) {.async.} =
+    const WatchdogStartDelay = 10.seconds
+    const WatchdogTimeinterval = 1.seconds
+    const WatchdogTimeout = 20.seconds
+
+    # Give time for the node to be created and up before sending watchdog requests
+    let initialStop = await ctx.stopSignal.wait().withTimeout(WatchdogStartDelay)
+    if initialStop or ctx.running.load == false:
+      return
+
+    while true:
+      let intervalStop = await ctx.stopSignal.wait().withTimeout(WatchdogTimeinterval)
+
+      if intervalStop or ctx.running.load == false:
+        debug "Watchdog thread exiting because FFIContext is not running"
+        break
+
+      if ctx.lifecycle.load() != CtxLifecycle.Active:
+        continue
+
+      let callback = proc(
+          callerRet: cint, msg: ptr cchar, len: csize_t, userData: pointer
+      ) {.cdecl, gcsafe, raises: [].} =
+        discard ## Don't do anything. Just respecting the callback signature.
+      const nilUserData = nil
+
+      trace "Sending watchdog request to FFI thread"
+
+      try:
+        sendRequestToFFIThread(
+          ctx, WatchdogReq.ffiNewReq(callback, nilUserData), WatchdogTimeout
+        ).isOkOr:
+          error "Failed to send watchdog request to FFI thread", error = $error
+          onNotResponding(ctx)
+      except Exception as exc:
+        error "Exception sending watchdog request", exc = exc.msg
+        onNotResponding(ctx)
+
+  waitFor watchdogRun(ctx)
+
+proc processRequest[T](
+    request: ptr FFIThreadRequest, ctx: ptr FFIContext[T]
+) {.async.} =
+  ## Invoked within the FFI thread to process a request coming from the FFI API consumer thread.
+
+  let reqId = $request[].reqId
+    ## The reqId determines which proc will handle the request.
+    ## The registeredRequests represents a table defined at compile time.
+    ## Then, registeredRequests == Table[reqId, proc-handling-the-request-asynchronously]
+
+  let reqIdCs = reqId.cstring
+    # keep `reqId` alive and avoid the implicit string→cstring warning.
+
+  let retFut =
+    if not ctx[].registeredRequests[].contains(reqIdCs):
+      ## That shouldn't happen because only registered requests should be sent to the FFI thread.
+      nilProcess(request[].reqId)
+    else:
+      ctx[].registeredRequests[][reqIdCs](request[].reqContent, ctx)
+
+  let res =
+    try:
+      await retFut
+    except CancelledError as exc:
+      Result[string, string].err("Request cancelled during destroy: " & exc.msg)
+    except AsyncError as exc:
+      Result[string, string].err(
+        "Async error in processRequest for " & reqId & ": " & exc.msg
+      )
+
+  ## handleRes may raise (OOM, GC setup) even though it is rare.
+  try:
+    handleRes(res, request)
+  except Exception as exc:
+    error "Unexpected exception in handleRes", error = exc.msg
+
+proc freeLib[T](ctx: ptr FFIContext[T]) {.gcsafe.} =
+  if ctx.myLib.isNil():
+    return
+
+  when not defined(gcRefc):
+    {.cast(gcsafe).}:
+      `=destroy`(ctx.myLib[])
+  else:
+    discard
+  freeShared(ctx.myLib)
+  ctx.myLib = nil
+
+var RecycleTimeout* = 1500.milliseconds
+  ## Upper bound the recycle handler waits for in-flight handlers before it
+  ## cancels them and reports the ctx as stuck. The drain returns as soon as they
+  ## finish, so this only bounds a *stuck* handler. A `var` so tests can shorten it.
+
+proc recycleContext[T](
+    ctx: ptr FFIContext[T], ongoingProcessReq: ptr seq[Future[void]]
+) {.async.} =
+  ## Drain the in-flight handlers, free the lib object, release the context for reuse,
+  ## and fire the callback with the outcome. Never blocks the caller.
+
+  ongoingProcessReq[].keepItIf(not it.finished())
+
+  ## 1. Let the in-flight handlers finish on their own, bounded by RecycleTimeout.
+  var naturallyDrained = ongoingProcessReq[].len == 0
+  if not naturallyDrained:
+    naturallyDrained = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
+
+  ## 2. If any are wedged, cancel them and give the cancellations a bounded moment
+  ##    to unwind, so the context can be reclaimed rather than leaked.
+  var safeToRecycle = naturallyDrained
+  if not naturallyDrained:
+    for fut in ongoingProcessReq[]:
+      if not fut.finished():
+        fut.cancelSoon()
+    safeToRecycle = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
+
+  let cb = ctx.recycleCallback
+  let ud = ctx.recycleUserData
+  ctx.recycleCallback = nil
+
+  if safeToRecycle:
+    freeLib(ctx)
+    ctx.callbackState = default(FFICallbackState)
+    ongoingProcessReq[].setLen(0)
+    ctx.release()
+
+  if not cb.isNil():
+    foreignThreadGc:
+      let msg =
+        if naturallyDrained:
+          ""
+        else:
+          "recycle: in-flight requests did not finish in time"
+      let cmsg = msg.cstring
+      let retCode = if naturallyDrained: RET_OK else: RET_ERR
+      cb(retCode, unsafeAddr cmsg[0], cast[csize_t](msg.len), ud)
+
+proc ffiThreadBody[T](ctx: ptr FFIContext[T]) {.thread.} =
+  ## FFI thread body that attends library user API requests
+  ffiCurrentCallbackState = addr ctx[].callbackState
+
+  logging.setupLog(logging.LogLevel.DEBUG, logging.LogFormat.TEXT)
+
+  defer:
+    let fireRes = ctx.threadExitSignal.fireSync()
+    if fireRes.isErr():
+      error "failed to fire threadExitSignal on FFI thread exit", err = fireRes.error
+
+  let ffiRun = proc(ctx: ptr FFIContext[T]) {.async.} =
+
+    var ongoingProcessReq: seq[Future[void]]
+
+    while ctx.running.load():
+      var expected = CtxLifecycle.RecyclePending
+      if ctx.lifecycle.compareExchange(expected, CtxLifecycle.Recycling):
+        await recycleContext(ctx, addr ongoingProcessReq)
+        continue
+
+      let gotSignal = await ctx.reqSignal.wait().withTimeout(100.milliseconds)
+      if not gotSignal:
+        continue
+
+      ## Wait for a request from the ffi consumer thread
+      var request: ptr FFIThreadRequest
+      if not ctx.reqChannel.tryRecv(request):
+        continue
+
+      ongoingProcessReq.keepItIf(not it.finished())
+      ongoingProcessReq.add(processRequest(request, ctx))
+
+      let fireRes = ctx.reqReceivedSignal.fireSync()
+      if fireRes.isErr():
+        error "could not fireSync back to requester thread", error = fireRes.error
+
+  waitFor ffiRun(ctx)
+
+proc cleanUpResources[T](ctx: ptr FFIContext[T]): Result[void, string] =
+  ## Full cleanup for heap-allocated contexts: closes all resources and frees memory.
+  defer:
+    freeShared(ctx)
   ctx.lock.deinitLock()
   deinitEventRegistry(ctx[].eventRegistry)
   deinitEventQueue(ctx[].eventQueue)
@@ -115,6 +417,7 @@ proc initContextResources*[T](ctx: ptr FFIContext[T]): Result[void, string] =
 
   ctx.registeredRequests = addr ffi_types.registeredRequests
 
+  ctx.lifecycle.store(CtxLifecycle.Active)
   ctx.running.store(true)
 
   try:
@@ -180,10 +483,41 @@ proc stopAndJoinThreads*[T](ctx: ptr FFIContext[T]): Result[void, string] =
   joinThread(ctx.eventThread)
   ok()
 
-proc clearContext[T](ctx: ptr FFIContext[T]): Result[void, string] =
-  ## Stops a heap-allocated FFI context.
-  ctx.stopAndJoinThreads().isOkOr:
-    return err("clearContext: " & $error)
-  ctx.cleanUpResources().isOkOr:
-    return err("cleanUpResources failed: " & $error)
-  ok()
+proc requestRecycle*[T](
+    ctx: ptr FFIContext[T], callback: FFICallBack, userData: pointer
+): Result[void, string] =
+  ## Starts ctx recycle process without stopping its worker, so the next
+  ## createFFIContext reuses the same threads and fds. 
+  ##
+  ## During recycling, the FFI thread drains the handlers, frees the lib and releases
+  ## the context, then fires `callback` (RET_OK drained, RET_ERR stuck).
+
+  ctx.lock.acquire()
+  if ctx.lifecycle.load() != CtxLifecycle.Active:
+    ctx.lock.release()
+    return err("requestRecycle: context is not Active (already recycling)")
+
+  ctx.recycleCallback = callback
+  ctx.recycleUserData = userData
+  ctx.lifecycle.store(CtxLifecycle.RecyclePending)
+  ctx.lock.release()
+
+  let fired = ctx.reqSignal.fireSync().valueOr:
+    return err("requestRecycle: failed to signal the FFI thread: " & $error)
+  if not fired:
+    return err("requestRecycle: failed to signal the FFI thread in time")
+  return ok()
+
+proc markAsActive*[T](ctx: ptr FFIContext[T]) =
+  ctx.lifecycle.store(CtxLifecycle.Active)
+
+proc tryClaim*[T](ctx: ptr FFIContext[T]): bool =
+  ## Returns true if acquired the contex, false if it was already claimed.
+  var expected = false
+  ctx.inUse.compareExchange(expected, true)
+
+proc release*[T](ctx: ptr FFIContext[T]) =
+  ctx.inUse.store(false)
+
+proc isInUse*[T](ctx: ptr FFIContext[T]): bool =
+  ctx.inUse.load()
diff --git a/ffi/ffi_context_pool.nim b/ffi/ffi_context_pool.nim
index 5e2d2cb..421a636 100644
--- a/ffi/ffi_context_pool.nim
+++ b/ffi/ffi_context_pool.nim
@@ -1,56 +1,60 @@
 import std/atomics
 import results
-import ./ffi_context
+import ./ffi_context, ./ffi_types
 
 const MaxFFIContexts* = 32
-  # Only affects upfront pool memory; fds/threads consumed per acquired slot.
+  ## Maximum number of concurrently live FFI contexts when using FFIContextPool.
 
 type FFIContextPool*[T] = object
-  ## Fixed pool. Bounds ThreadSignalPtr fds at MaxFFIContexts * 2.
-  slots: array[MaxFFIContexts, FFIContext[T]]
-  inUse: array[MaxFFIContexts, Atomic[bool]]
-
-proc acquireSlot[T](pool: var FFIContextPool[T]): Result[ptr FFIContext[T], string] =
-  for i in 0 ..< MaxFFIContexts:
-    var expected = false
-    if pool.inUse[i].compareExchange(expected, true):
-      return ok(pool.slots[i].addr)
-  err("FFI context pool exhausted (max " & $MaxFFIContexts & " contexts)")
-
-proc releaseSlot[T](pool: var FFIContextPool[T], ctx: ptr FFIContext[T]) =
-  for i in 0 ..< MaxFFIContexts:
-    if pool.slots[i].addr == ctx:
-      pool.inUse[i].store(false)
-      return
+  contexts: array[MaxFFIContexts, FFIContext[T]]
+  initialized: array[MaxFFIContexts, Atomic[bool]]
 
 proc createFFIContext*[T](
     pool: var FFIContextPool[T]
 ): Result[ptr FFIContext[T], string] =
-  let ctx = pool.acquireSlot().valueOr:
-    return err("createFFIContext: acquireSlot failed: " & $error)
-  initContextResources(ctx).isOkOr:
-    pool.releaseSlot(ctx)
-    return err("createFFIContext: initContextResources failed: " & $error)
-  ok(ctx)
+  ## Acquires a context from the fixed pool. The context's worker is built once on
+  ## first use and reused on every later acquisition.
+
+  for i in 0 ..< MaxFFIContexts:
+    let ctx = pool.contexts[i].addr
+    if not ctx.tryClaim():
+      continue
+    if pool.initialized[i].load():
+      ## Reused context: a prior destroy drained and released it, worker still alive.
+      ctx.markAsActive()
+      return ok(ctx)
+    initContextResources(ctx).isOkOr:
+      ctx.release()
+      return err("createFFIContext: initContextResources failed: " & $error)
+    pool.initialized[i].store(true)
+    return ok(ctx)
+  return err("FFI context pool exhausted (max " & $MaxFFIContexts & " contexts)")
+
+proc releaseFFIContext*[T](
+    ctx: ptr FFIContext[T], callback: FFICallBack, userData: pointer
+): Result[void, string] =
+  return ctx.requestRecycle(callback, userData)
 
 proc destroyFFIContext*[T](
     pool: var FFIContextPool[T], ctx: ptr FFIContext[T]
 ): Result[void, string] =
-  ## On thread-exit timeout the slot is leaked — closing live-thread resources is unsafe.
+  ## Full teardown: stops/joins the worker threads and returns the context to the
+  ## pool, marking it uninitialised so a later createFFIContext rebuilds it.
   ctx.stopAndJoinThreads().isOkOr:
     return err("destroyFFIContext(pool): " & $error)
-  # Required: next acquisition would otherwise re-init a live lock (UB).
-  let deinitRes = ctx.deinitContextResources()
-  pool.releaseSlot(ctx)
-  deinitRes.isOkOr:
-    return err("destroyFFIContext(pool): " & $error)
-  ok()
+  for i in 0 ..< MaxFFIContexts:
+    if pool.contexts[i].addr == ctx:
+      pool.initialized[i].store(false)
+      break
+  ctx.release()
+  return ok()
 
 proc isValidCtx*[T](pool: var FFIContextPool[T], ctx: pointer): bool =
-  ## Rejects nil / offset-invalid / dangling pointers at the API boundary.
+  ## Returns true only if ctx points to one of the pool's contexts that is
+  ## currently in use.
   if ctx.isNil():
     return false
   for i in 0 ..< MaxFFIContexts:
-    if cast[pointer](pool.slots[i].addr) == ctx:
-      return pool.inUse[i].load()
-  false
+    if cast[pointer](pool.contexts[i].addr) == ctx:
+      return cast[ptr FFIContext[T]](ctx).isInUse()
+  return false
diff --git a/ffi/internal/ffi_macro.nim b/ffi/internal/ffi_macro.nim
index b8e7be4..714abd0 100644
--- a/ffi/internal/ffi_macro.nim
+++ b/ffi/internal/ffi_macro.nim
@@ -1253,16 +1253,15 @@ macro ffiDtor*(prc: untyped): untyped =
   ## The body contains any library-level cleanup to run before context teardown.
   ##
   ## Example:
-  ##   proc waku_destroy*(w: Waku) {.ffiDtor.} =
-  ##     w.cleanup()
+  ##   proc mylibobj_destroy*(obj: MyLibObj) {.ffiDtor.} =
+  ##     obj.cleanup()
   ##
   ## The generated C-exported proc has the signature:
-  ##   int waku_destroy(void* ctx)
+  ##   cint mylibobj_destroy(void* ctx, FfiCallback callback, void* userData)
   ##
-  ## It extracts the library value from ctx, runs the body, then calls
-  ## destroyFFIContext to tear down the FFI thread and free the context.
-  ## Returns RET_OK on success, RET_ERR on failure (null/invalid ctx, or
-  ## destroyFFIContext failure).
+  ## Recycle the context for reuse to keep fd usage bounded.
+  ## NON-BLOCKING: returns RET_OK once accepted;
+  ## the real outcome arrives via `callback`.
 
   let procName = prc[0]
   let formalParams = prc[3]
@@ -1271,8 +1270,8 @@ macro ffiDtor*(prc: untyped): untyped =
   if formalParams.len < 2:
     error("ffiDtor: proc must have exactly one parameter (w: LibType)")
 
-  let libParamName = formalParams[1][0]
-  let libTypeName = formalParams[1][1]
+  let libParamName = formalParams[1][0] # e.g. w
+  let libTypeName = formalParams[1][1]  # e.g. MyLibObj
 
   let procNameStr = block:
     let raw = $procName
@@ -1289,7 +1288,7 @@ macro ffiDtor*(prc: untyped): untyped =
   if procName.kind == nnkPostfix:
     cExportProcName = procName[1]
 
-  let destroyResIdent = genSym(nskLet, "destroyRes")
+  let releaseResIdent = genSym(nskLet, "destroyRes")
 
   let ffiBody = newStmtList()
 
@@ -1314,12 +1313,14 @@ macro ffiDtor*(prc: untyped): untyped =
 
   let poolIdent = ident($libTypeName & "FFIPool")
   ffiBody.add quote do:
-    let `destroyResIdent` =
-      `poolIdent`.destroyFFIContext(cast[ptr FFIContext[`libTypeName`]](ctx))
-    if `destroyResIdent`.isErr():
+    let `releaseResIdent` = releaseFFIContext(
+      cast[ptr FFIContext[`libTypeName`]](ctx), callback, userData
+    )
+    if `releaseResIdent`.isErr():
+      if not callback.isNil():
+        let errStr = "release failed: " & $`releaseResIdent`.error
+        callback(RET_ERR, unsafeAddr errStr[0], cast[csize_t](errStr.len), userData)
       return RET_ERR
-
-  ffiBody.add quote do:
     return RET_OK
 
   let ffiProc = newProc(
diff --git a/tests/test_ffi_context.nim b/tests/test_ffi_context.nim
new file mode 100644
index 0000000..bf6989c
--- /dev/null
+++ b/tests/test_ffi_context.nim
@@ -0,0 +1,892 @@
+import std/[locks, strutils, os, osproc, sequtils]
+import unittest2
+import results
+import ../ffi
+
+type TestLib = object
+
+## Per-request callback state.  The test thread blocks on `cond` until the
+## FFI thread signals it — no polling, no CPU waste.
+type CallbackData = object
+  lock: Lock
+  cond: Cond
+  called: bool
+  retCode: cint
+  msg: array[512, char]
+  msgLen: int
+
+proc initCallbackData(d: var CallbackData) =
+  d.lock.initLock()
+  d.cond.initCond()
+
+proc deinitCallbackData(d: var CallbackData) =
+  d.cond.deinitCond()
+  d.lock.deinitLock()
+
+proc testCallback(
+    retCode: cint, msg: ptr cchar, len: csize_t, userData: pointer
+) {.cdecl, gcsafe, raises: [].} =
+  let d = cast[ptr CallbackData](userData)
+  acquire(d[].lock)
+  d[].retCode = retCode
+  let n = min(int(len), d[].msg.high)
+  if n > 0 and not msg.isNil:
+    copyMem(addr d[].msg[0], msg, n)
+  d[].msg[n] = '\0'
+  d[].msgLen = n
+  d[].called = true
+  signal(d[].cond)
+  release(d[].lock)
+
+proc waitCallback(d: var CallbackData) =
+  acquire(d.lock)
+  while not d.called:
+    wait(d.cond, d.lock)
+  release(d.lock)
+
+proc callbackMsg(d: var CallbackData): string =
+  result = newString(d.msgLen)
+  if d.msgLen > 0:
+    copyMem(addr result[0], addr d.msg[0], d.msgLen)
+
+registerReqFFI(PingRequest, lib: ptr TestLib):
+  proc(message: cstring): Future[Result[string, string]] {.async.} =
+    return ok("pong:" & $message)
+
+registerReqFFI(FailRequest, lib: ptr TestLib):
+  proc(): Future[Result[string, string]] {.async.} =
+    return err("intentional failure")
+
+registerReqFFI(EmptyOkRequest, lib: ptr TestLib):
+  proc(): Future[Result[string, string]] {.async.} =
+    return ok("")
+
+registerReqFFI(SlowRequest, lib: ptr TestLib):
+  proc(): Future[Result[string, string]] {.async.} =
+    await sleepAsync(500.milliseconds)
+    return ok("slow-done")
+
+# Coordination channel: the FFI handler signals the test thread the instant
+# it is about to block the event loop, so the test can call destroyFFIContext
+# while the event loop is truly frozen.
+var gSyncBlockStarted: Channel[bool]
+gSyncBlockStarted.open()
+
+registerReqFFI(SyncBlockingRequest, lib: ptr TestLib):
+  proc(): Future[Result[string, string]] {.async.} =
+    # Yield first so that reqReceivedSignal fires and sendRequestToFFIThread
+    # returns on the calling thread before we start the synchronous block.
+    await sleepAsync(0.milliseconds)
+    # Signal the test thread: the event loop is about to be frozen.
+    # Channel.send is annotated as raising under refc, so wrap.
+    try:
+      gSyncBlockStarted.send(true)
+    except Exception as exc:
+      return err("gSyncBlockStarted.send raised: " & exc.msg)
+    # Simulates a request that blocks the event-loop thread synchronously
+    # (e.g. w.stop() -> switch.stop() -> connManager.close() with blocking I/O).
+    # Unlike sleepAsync, os.sleep holds the OS thread and prevents Chronos from
+    # processing any callbacks -- including the reqSignal fired by destroyFFIContext.
+    os.sleep(5_000)
+    return ok("sync-blocking-done")
+
+# Approximates the heavy ref-object workload that libwaku/libp2p performs on
+# the FFI thread. The exact cell count is large enough to force several refc
+# GC cycles; under refc this stresses the heap state that, when later combined
+# with a chronos Selector allocation on the main thread (via close()), used to
+# trip the rawNewObj → signal-handler infinite recursion.
+type RefCell = ref object
+  next: RefCell
+  payload: array[64, byte]
+
+registerReqFFI(HeavyRefAllocRequest, lib: ptr TestLib):
+  proc(): Future[Result[string, string]] {.async.} =
+    var head: RefCell
+    for i in 0 ..< 50_000:
+      let n = RefCell(next: head)
+      head = n
+      if i mod 1000 == 0:
+        await sleepAsync(0.milliseconds)
+    # Break the chain iteratively before releasing head.
+    # ORC's =destroy for RefCell recurses through .next, so a 50k-node chain
+    # would produce ~50k nested =destroy calls and overflow the stack.
+    # Walking the list and unlinking each node first keeps destruction O(n)
+    # iterative instead of O(n) recursive.
+    var node = head
+    head = nil
+    while not node.isNil():
+      let nxt = node.next
+      node.next = nil  # unlink before the refcount of `node` can drop to zero
+      node = nxt
+    await sleepAsync(10.milliseconds)
+    return ok("heavy-done")
+
+suite "FFIContextPool":
+  test "create and destroy via pool succeeds":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      assert false, "createFFIContext(pool) failed: " & $error
+      return
+    check pool.destroyFFIContext(ctx).isOk()
+
+  test "context is reused after destroy":
+    var pool: FFIContextPool[TestLib]
+    let ctx1 = pool.createFFIContext().valueOr:
+      assert false, "createFFIContext(pool) failed: " & $error
+      return
+    check pool.destroyFFIContext(ctx1).isOk()
+    # After destroying, the same context must be available again
+    let ctx2 = pool.createFFIContext().valueOr:
+      assert false, "createFFIContext(pool) failed after context release: " & $error
+      return
+    check pool.destroyFFIContext(ctx2).isOk()
+    check ctx1 == ctx2 # same context reused
+
+  test "pool exhaustion returns error":
+    var pool: FFIContextPool[TestLib]
+    var ctxs: array[MaxFFIContexts, ptr FFIContext[TestLib]]
+    for i in 0 ..< MaxFFIContexts:
+      ctxs[i] = pool.createFFIContext().valueOr:
+        for j in 0 ..< i:
+          discard pool.destroyFFIContext(ctxs[j])
+        assert false, "createFFIContext(pool) failed at context " & $i & ": " & $error
+        return
+    # Pool is now full — next create must fail
+    check pool.createFFIContext().isErr()
+    for i in 0 ..< MaxFFIContexts:
+      discard pool.destroyFFIContext(ctxs[i])
+
+  test "requests are processed via pool context":
+    var pool: FFIContextPool[TestLib]
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+
+    let ctx = pool.createFFIContext().valueOr:
+      assert false, "createFFIContext(pool) failed: " & $error
+      return
+    defer:
+      discard pool.destroyFFIContext(ctx)
+
+    check sendRequestToFFIThread(
+      ctx, PingRequest.ffiNewReq(testCallback, addr d, "pool".cstring)
+    )
+      .isOk()
+    waitCallback(d)
+    check d.retCode == RET_OK
+    check callbackMsg(d) == "pong:pool"
+
+suite "createFFIContext / destroyFFIContext":
+  test "create and destroy succeeds":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      checkpoint "createFFIContext failed: " & $error
+      check false
+      return
+    check pool.destroyFFIContext(ctx).isOk()
+
+  test "double destroy is safe via running flag":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    check pool.destroyFFIContext(ctx).isOk()
+
+suite "destroyFFIContext does not hang":
+  test "destroy while a slow async request is still in-flight":
+    ## Reproduces the race where destroyFFIContext was called while a long-
+    ## running async request (e.g. stop_node / w.stop()) was still executing.
+    ## The destroy must return well within 2 seconds; before the fix it would
+    ## block forever on joinThread(ffiThread).
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+
+    var d: CallbackData
+    initCallbackData(d)
+    defer: deinitCallbackData(d)
+
+    # sendRequestToFFIThread returns as soon as the FFI thread ACKs receipt;
+    # the 500 ms work continues asynchronously on the FFI thread.
+    check sendRequestToFFIThread(
+      ctx, SlowRequest.ffiNewReq(testCallback, addr d)
+    ).isOk()
+
+    # Destroy immediately while SlowRequest is still running.
+    let t0 = Moment.now()
+    check pool.destroyFFIContext(ctx).isOk()
+    check (Moment.now() - t0) < 2.seconds
+
+suite "destroyFFIContext does not hang when event loop is blocked":
+  test "destroy while sync-blocking request is in-flight":
+    ## Reproduces the hang seen in logosdelivery_example.c:
+    ##   logosdelivery_stop_node(...)   -- triggers w.stop() on the FFI thread
+    ##   sleep(1)
+    ##   logosdelivery_destroy(...)     -- hangs forever
+    ##
+    ## Root cause: w.stop() (and similar tear-down calls) can execute a
+    ## synchronous blocking section that holds the OS thread, preventing
+    ## the Chronos event loop from processing the reqSignal fired by
+    ## destroyFFIContext.  The result is joinThread(ffiThread) never returns.
+    ##
+    ## With the fix, destroyFFIContext must complete well within the 5 s that
+    ## SyncBlockingRequest holds the event loop.
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+
+    # CallbackData and ctx are kept alive past destroyFFIContext: the leaked
+    # FFI thread is still inside os.sleep(5_000) and will eventually wake,
+    # run handleRes, fire testCallback, and exit normally. We wait for that
+    # to happen at the end of the test so the leaked thread cannot race with
+    # subsequent tests' createFFIContext on Linux/Windows. Heap allocation
+    # ensures the late callback's userData is still valid when it fires.
+    let d = createShared(CallbackData)
+    initCallbackData(d[])
+
+    check sendRequestToFFIThread(
+      ctx, SyncBlockingRequest.ffiNewReq(testCallback, d)
+    ).isOk()
+
+    # Block until the FFI handler has signalled that os.sleep is about to start.
+    # This guarantees destroyFFIContext is called while the event loop is frozen.
+    discard gSyncBlockStarted.recv()
+
+    # Destroy must return promptly even though the event loop is frozen for 5s.
+    # It deliberately returns err and leaks ctx in this scenario rather than
+    # hanging on joinThread.
+    let t0 = Moment.now()
+    check pool.destroyFFIContext(ctx).isErr()
+    check (Moment.now() - t0) < 3.seconds
+
+    # Drain the leaked thread before the test scope ends.
+    # 1. waitCallback blocks until os.sleep(5_000) returns and handleRes
+    #    invokes testCallback (~3.5s after destroy returned), which proves
+    #    the leaked thread has reached the end of processRequest.
+    # 2. Yield briefly so the thread can finish iterating its while loop,
+    #    fire threadExitSignal in its defer, and return. Without this, on
+    #    Linux/Windows the still-live thread can race with the next test's
+    #    createFFIContext under --mm:orc and segfault.
+    # ctx.cleanUpResources is intentionally NOT called: destroyFFIContext
+    # skipped it for a reason, and the signal fds are reclaimed by the OS
+    # at process exit.
+    waitCallback(d[])
+    os.sleep(200)
+    deinitCallbackData(d[])
+    freeShared(d)
+
+suite "destroyFFIContext refc workaround":
+  ## Documents the refc-specific workaround in cleanUpResources.
+  ##
+  ## Background: when the FFI thread does heavy ref-object work (the workload
+  ## that triggered the libwaku hang in production), the refc GC heap reaches
+  ## a state where the very first chronos Selector allocation on the *main*
+  ## thread — which happens lazily inside ThreadSignalPtr.close() through
+  ## getThreadDispatcher() — traps in rawNewObj. The refc signal handler
+  ## itself re-enters the same allocator and the process never returns.
+  ## Captured stack:
+  ##   close → safeUnregisterAndCloseFd → getThreadDispatcher →
+  ##   newDispatcher → Selector.new → newObj (gc.nim:488) → rawNewObj →
+  ##   _sigtramp → signalHandler → newObjNoInit → addNewObjToZCT (loop)
+  ##
+  ## The workaround in cleanUpResources is `when defined(gcRefc): discard`,
+  ## i.e. skip the close() calls under refc only. orc is unaffected and
+  ## still cleans up the signal fds normally.
+  ##
+  ## NOTE: this test is documentation more than regression: a synthetic
+  ## ref-allocation workload of ~50k cells does NOT corrupt the refc heap
+  ## the way the real libwaku/libp2p teardown does, so this test passes
+  ## even when the workaround is disabled. Reproducing the actual hang
+  ## requires the full libwaku workload (logosdelivery_example.c).
+  ## Verification of the workaround was done end-to-end against that
+  ## example: with `--mm:refc` and close() enabled it hangs forever in
+  ## the captured stack above; with `when defined(gcRefc): discard` it
+  ## returns immediately. Under `--mm:orc` it returns immediately either
+  ## way.
+  test "destroy after heavy ref-allocation workload returns promptly":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+
+    var d: CallbackData
+    initCallbackData(d)
+    defer: deinitCallbackData(d)
+
+    check sendRequestToFFIThread(
+      ctx, HeavyRefAllocRequest.ffiNewReq(testCallback, addr d)
+    ).isOk()
+    waitCallback(d)
+    check d.retCode == RET_OK
+
+    let t0 = Moment.now()
+    check pool.destroyFFIContext(ctx).isOk()
+    check (Moment.now() - t0) < 3.seconds
+
+suite "sendRequestToFFIThread":
+  test "successful request triggers RET_OK callback":
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    defer:
+      discard pool.destroyFFIContext(ctx)
+
+    check sendRequestToFFIThread(
+      ctx, PingRequest.ffiNewReq(testCallback, addr d, "hello".cstring)
+    )
+      .isOk()
+    waitCallback(d)
+    check d.retCode == RET_OK
+    check callbackMsg(d) == "pong:hello"
+
+  test "failing request triggers RET_ERR callback":
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    defer:
+      discard pool.destroyFFIContext(ctx)
+
+    check sendRequestToFFIThread(ctx, FailRequest.ffiNewReq(testCallback, addr d)).isOk()
+    waitCallback(d)
+    check d.retCode == RET_ERR
+
+  test "empty ok response delivers empty message":
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    defer:
+      discard pool.destroyFFIContext(ctx)
+
+    check sendRequestToFFIThread(ctx, EmptyOkRequest.ffiNewReq(testCallback, addr d))
+      .isOk()
+    waitCallback(d)
+    check d.retCode == RET_OK
+    check d.msgLen == 0
+
+  test "sequential requests are all processed":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    defer:
+      discard pool.destroyFFIContext(ctx)
+
+    for i in 1 .. 5:
+      var d: CallbackData
+      initCallbackData(d)
+      let msg = "msg" & $i
+      check sendRequestToFFIThread(
+        ctx, PingRequest.ffiNewReq(testCallback, addr d, msg.cstring)
+      )
+        .isOk()
+      waitCallback(d)
+      deinitCallbackData(d)
+      check d.retCode == RET_OK
+      check callbackMsg(d) == "pong:" & msg
+
+# ---------------------------------------------------------------------------
+# ffiCtor macro integration test
+# ---------------------------------------------------------------------------
+
+type SimpleLib = object
+  value: int
+
+ffiType:
+  type SimpleConfig = object
+    initialValue: int
+
+proc testlib_create*(
+    config: SimpleConfig
+): Future[Result[SimpleLib, string]] {.ffiCtor.} =
+  return ok(SimpleLib(value: config.initialValue))
+
+# Records the value of the library object the destructor body saw, so a test can
+# confirm the user cleanup body ran with the right lib state before teardown.
+var gDestroyedValue {.threadvar.}: int
+proc testlib_destroy*(lib: SimpleLib) {.ffiDtor.} =
+  gDestroyedValue = lib.value
+
+suite "ffiCtor macro":
+  test "creates context and returns pointer via callback":
+    var d: CallbackData
+    initCallbackData(d)
+    defer: deinitCallbackData(d)
+
+    let configJson = ffiSerialize(SimpleConfig(initialValue: 42))
+    let ret = testlib_create(configJson.cstring, testCallback, addr d)
+
+    check not ret.isNil()
+
+    waitCallback(d)
+
+    check d.retCode == RET_OK
+
+    # The callback message is the ctx address as a decimal string
+    let addrStr = callbackMsg(d)
+    check addrStr.len > 0
+
+    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
+    check ctxAddr != 0
+    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
+
+    # Verify the library was properly initialized
+    check not ctx[].myLib.isNil
+    check ctx[].myLib[].value == 42
+
+    check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
+
+proc createSimpleLib(initialValue: int): ptr FFIContext[SimpleLib] =
+  ## Helper: run the generated async ctor and return the live ctx.
+  var d: CallbackData
+  initCallbackData(d)
+  defer:
+    deinitCallbackData(d)
+  let ret =
+    testlib_create(ffiSerialize(SimpleConfig(initialValue: initialValue)).cstring,
+      testCallback, addr d)
+  doAssert not ret.isNil()
+  waitCallback(d)
+  doAssert d.retCode == RET_OK
+  return cast[ptr FFIContext[SimpleLib]](cast[uint](parseBiggestUInt(callbackMsg(d))))
+
+suite "ffiDtor macro (async destroy + reuse)":
+  test "destroy fires RET_OK after teardown, frees myLib, and frees the context":
+    let ctx = createSimpleLib(5)
+    check not ctx[].myLib.isNil
+    check ctx[].myLib[].value == 5
+
+    var dD: CallbackData
+    initCallbackData(dD)
+    defer:
+      deinitCallbackData(dD)
+
+    # Async destroy: the C return is just "accepted"; the real outcome arrives
+    # via the callback once the FFI thread has finished tearing the lib down.
+    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
+    waitCallback(dD)
+    check dD.retCode == RET_OK
+    check gDestroyedValue == 5 # the user cleanup body saw the live lib
+    check ctx[].myLib.isNil() # freed on the FFI thread
+
+    # The context was freed from the FFI thread, so a fresh create reclaims it.
+    let ctx2 = createSimpleLib(9)
+    check ctx2 == ctx # same context, reused worker + fds
+    check ctx2[].myLib[].value == 9
+    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
+
+  test "destroy waits for an in-flight request before reporting RET_OK":
+    let ctx = createSimpleLib(1)
+
+    # Dispatch a 500 ms handler and do NOT wait — it is in flight at destroy time.
+    var slow: CallbackData
+    initCallbackData(slow)
+    defer:
+      deinitCallbackData(slow)
+    check sendRequestToFFIThread(ctx, SlowRequest.ffiNewReq(testCallback, addr slow)).isOk()
+
+    var dD: CallbackData
+    initCallbackData(dD)
+    defer:
+      deinitCallbackData(dD)
+    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
+    waitCallback(dD)
+    check dD.retCode == RET_OK
+    # Drained: the in-flight handler ran to completion before destroy reported OK.
+    check slow.called
+    check callbackMsg(slow) == "slow-done"
+
+    let ctx2 = createSimpleLib(2)
+    check ctx2 == ctx
+    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
+
+  test "requests are rejected once a destroy closes the gate":
+    let ctx = createSimpleLib(3)
+
+    var dD: CallbackData
+    initCallbackData(dD)
+    defer:
+      deinitCallbackData(dD)
+    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
+    waitCallback(dD)
+    check dD.retCode == RET_OK
+
+    # Gate stays closed until the context is reacquired: a late request must not
+    # dispatch onto a context about to be (or already) reused.
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+    check sendRequestToFFIThread(
+      ctx, SlowRequest.ffiNewReq(testCallback, addr d)
+    ).isErr()
+
+    let ctx2 = createSimpleLib(4)
+    check ctx2 == ctx
+    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
+
+  test "a stuck context is reported as RET_ERR rather than hanging":
+    let ctx = createSimpleLib(8)
+
+    let savedTimeout = RecycleTimeout
+    RecycleTimeout = 150.milliseconds
+    defer:
+      RecycleTimeout = savedTimeout
+
+    # In-flight handler outlasts the (shortened) drain timeout.
+    var slow: CallbackData
+    initCallbackData(slow)
+    defer:
+      deinitCallbackData(slow)
+    check sendRequestToFFIThread(ctx, SlowRequest.ffiNewReq(testCallback, addr slow)).isOk()
+
+    var dD: CallbackData
+    initCallbackData(dD)
+    defer:
+      deinitCallbackData(dD)
+    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
+    waitCallback(dD)
+    check dD.retCode == RET_ERR # drain timed out -> ctx reported stuck
+
+    # The stuck context is leaked (not reused); the handler still finishes on its
+    # own. Wait for it, then fully tear the leaked context down.
+    waitCallback(slow)
+    check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
+
+# ---------------------------------------------------------------------------
+# Simplified .ffi. macro integration test
+# ---------------------------------------------------------------------------
+
+ffiType:
+  type SendConfig = object
+    message: string
+
+proc testlib_send*(
+    lib: SimpleLib, cfg: SendConfig
+): Future[Result[string, string]] {.ffi.} =
+  return ok("echo:" & cfg.message & ":" & $lib.value)
+
+suite "simplified .ffi. macro":
+  test "sends request and gets serialized response via callback":
+    # First create a context using ffiCtor
+    var ctorD: CallbackData
+    initCallbackData(ctorD)
+    defer: deinitCallbackData(ctorD)
+
+    let configJson = ffiSerialize(SimpleConfig(initialValue: 7))
+    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
+    check not ctorRet.isNil()
+
+    waitCallback(ctorD)
+    check ctorD.retCode == RET_OK
+
+    let addrStr = callbackMsg(ctorD)
+    check addrStr.len > 0
+
+    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
+    check ctxAddr != 0
+    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
+    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
+
+    # Now call the .ffi. proc
+    var d: CallbackData
+    initCallbackData(d)
+    defer: deinitCallbackData(d)
+
+    let cfgJson = ffiSerialize(SendConfig(message: "hello"))
+    let ret = testlib_send(ctx, testCallback, addr d, cfgJson.cstring)
+    check ret == RET_OK
+
+    waitCallback(d)
+    check d.retCode == RET_OK
+
+    let receivedMsg = callbackMsg(d)
+    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
+      check false
+      ""
+    check decoded == "echo:hello:7"
+
+# ---------------------------------------------------------------------------
+# async/sync detection in .ffi. macro integration test
+# ---------------------------------------------------------------------------
+
+# Sync proc (no await in body) — macro detects this and bypasses thread machinery
+proc testlib_version*(
+    lib: SimpleLib
+): Future[Result[string, string]] {.ffi.} =
+  return ok("v" & $lib.value)
+
+suite "async/sync detection in .ffi.":
+  test "sync proc invokes callback without thread hop":
+    # Create a context using ffiCtor
+    var ctorD: CallbackData
+    initCallbackData(ctorD)
+    defer: deinitCallbackData(ctorD)
+
+    let configJson = ffiSerialize(SimpleConfig(initialValue: 3))
+    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
+    check not ctorRet.isNil()
+
+    waitCallback(ctorD)
+    check ctorD.retCode == RET_OK
+
+    let addrStr = callbackMsg(ctorD)
+    check addrStr.len > 0
+
+    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
+    check ctxAddr != 0
+    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
+    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
+
+    var d2: CallbackData
+    initCallbackData(d2)
+    defer: deinitCallbackData(d2)
+
+    # Call sync proc — callback should fire before the proc returns (no thread hop)
+    let ret = testlib_version(ctx, testCallback, addr d2)
+    # No sleep needed: sync path fires callback inline before returning
+    check ret == RET_OK
+    check d2.called   # fires synchronously — no waitCallback needed
+    check d2.retCode == RET_OK
+    let receivedMsg = callbackMsg(d2)
+    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
+      check false
+      ""
+    check decoded == "v3"
+
+# ---------------------------------------------------------------------------
+# ptr T return type in .ffi. macro integration test
+# ---------------------------------------------------------------------------
+
+type Handle = object
+  data: string
+
+ffiType:
+  type NameParam = object
+    name: string
+
+proc testlib_alloc_handle*(
+    lib: SimpleLib, np: NameParam
+): Future[Result[ptr Handle, string]] {.ffi.} =
+  let h = createShared(Handle)
+  h[] = Handle(data: np.name & ":" & $lib.value)
+  return ok(h)
+
+proc testlib_read_handle*(
+    lib: SimpleLib, handle: pointer
+): Future[Result[string, string]] {.ffi.} =
+  let h = cast[ptr Handle](handle)
+  return ok(h[].data)
+
+proc testlib_free_handle*(
+    lib: SimpleLib, handle: pointer
+): Future[Result[string, string]] {.ffi.} =
+  let h = cast[ptr Handle](handle)
+  deallocShared(h)
+  return ok("freed")
+
+suite "ptr return type in .ffi.":
+  test "returns a heap-allocated handle and reads it back":
+    # Create context via ffiCtor
+    var ctorD: CallbackData
+    initCallbackData(ctorD)
+    defer: deinitCallbackData(ctorD)
+
+    let configJson = ffiSerialize(SimpleConfig(initialValue: 5))
+    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
+    check not ctorRet.isNil()
+
+    waitCallback(ctorD)
+    check ctorD.retCode == RET_OK
+
+    let ctxAddrStr = callbackMsg(ctorD)
+    check ctxAddrStr.len > 0
+    let ctxAddr = cast[uint](parseBiggestUInt(ctxAddrStr))
+    check ctxAddr != 0
+    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
+    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
+
+    # Alloc a handle
+    var allocD: CallbackData
+    initCallbackData(allocD)
+    defer: deinitCallbackData(allocD)
+
+    let npJson = ffiSerialize(NameParam(name: "test"))
+    let allocRet = testlib_alloc_handle(ctx, testCallback, addr allocD, npJson.cstring)
+    check allocRet == RET_OK
+
+    waitCallback(allocD)
+    check allocD.retCode == RET_OK
+
+    let handleAddrStr = callbackMsg(allocD)
+    check handleAddrStr.len > 0
+    let handleAddr = parseBiggestUInt(handleAddrStr)
+    check handleAddr != 0
+
+    # Read the handle back
+    var readD: CallbackData
+    initCallbackData(readD)
+    defer: deinitCallbackData(readD)
+
+    let handleJson = ffiSerialize(cast[pointer](handleAddr))
+    let readRet = testlib_read_handle(ctx, testCallback, addr readD, handleJson.cstring)
+    check readRet == RET_OK
+
+    waitCallback(readD)
+    check readD.retCode == RET_OK
+
+    let readMsg = callbackMsg(readD)
+    let decodedStr = ffiDeserialize(readMsg.cstring, string).valueOr:
+      check false
+      ""
+    check decodedStr == "test:5"
+
+    # Free the handle
+    var freeD: CallbackData
+    initCallbackData(freeD)
+    defer: deinitCallbackData(freeD)
+
+    let freeRet = testlib_free_handle(ctx, testCallback, addr freeD, handleJson.cstring)
+    check freeRet == RET_OK
+
+    waitCallback(freeD)
+    check freeD.retCode == RET_OK
+
+# ---------------------------------------------------------------------------
+# releaseFFIContext: park & reuse (fd-leak regression)
+# ---------------------------------------------------------------------------
+
+proc countOpenFds(): int =
+  ## Number of open fds for this process, or -1 if not determinable on this
+  ## platform. On Linux we count /proc/self/fd; elsewhere we shell out to lsof
+  ## (skipped if lsof is unavailable, e.g. Windows).
+  when defined(linux):
+    var n = 0
+    for _ in walkDir("/proc/self/fd"):
+      inc n
+    return n
+  else:
+    if findExe("lsof").len == 0:
+      return -1
+    try:
+      let output =
+        execProcess("lsof", args = ["-p", $getCurrentProcessId()], options = {poUsePath})
+      return output.splitLines().countIt(it.len > 0)
+    except CatchableError:
+      return -1
+
+proc releaseAndWait[T](ctx: ptr FFIContext[T]): cint =
+  ## Test helper mirroring how a C consumer destroys a context: kick off the
+  ## (non-blocking) teardown and block on the callback, returning its retCode.
+  ## RET_OK means the lib's in-flight tasks finished and the context was parked.
+  var d: CallbackData
+  initCallbackData(d)
+  defer:
+    deinitCallbackData(d)
+  if ctx.releaseFFIContext(testCallback, addr d).isErr():
+    return RET_ERR
+  waitCallback(d)
+  return d.retCode
+
+suite "releaseFFIContext (park & reuse)":
+  test "park returns the context and reuses the same live worker":
+    var pool: FFIContextPool[TestLib]
+    let ctx1 = pool.createFFIContext().valueOr:
+      check false
+      return
+    check ctx1.releaseAndWait() == RET_OK
+
+    # Reacquire: must be the same context, with its worker still running.
+    let ctx2 = pool.createFFIContext().valueOr:
+      check false
+      return
+    check ctx1 == ctx2
+
+    var d: CallbackData
+    initCallbackData(d)
+    defer:
+      deinitCallbackData(d)
+    check sendRequestToFFIThread(
+      ctx2, PingRequest.ffiNewReq(testCallback, addr d, "reuse".cstring)
+    ).isOk()
+    waitCallback(d)
+    check d.retCode == RET_OK
+    check callbackMsg(d) == "pong:reuse" # reused worker still processes requests
+
+    check pool.destroyFFIContext(ctx2).isOk()
+
+  test "park drops the stale event callback and library pointer":
+    var pool: FFIContextPool[TestLib]
+    let ctx = pool.createFFIContext().valueOr:
+      check false
+      return
+    ctx.callbackState.callback = cast[pointer](testCallback)
+    ctx.callbackState.userData = cast[pointer](0xDEAD)
+
+    check ctx.releaseAndWait() == RET_OK
+    check ctx.callbackState.callback.isNil() # a watchdog tick can't call a freed cb
+    check ctx.callbackState.userData.isNil()
+    check ctx.myLib.isNil()
+
+    check pool.destroyFFIContext(ctx).isOk()
+
+  test "fd usage stays bounded across many park/reuse cycles":
+    if countOpenFds() < 0:
+      skip() # no fd-counting facility on this platform
+    else:
+      var pool: FFIContextPool[TestLib]
+
+      # Warm up: the first create builds the context's worker (its fds are allocated
+      # once here); parking keeps them open for reuse.
+      block:
+        let ctx = pool.createFFIContext().valueOr:
+          check false
+          return
+        check ctx.releaseAndWait() == RET_OK
+
+      let baseline = countOpenFds()
+
+      for _ in 0 ..< 20:
+        let ctx = pool.createFFIContext().valueOr:
+          check false
+          return
+        var d: CallbackData
+        initCallbackData(d)
+        check sendRequestToFFIThread(
+          ctx, PingRequest.ffiNewReq(testCallback, addr d, "x".cstring)
+        ).isOk()
+        waitCallback(d)
+        deinitCallbackData(d)
+        check ctx.releaseAndWait() == RET_OK
+
+      let afterCycles = countOpenFds()
+      # Reuse must not grow fds. Before the fix each cycle leaked ~10 fds (4
+      # ThreadSignalPtr socketpairs + 2 dispatcher kqueues); the small slack
+      # only tolerates unrelated runtime fd noise, not a per-cycle leak.
+      check afterCycles <= baseline + 5
+
+      # Tear the (still parked) context's worker down so the test leaves no threads.
+      let last = pool.createFFIContext().valueOr:
+        check false
+        return
+      check pool.destroyFFIContext(last).isOk()

From 0170d7c46970bff05cc91daaa5f8dfd44de6d975 Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Mon, 8 Jun 2026 18:23:03 +0200
Subject: [PATCH 04/10] update changelog and ffi.nimble for v0.1.5

---
 CHANGELOG.md | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++
 ffi.nimble   |   2 +-
 2 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2bd1e6..40adcc0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -123,6 +123,147 @@ Initial tagged release.
 - License files updated to comply with Logos licensing requirements.
 # Changelog
 
+All notable changes to this project are documented in this file.
+
+## [Unreleased]
+
+### Changed
+- User event callbacks now run on a dedicated event thread fed by a
+  bounded SPSC queue (default capacity 1024), so a slow listener can no
+  longer block the FFI thread or concurrent `add_event_listener` /
+  `remove_event_listener` calls
+  ([#6](https://github.com/logos-messaging/nim-ffi/issues/6)).
+- Replaced the dedicated watchdog thread with a heartbeat check that
+  runs on the event thread. The FFI thread advances an atomic heartbeat
+  each loop iteration; if it stalls for more than 1s past the start-up
+  grace window, the event thread emits the `not_responding` event.
+- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
+  authoring entry point
+  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
+- Generated C++ avoids move constructors and assignment operators
+  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
+  throws exceptions across the binding boundary
+  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
+- Removed the wildcard event listener; event dispatch is now strictly
+  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
+
+### Added
+- Queue-overflow handling: when the bounded event queue is full, the
+  library sets a sticky "stuck" flag, logs an error, fires
+  `not_responding` from the event thread, and rejects subsequent
+  `sendRequestToFFIThread` calls with `event queue stuck - library
+  cannot accept new requests`.
+- **CBOR serialization** as the FFI wire format, replacing the previous
+  JSON/string-based `serial.nim`
+  ([#23](https://github.com/logos-messaging/nim-ffi/pull/23)).
+- **CBOR-backed FFI events**: event payloads are now serialized with CBOR
+  ([#39](https://github.com/logos-messaging/nim-ffi/pull/39)).
+- **Multi-listener event registry** (`FFIEventRegistry`) and its wiring into
+  `FFIContext`
+  ([#45](https://github.com/logos-messaging/nim-ffi/pull/45),
+  [#49](https://github.com/logos-messaging/nim-ffi/pull/49)).
+- **Event-listener ABI** with per-event typed listeners
+  ([#50](https://github.com/logos-messaging/nim-ffi/pull/50)).
+- **C++ typed per-event listeners** in the generated bindings
+  ([#51](https://github.com/logos-messaging/nim-ffi/pull/51)).
+- **Rust per-event typed listeners** (`add_on_<x>_listener` + wildcard
+  `add_event_listener`)
+  ([#52](https://github.com/logos-messaging/nim-ffi/pull/52)) and Rust event
+  example bindings/clients
+  ([#53](https://github.com/logos-messaging/nim-ffi/pull/53)).
+- **C++ binding generator** with end-to-end tests driven by CMake/CTest
+  ([#27](https://github.com/logos-messaging/nim-ffi/pull/27)), later expanded
+  with multi-context, cross-library, pipeline, and stress tests
+  ([#42](https://github.com/logos-messaging/nim-ffi/pull/42)).
+- **CDDL schema generator** for the FFI types
+  ([#24](https://github.com/logos-messaging/nim-ffi/pull/24)).
+- **CI pipeline**: parallel test execution
+  ([#26](https://github.com/logos-messaging/nim-ffi/pull/26)),
+  AddressSanitizer / UndefinedBehaviorSanitizer / ThreadSanitizer jobs
+  ([#34](https://github.com/logos-messaging/nim-ffi/pull/34)), and a
+  cross-platform OS matrix for the C++ e2e suite
+  ([#38](https://github.com/logos-messaging/nim-ffi/pull/38)).
+- CBOR type-coverage tests
+  ([#41](https://github.com/logos-messaging/nim-ffi/pull/41)).
+
+### Fixed
+- Use-after-free in the event/context lifetime path
+  ([#47](https://github.com/logos-messaging/nim-ffi/pull/47)).
+
+## [0.1.5] - 2026-06-08
+
+[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.4...v0.1.5)
+
+### Fixed
+
+- Recycle FFI contexts in the pool instead of tearing them down per cycle,
+  stopping a per-cycle file-descriptor leak (#74).
+
+## [0.1.4] - 2026-05-13
+
+[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.3...v0.1.4)
+
+### Added
+
+- Simplified FFI authoring with auto-generated C++ and Rust language bindings,
+  including new `ffi/codegen/cpp.nim`, `ffi/codegen/rust.nim` and shared
+  `ffi/codegen/meta.nim` helpers (#15).
+- Rust example bindings and clients under `examples/nim_timer/` (`rust_bindings`
+  and `rust_client`, the latter with a Tokio async variant) (#15).
+- JSON/string-based FFI (de)serialization via `ffi/serial.nim`
+  (`ffiSerialize`/`ffiDeserialize`), with `tests/test_serial.nim` coverage.
+  (CBOR replaced this layer later, in 0.2.0.)
+- FFI context pool (`ffi/ffi_context_pool.nim`) using a fixed array of contexts.
+- Test suite expansion: `test_alloc.nim`, `test_ctx_validation.nim`,
+  `test_ffi_context.nim`, `test_gc_compat.nim`.
+- Continuous integration pipeline (#12).
+
+### Fixed
+
+- Context buffer overflow (#21).
+- Use a fixed array of contexts to avoid consuming all file descriptors (#14).
+- Memory leaks (#11).
+- Add `install_name` for macOS shared libraries (#8).
+
+### Changed
+
+- Run tests with the `refc` garbage collector (#20).
+- Remove `CatchableError` usage (#19).
+- Update license files to comply with Logos licensing requirements.
+
+## [0.1.3] - 2026-01-23
+
+### Fixed
+- Properly import and re-export `chronicles` so downstream packages get the
+  logging macros transitively.
+
+## [0.1.2] - 2026-01-23
+
+### Fixed
+- Re-export `chronicles` and `std/tables` when the `ffi` module is imported,
+  so generated code resolves these symbols at the call site.
+
+## [0.1.1] - 2026-01-23
+
+Initial tagged release.
+
+### Added
+- Core `ffi` macro for declaring procs exposed across the FFI boundary.
+- `FFIContext` with a dedicated worker thread, request dispatch, and a
+  watchdog with configurable timeout
+  ([#7](https://github.com/logos-messaging/nim-ffi/pull/7)).
+- License files updated to comply with Logos licensing requirements.
+# Changelog
+
+## [0.1.5] - 2026-06-08
+
+[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.4...v0.1.5)
+
+### Fixed
+
+- Recycle FFI contexts in the pool instead of tearing them down per cycle,
+  stopping a per-cycle file-descriptor leak (#74).
+
 ## [0.1.4] - 2026-06-02
 
 [Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.3...v0.1.4)
diff --git a/ffi.nimble b/ffi.nimble
index 27465ce..8312302 100644
--- a/ffi.nimble
+++ b/ffi.nimble
@@ -1,6 +1,6 @@
 # ffi.nimble
 
-version = "0.1.4"
+version = "0.1.5"
 author = "Institute of Free Technology"
 description = "FFI framework with custom header generation"
 license = "MIT or Apache License 2.0"

From e0d1ba38d1f7e6cbd7e3fbb1043bdf9a32d4813c Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Tue, 9 Jun 2026 23:47:30 +0200
Subject: [PATCH 05/10] feat(build): enforce -d:noSignalHandler at compile time

A nim-ffi library is loaded into a foreign host (Go/Rust/...) that must
own OS signal handling; if the Nim runtime installs its own handlers it
clobbers the host's (e.g. Go's SIGSEGV -> sigpanic recovery, stack
growth, goroutine preemption), turning recoverable faults into hard
process crashes. The flag can only be set on the consumer's final build
command, so a dependency cannot inject it -- but it CAN refuse to
compile without it. Add a compile-time guard so any consumer that omits
-d:noSignalHandler fails the build with an actionable message instead of
crashing at runtime (the cause of a real status-go regression).
Standalone Nim binaries (nim-ffi's own tests) build with
-d:ffiAllowSignalHandler.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ffi.nimble          |  6 +++---
 ffi/ffi_context.nim | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/ffi.nimble b/ffi.nimble
index 8312302..9e195a6 100644
--- a/ffi.nimble
+++ b/ffi.nimble
@@ -1,6 +1,6 @@
 # ffi.nimble
 
-version = "0.1.5"
+version = "0.1.6"
 author = "Institute of Free Technology"
 description = "FFI framework with custom header generation"
 license = "MIT or Apache License 2.0"
@@ -13,8 +13,8 @@ requires "chronicles"
 requires "taskpools"
 requires "cbor_serialization"
 
-const nimFlagsOrc = "--mm:orc -d:chronicles_log_level=WARN"
-const nimFlagsRefc = "--mm:refc -d:chronicles_log_level=WARN"
+const nimFlagsOrc = "--mm:orc -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler"
+const nimFlagsRefc = "--mm:refc -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler"
 
 import std/[algorithm, os, strutils]
 
diff --git a/ffi/ffi_context.nim b/ffi/ffi_context.nim
index 48e5e1b..c05b301 100644
--- a/ffi/ffi_context.nim
+++ b/ffi/ffi_context.nim
@@ -6,7 +6,20 @@
 
 {.passc: "-fPIC".}
 
-import std/[atomics, locks, options, tables, sequtils]
+# Embedded in a foreign host (Go/Rust/...) the host must own OS signal handling;
+# Nim installing its own handlers clobbers it (e.g. Go's SIGSEGV -> sigpanic).
+# Enforce -d:noSignalHandler; standalone Nim binaries opt out via -d:ffiAllowSignalHandler.
+when not defined(noSignalHandler) and not defined(ffiAllowSignalHandler):
+  {.
+    error:
+      "nim-ffi: missing required compile flag. If this library is embedded in a " &
+      "host process (Go/Rust/...), build with -d:noSignalHandler so the host keeps " &
+      "ownership of OS signal handlers (it needs SIGSEGV for crash recovery, stack " &
+      "growth and preemption). If instead this is a standalone Nim program that owns " &
+      "its own process, build with -d:ffiAllowSignalHandler."
+  .}
+
+import std/[atomics, locks, json, tables, sequtils]
 import chronicles, chronos, chronos/threadsync, taskpools/channels_spsc_single, results
 import ./ffi_types, ./ffi_thread_request, ./internal/ffi_macro, ./logging
 

From 0e176bd5eb6661e71a7bf5e689a097cbe2a71ebf Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Fri, 12 Jun 2026 11:28:48 +0200
Subject: [PATCH 06/10] fix(ffi): set up foreign-thread GC in entry procs;
 recycle/event cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Call initializeLibrary() (setupForeignThreadGc) in the `.ffi.` request
  wrapper and in add/remove_event_listener so a foreign (Go) caller thread
  has an initialised Nim heap before any allocation ($reqTypeName /
  $eventName / registry ops). Without it such a thread segfaults in the
  allocator under GC pressure — the production unwrap SIGSEGV.
- recycleContext resets the event registry/queue + stuck flag on park so a
  reused pool slot starts clean.
- ffiDtor doc/cleanup for the async recycle ABI.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                                 | 208 +----
 examples/echo/cpp_bindings/CMakeLists.txt    |   1 +
 examples/timer/cpp_bindings/CMakeLists.txt   |   1 +
 examples/timer/rust_bindings/build.rs        |   1 +
 ffi.nimble                                   |   2 +-
 ffi/codegen/rust.nim                         |   1 +
 ffi/codegen/templates/cpp/CMakeLists.txt.tpl |   1 +
 ffi/ffi_context.nim                          | 389 ++------
 ffi/ffi_context_pool.nim                     |  24 +-
 ffi/ffi_thread.nim                           |  93 +-
 ffi/internal/ffi_library.nim                 |  10 +
 ffi/internal/ffi_macro.nim                   |  59 +-
 tests/test_ffi_context.nim                   | 892 -------------------
 13 files changed, 252 insertions(+), 1430 deletions(-)
 delete mode 100644 tests/test_ffi_context.nim

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 40adcc0..52ce175 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project are documented in this file.
 ## [Unreleased]
 
 ### Changed
+- Enforce `-d:noSignalHandler` at compile time: the build now fails with an
+  actionable message unless the consumer sets `-d:noSignalHandler` (libraries
+  embedded in a foreign host such as Go/Rust) or `-d:ffiAllowSignalHandler`
+  (standalone Nim binaries). Prevents the Nim runtime from installing its own
+  OS signal handlers and clobbering the host's — the cause of a real status-go
+  regression.
 - User event callbacks now run on a dedicated event thread fed by a
   bounded SPSC queue (default capacity 1024), so a slow listener can no
   longer block the FFI thread or concurrent `add_event_listener` /
@@ -14,15 +20,6 @@ All notable changes to this project are documented in this file.
   runs on the event thread. The FFI thread advances an atomic heartbeat
   each loop iteration; if it stalls for more than 1s past the start-up
   grace window, the event thread emits the `not_responding` event.
-- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
-  authoring entry point
-  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
-- Generated C++ avoids move constructors and assignment operators
-  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
-  throws exceptions across the binding boundary
-  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
-- Removed the wildcard event listener; event dispatch is now strictly
-  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
 
 ### Added
 - Queue-overflow handling: when the bounded event queue is full, the
@@ -30,6 +27,15 @@ All notable changes to this project are documented in this file.
   `not_responding` from the event thread, and rejects subsequent
   `sendRequestToFFIThread` calls with `event queue stuck - library
   cannot accept new requests`.
+
+## [0.2.0] - 2026-06-04
+
+Major release introducing the CBOR-based wire format, CBOR-backed FFI events
+with a multi-listener registry, multi-language binding generation (C++, Rust,
+CDDL), CI hardening with sanitizers, and several robustness fixes around
+context lifetime and memory safety.
+
+### Added
 - **CBOR serialization** as the FFI wire format, replacing the previous
   JSON/string-based `serial.nim`
   ([#23](https://github.com/logos-messaging/nim-ffi/pull/23)).
@@ -63,6 +69,17 @@ All notable changes to this project are documented in this file.
 - CBOR type-coverage tests
   ([#41](https://github.com/logos-messaging/nim-ffi/pull/41)).
 
+### Changed
+- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
+  authoring entry point
+  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
+- Generated C++ avoids move constructors and assignment operators
+  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
+  throws exceptions across the binding boundary
+  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
+- Removed the wildcard event listener; event dispatch is now strictly
+  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
+
 ### Fixed
 - Use-after-free in the event/context lifetime path
   ([#47](https://github.com/logos-messaging/nim-ffi/pull/47)).
@@ -121,176 +138,3 @@ Initial tagged release.
   watchdog with configurable timeout
   ([#7](https://github.com/logos-messaging/nim-ffi/pull/7)).
 - License files updated to comply with Logos licensing requirements.
-# Changelog
-
-All notable changes to this project are documented in this file.
-
-## [Unreleased]
-
-### Changed
-- User event callbacks now run on a dedicated event thread fed by a
-  bounded SPSC queue (default capacity 1024), so a slow listener can no
-  longer block the FFI thread or concurrent `add_event_listener` /
-  `remove_event_listener` calls
-  ([#6](https://github.com/logos-messaging/nim-ffi/issues/6)).
-- Replaced the dedicated watchdog thread with a heartbeat check that
-  runs on the event thread. The FFI thread advances an atomic heartbeat
-  each loop iteration; if it stalls for more than 1s past the start-up
-  grace window, the event thread emits the `not_responding` event.
-- Removed the redundant `ffiType` macro; the `ffi` macro is now the single
-  authoring entry point
-  ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
-- Generated C++ avoids move constructors and assignment operators
-  ([#36](https://github.com/logos-messaging/nim-ffi/pull/36)) and no longer
-  throws exceptions across the binding boundary
-  ([#46](https://github.com/logos-messaging/nim-ffi/pull/46)).
-- Removed the wildcard event listener; event dispatch is now strictly
-  per-event ([#70](https://github.com/logos-messaging/nim-ffi/pull/70)).
-
-### Added
-- Queue-overflow handling: when the bounded event queue is full, the
-  library sets a sticky "stuck" flag, logs an error, fires
-  `not_responding` from the event thread, and rejects subsequent
-  `sendRequestToFFIThread` calls with `event queue stuck - library
-  cannot accept new requests`.
-- **CBOR serialization** as the FFI wire format, replacing the previous
-  JSON/string-based `serial.nim`
-  ([#23](https://github.com/logos-messaging/nim-ffi/pull/23)).
-- **CBOR-backed FFI events**: event payloads are now serialized with CBOR
-  ([#39](https://github.com/logos-messaging/nim-ffi/pull/39)).
-- **Multi-listener event registry** (`FFIEventRegistry`) and its wiring into
-  `FFIContext`
-  ([#45](https://github.com/logos-messaging/nim-ffi/pull/45),
-  [#49](https://github.com/logos-messaging/nim-ffi/pull/49)).
-- **Event-listener ABI** with per-event typed listeners
-  ([#50](https://github.com/logos-messaging/nim-ffi/pull/50)).
-- **C++ typed per-event listeners** in the generated bindings
-  ([#51](https://github.com/logos-messaging/nim-ffi/pull/51)).
-- **Rust per-event typed listeners** (`add_on_<x>_listener` + wildcard
-  `add_event_listener`)
-  ([#52](https://github.com/logos-messaging/nim-ffi/pull/52)) and Rust event
-  example bindings/clients
-  ([#53](https://github.com/logos-messaging/nim-ffi/pull/53)).
-- **C++ binding generator** with end-to-end tests driven by CMake/CTest
-  ([#27](https://github.com/logos-messaging/nim-ffi/pull/27)), later expanded
-  with multi-context, cross-library, pipeline, and stress tests
-  ([#42](https://github.com/logos-messaging/nim-ffi/pull/42)).
-- **CDDL schema generator** for the FFI types
-  ([#24](https://github.com/logos-messaging/nim-ffi/pull/24)).
-- **CI pipeline**: parallel test execution
-  ([#26](https://github.com/logos-messaging/nim-ffi/pull/26)),
-  AddressSanitizer / UndefinedBehaviorSanitizer / ThreadSanitizer jobs
-  ([#34](https://github.com/logos-messaging/nim-ffi/pull/34)), and a
-  cross-platform OS matrix for the C++ e2e suite
-  ([#38](https://github.com/logos-messaging/nim-ffi/pull/38)).
-- CBOR type-coverage tests
-  ([#41](https://github.com/logos-messaging/nim-ffi/pull/41)).
-
-### Fixed
-- Use-after-free in the event/context lifetime path
-  ([#47](https://github.com/logos-messaging/nim-ffi/pull/47)).
-
-## [0.1.5] - 2026-06-08
-
-[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.4...v0.1.5)
-
-### Fixed
-
-- Recycle FFI contexts in the pool instead of tearing them down per cycle,
-  stopping a per-cycle file-descriptor leak (#74).
-
-## [0.1.4] - 2026-05-13
-
-[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.3...v0.1.4)
-
-### Added
-
-- Simplified FFI authoring with auto-generated C++ and Rust language bindings,
-  including new `ffi/codegen/cpp.nim`, `ffi/codegen/rust.nim` and shared
-  `ffi/codegen/meta.nim` helpers (#15).
-- Rust example bindings and clients under `examples/nim_timer/` (`rust_bindings`
-  and `rust_client`, the latter with a Tokio async variant) (#15).
-- JSON/string-based FFI (de)serialization via `ffi/serial.nim`
-  (`ffiSerialize`/`ffiDeserialize`), with `tests/test_serial.nim` coverage.
-  (CBOR replaced this layer later, in 0.2.0.)
-- FFI context pool (`ffi/ffi_context_pool.nim`) using a fixed array of contexts.
-- Test suite expansion: `test_alloc.nim`, `test_ctx_validation.nim`,
-  `test_ffi_context.nim`, `test_gc_compat.nim`.
-- Continuous integration pipeline (#12).
-
-### Fixed
-
-- Context buffer overflow (#21).
-- Use a fixed array of contexts to avoid consuming all file descriptors (#14).
-- Memory leaks (#11).
-- Add `install_name` for macOS shared libraries (#8).
-
-### Changed
-
-- Run tests with the `refc` garbage collector (#20).
-- Remove `CatchableError` usage (#19).
-- Update license files to comply with Logos licensing requirements.
-
-## [0.1.3] - 2026-01-23
-
-### Fixed
-- Properly import and re-export `chronicles` so downstream packages get the
-  logging macros transitively.
-
-## [0.1.2] - 2026-01-23
-
-### Fixed
-- Re-export `chronicles` and `std/tables` when the `ffi` module is imported,
-  so generated code resolves these symbols at the call site.
-
-## [0.1.1] - 2026-01-23
-
-Initial tagged release.
-
-### Added
-- Core `ffi` macro for declaring procs exposed across the FFI boundary.
-- `FFIContext` with a dedicated worker thread, request dispatch, and a
-  watchdog with configurable timeout
-  ([#7](https://github.com/logos-messaging/nim-ffi/pull/7)).
-- License files updated to comply with Logos licensing requirements.
-# Changelog
-
-## [0.1.5] - 2026-06-08
-
-[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.4...v0.1.5)
-
-### Fixed
-
-- Recycle FFI contexts in the pool instead of tearing them down per cycle,
-  stopping a per-cycle file-descriptor leak (#74).
-
-## [0.1.4] - 2026-06-02
-
-[Full changelog](https://github.com/logos-messaging/nim-ffi/compare/v0.1.3...v0.1.4)
-
-### Added
-
-- Simplified FFI authoring with auto-generated C++ and Rust language bindings,
-  including new `ffi/codegen/cpp.nim`, `ffi/codegen/rust.nim` and shared
-  `ffi/codegen/meta.nim` helpers (#15).
-- Rust example bindings and clients under `examples/nim_timer/` (`rust_bindings`
-  and `rust_client`, the latter with a Tokio async variant) (#15).
-- CBOR serialization support via `ffi/serial.nim`, with `tests/test_serial.nim`
-  coverage.
-- FFI context pool (`ffi/ffi_context_pool.nim`) using a fixed array of contexts.
-- Test suite expansion: `test_alloc.nim`, `test_ctx_validation.nim`,
-  `test_ffi_context.nim`, `test_gc_compat.nim`.
-- Continuous integration pipeline (#12).
-
-### Fixed
-
-- Context buffer overflow (#21).
-- Use a fixed array of contexts to avoid consuming all file descriptors (#14).
-- Memory leaks (#11).
-- Add `install_name` for macOS shared libraries (#8).
-
-### Changed
-
-- Run tests with the `refc` garbage collector (#20).
-- Remove `CatchableError` usage (#19).
-- Update license files to comply with Logos licensing requirements.
diff --git a/examples/echo/cpp_bindings/CMakeLists.txt b/examples/echo/cpp_bindings/CMakeLists.txt
index 77dd7aa..6b6bd51 100644
--- a/examples/echo/cpp_bindings/CMakeLists.txt
+++ b/examples/echo/cpp_bindings/CMakeLists.txt
@@ -58,6 +58,7 @@ add_custom_command(
     COMMAND "${NIM_EXECUTABLE}" c
                 --mm:orc
                 -d:chronicles_log_level=WARN
+                -d:noSignalHandler
                 --app:lib
                 --noMain
                 "--nimMainPrefix:libecho"
diff --git a/examples/timer/cpp_bindings/CMakeLists.txt b/examples/timer/cpp_bindings/CMakeLists.txt
index 162d844..11713cb 100644
--- a/examples/timer/cpp_bindings/CMakeLists.txt
+++ b/examples/timer/cpp_bindings/CMakeLists.txt
@@ -58,6 +58,7 @@ add_custom_command(
     COMMAND "${NIM_EXECUTABLE}" c
                 --mm:orc
                 -d:chronicles_log_level=WARN
+                -d:noSignalHandler
                 --app:lib
                 --noMain
                 "--nimMainPrefix:libmy_timer"
diff --git a/examples/timer/rust_bindings/build.rs b/examples/timer/rust_bindings/build.rs
index ce9c76f..4d3b9b4 100644
--- a/examples/timer/rust_bindings/build.rs
+++ b/examples/timer/rust_bindings/build.rs
@@ -32,6 +32,7 @@ fn main() {
     cmd.arg("c")
         .arg("--mm:orc")
         .arg("-d:chronicles_log_level=WARN")
+        .arg("-d:noSignalHandler")
         .arg("--app:lib")
         .arg("--noMain")
         .arg(format!("--nimMainPrefix:libmy_timer"))
diff --git a/ffi.nimble b/ffi.nimble
index 9e195a6..edd9558 100644
--- a/ffi.nimble
+++ b/ffi.nimble
@@ -1,6 +1,6 @@
 # ffi.nimble
 
-version = "0.1.6"
+version = "0.2.0"
 author = "Institute of Free Technology"
 description = "FFI framework with custom header generation"
 license = "MIT or Apache License 2.0"
diff --git a/ffi/codegen/rust.nim b/ffi/codegen/rust.nim
index a2acb17..f989c7e 100644
--- a/ffi/codegen/rust.nim
+++ b/ffi/codegen/rust.nim
@@ -128,6 +128,7 @@ fn main() {
     cmd.arg("c")
         .arg("--mm:orc")
         .arg("-d:chronicles_log_level=WARN")
+        .arg("-d:noSignalHandler")
         .arg("--app:lib")
         .arg("--noMain")
         .arg(format!("--nimMainPrefix:lib$2"))
diff --git a/ffi/codegen/templates/cpp/CMakeLists.txt.tpl b/ffi/codegen/templates/cpp/CMakeLists.txt.tpl
index b204d4e..76fe8ef 100644
--- a/ffi/codegen/templates/cpp/CMakeLists.txt.tpl
+++ b/ffi/codegen/templates/cpp/CMakeLists.txt.tpl
@@ -58,6 +58,7 @@ add_custom_command(
     COMMAND "${NIM_EXECUTABLE}" c
                 --mm:orc
                 -d:chronicles_log_level=WARN
+                -d:noSignalHandler
                 --app:lib
                 --noMain
                 "--nimMainPrefix:lib{{LIB}}"
diff --git a/ffi/ffi_context.nim b/ffi/ffi_context.nim
index c05b301..340476d 100644
--- a/ffi/ffi_context.nim
+++ b/ffi/ffi_context.nim
@@ -19,22 +19,22 @@ when not defined(noSignalHandler) and not defined(ffiAllowSignalHandler):
       "its own process, build with -d:ffiAllowSignalHandler."
   .}
 
-import std/[atomics, locks, json, tables, sequtils]
+import std/[atomics, locks, options, sequtils, tables]
 import chronicles, chronos, chronos/threadsync, taskpools/channels_spsc_single, results
-import ./ffi_types, ./ffi_thread_request, ./internal/ffi_macro, ./logging
+import ./ffi_types, ./ffi_events, ./ffi_thread_request, ./logging, ./cbor_serial
 
-type FFICallbackState* = object
-  ## Holds the C event callback and its associated user-data pointer.
-  ## Embedded in FFIContext and referenced from the FFI thread via a thread-local.
-  callback*: pointer
-  userData*: pointer
+export ffi_events
 
 type CtxLifecycle {.pure.} = enum
   ## State machine guarding a pooled FFI context, held as an Atomic on FFIContext.
+  ## The threads, signals and dispatcher kqueues are created once per slot and
+  ## REUSED across acquire/release — chronos never frees a dispatcher's kqueue fd
+  ## (design decision; freed only at process exit), so spawning a thread per
+  ## context would leak fds unboundedly. Recycling parks the context instead.
   ## Transitions:
-  ##   Active         -> RecyclePending   when ffiDtor is invoked
-  ##   RecyclePending -> Recycling        The process completed the in-flight processes and is ready for lib cleanup and release
-  ##   Recycling      -> Active           When the FFI thread is ready again to attend to requests
+  ##   Active         -> RecyclePending   when the destructor is invoked
+  ##   RecyclePending -> Recycling        FFI loop drains handlers, frees lib, releases slot
+  ##   Recycling      -> Active           next createFFIContext reuses the slot (markAsActive)
   Active ## accepting and serving requests
   RecyclePending ## recycle requested; FFI thread loop hasn't claimed it yet
   Recycling ## FFI loop draining handlers, then frees lib + returns to pool
@@ -59,15 +59,15 @@ type FFIContext*[T] = object
     # advanced each FFI-thread loop; event thread reads for liveness
   eventQueueStuck*: Atomic[bool] # sticky overflow flag
   running: Atomic[bool] # To control when the threads are running
-  lifecycle: Atomic[CtxLifecycle]
+  lifecycle: Atomic[CtxLifecycle] # Active / RecyclePending / Recycling
   recycleCallback: FFICallBack
-    # The destructor's callback, fired by the recycle handler with the outcome:
+    # destructor's callback, fired by the recycle handler with the outcome:
     # RET_OK once drained, RET_ERR if it timed out. Set by requestRecycle.
   recycleUserData: pointer
   inUse: Atomic[bool]
-    # Whether the context is claimed. createFFIContext claims it (false -> true); the
-    # recycle handler clears it once drained. On the context so the owning thread can
-    # release it without reaching into the pool.
+    # whether the slot is claimed; createFFIContext claims it, the recycle
+    # handler clears it once drained so the owning thread can release without
+    # reaching into the pool.
   registeredRequests: ptr Table[cstring, FFIRequestProc]
 
 var onFFIThread* {.threadvar.}: bool
@@ -80,6 +80,21 @@ const
   FFIHeartbeatStartDelay* = 10.seconds # grace window for library startup
   FFIHeartbeatStaleThreshold* = 1.seconds
 
+proc tryClaim*[T](ctx: ptr FFIContext[T]): bool =
+  ## Returns true if the slot was free and is now claimed, false if already in use.
+  var expected = false
+  ctx.inUse.compareExchange(expected, true)
+
+proc release*[T](ctx: ptr FFIContext[T]) =
+  ctx.inUse.store(false)
+
+proc isInUse*[T](ctx: ptr FFIContext[T]): bool =
+  ctx.inUse.load()
+
+proc markAsActive*[T](ctx: ptr FFIContext[T]) =
+  ## Re-arms a reused (recycled) slot to accept requests again.
+  ctx.lifecycle.store(CtxLifecycle.Active)
+
 include ./event_thread
 include ./ffi_thread
 
@@ -91,285 +106,6 @@ template closeAndNil(field: untyped) =
 proc deinitContextResources*[T](ctx: ptr FFIContext[T]): Result[void, string] =
   ## Mirror of `initContextResources`. Threads MUST be joined first;
   ## fields are nil'd after close so re-init on the same slot is safe.
-template callEventCallback*(ctx: ptr FFIContext, eventName: string, body: untyped) =
-  if isNil(ctx[].callbackState.callback):
-    chronicles.error eventName & " - eventCallback is nil"
-    return
-
-  foreignThreadGc:
-    try:
-      let event = body
-      cast[FFICallBack](ctx[].callbackState.callback)(
-        RET_OK,
-        unsafeAddr event[0],
-        cast[csize_t](len(event)),
-        ctx[].callbackState.userData,
-      )
-    except Exception, CatchableError:
-      let msg =
-        "Exception " & eventName & " when calling 'eventCallBack': " &
-        getCurrentExceptionMsg()
-      cast[FFICallBack](ctx[].callbackState.callback)(
-        RET_ERR,
-        unsafeAddr msg[0],
-        cast[csize_t](len(msg)),
-        ctx[].callbackState.userData,
-      )
-
-template dispatchFfiEvent*(eventName: string, body: untyped) =
-  ## Dispatches an FFI event to the callback registered via `{libName}_set_event_callback`.
-  ## `body` is evaluated lazily — only when a callback is registered.
-  ## Valid only on the FFI thread (i.e., inside {.ffi.} proc bodies and their async closures).
-  let ffiState = ffiCurrentCallbackState
-  if isNil(ffiState) or isNil(ffiState[].callback):
-    chronicles.error eventName & " - event callback not set"
-    return
-  foreignThreadGc:
-    try:
-      let event = body
-      cast[FFICallBack](ffiState[].callback)(
-        RET_OK, unsafeAddr event[0], cast[csize_t](len(event)), ffiState[].userData
-      )
-    except Exception, CatchableError:
-      let msg = "Exception dispatching " & eventName & ": " & getCurrentExceptionMsg()
-      cast[FFICallBack](ffiState[].callback)(
-        RET_ERR, unsafeAddr msg[0], cast[csize_t](len(msg)), ffiState[].userData
-      )
-
-proc sendRequestToFFIThread*(
-    ctx: ptr FFIContext, ffiRequest: ptr FFIThreadRequest, timeout = InfiniteDuration
-): Result[void, string] =
-  ctx.lock.acquire()
-  defer:
-    ctx.lock.release()
-
-  if ctx.lifecycle.load() != CtxLifecycle.Active:
-    deleteRequest(ffiRequest)
-    return err("FFI context is not accepting requests (being recycled)")
-
-  ## Sending the request to the FFI thread
-  let sentOk = ctx.reqChannel.trySend(ffiRequest)
-  if not sentOk:
-    deleteRequest(ffiRequest)
-    return err("Couldn't send a request to the ffi thread")
-
-  let fireSyncRes = ctx.reqSignal.fireSync()
-  if fireSyncRes.isErr():
-    deleteRequest(ffiRequest)
-    return err("failed fireSync: " & $fireSyncRes.error)
-
-  if fireSyncRes.get() == false:
-    deleteRequest(ffiRequest)
-    return err("Couldn't fireSync in time")
-
-  ## wait until the FFI working thread properly received the request
-  let res = ctx.reqReceivedSignal.waitSync(timeout)
-  if res.isErr():
-    return err("Couldn't receive reqReceivedSignal signal")
-
-  return ok()
-
-type Foo = object
-registerReqFFI(WatchdogReq, foo: ptr Foo):
-  proc(): Future[Result[string, string]] {.async.} =
-    return ok("FFI thread is not blocked")
-
-type JsonNotRespondingEvent = object
-  eventType: string
-
-proc init(T: type JsonNotRespondingEvent): T =
-  return JsonNotRespondingEvent(eventType: "not_responding")
-
-proc `$`(event: JsonNotRespondingEvent): string =
-  $(%*event)
-
-proc onNotResponding*(ctx: ptr FFIContext) =
-  callEventCallback(ctx, "onNotResponding"):
-    $JsonNotRespondingEvent.init()
-
-proc watchdogThreadBody(ctx: ptr FFIContext) {.thread.} =
-  ## Watchdog thread that monitors the FFI thread and notifies the library user if it hangs.
-  ## This thread never blocks.
-
-  let watchdogRun = proc(ctx: ptr FFIContext) {.async.} =
-    const WatchdogStartDelay = 10.seconds
-    const WatchdogTimeinterval = 1.seconds
-    const WatchdogTimeout = 20.seconds
-
-    # Give time for the node to be created and up before sending watchdog requests
-    let initialStop = await ctx.stopSignal.wait().withTimeout(WatchdogStartDelay)
-    if initialStop or ctx.running.load == false:
-      return
-
-    while true:
-      let intervalStop = await ctx.stopSignal.wait().withTimeout(WatchdogTimeinterval)
-
-      if intervalStop or ctx.running.load == false:
-        debug "Watchdog thread exiting because FFIContext is not running"
-        break
-
-      if ctx.lifecycle.load() != CtxLifecycle.Active:
-        continue
-
-      let callback = proc(
-          callerRet: cint, msg: ptr cchar, len: csize_t, userData: pointer
-      ) {.cdecl, gcsafe, raises: [].} =
-        discard ## Don't do anything. Just respecting the callback signature.
-      const nilUserData = nil
-
-      trace "Sending watchdog request to FFI thread"
-
-      try:
-        sendRequestToFFIThread(
-          ctx, WatchdogReq.ffiNewReq(callback, nilUserData), WatchdogTimeout
-        ).isOkOr:
-          error "Failed to send watchdog request to FFI thread", error = $error
-          onNotResponding(ctx)
-      except Exception as exc:
-        error "Exception sending watchdog request", exc = exc.msg
-        onNotResponding(ctx)
-
-  waitFor watchdogRun(ctx)
-
-proc processRequest[T](
-    request: ptr FFIThreadRequest, ctx: ptr FFIContext[T]
-) {.async.} =
-  ## Invoked within the FFI thread to process a request coming from the FFI API consumer thread.
-
-  let reqId = $request[].reqId
-    ## The reqId determines which proc will handle the request.
-    ## The registeredRequests represents a table defined at compile time.
-    ## Then, registeredRequests == Table[reqId, proc-handling-the-request-asynchronously]
-
-  let reqIdCs = reqId.cstring
-    # keep `reqId` alive and avoid the implicit string→cstring warning.
-
-  let retFut =
-    if not ctx[].registeredRequests[].contains(reqIdCs):
-      ## That shouldn't happen because only registered requests should be sent to the FFI thread.
-      nilProcess(request[].reqId)
-    else:
-      ctx[].registeredRequests[][reqIdCs](request[].reqContent, ctx)
-
-  let res =
-    try:
-      await retFut
-    except CancelledError as exc:
-      Result[string, string].err("Request cancelled during destroy: " & exc.msg)
-    except AsyncError as exc:
-      Result[string, string].err(
-        "Async error in processRequest for " & reqId & ": " & exc.msg
-      )
-
-  ## handleRes may raise (OOM, GC setup) even though it is rare.
-  try:
-    handleRes(res, request)
-  except Exception as exc:
-    error "Unexpected exception in handleRes", error = exc.msg
-
-proc freeLib[T](ctx: ptr FFIContext[T]) {.gcsafe.} =
-  if ctx.myLib.isNil():
-    return
-
-  when not defined(gcRefc):
-    {.cast(gcsafe).}:
-      `=destroy`(ctx.myLib[])
-  else:
-    discard
-  freeShared(ctx.myLib)
-  ctx.myLib = nil
-
-var RecycleTimeout* = 1500.milliseconds
-  ## Upper bound the recycle handler waits for in-flight handlers before it
-  ## cancels them and reports the ctx as stuck. The drain returns as soon as they
-  ## finish, so this only bounds a *stuck* handler. A `var` so tests can shorten it.
-
-proc recycleContext[T](
-    ctx: ptr FFIContext[T], ongoingProcessReq: ptr seq[Future[void]]
-) {.async.} =
-  ## Drain the in-flight handlers, free the lib object, release the context for reuse,
-  ## and fire the callback with the outcome. Never blocks the caller.
-
-  ongoingProcessReq[].keepItIf(not it.finished())
-
-  ## 1. Let the in-flight handlers finish on their own, bounded by RecycleTimeout.
-  var naturallyDrained = ongoingProcessReq[].len == 0
-  if not naturallyDrained:
-    naturallyDrained = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
-
-  ## 2. If any are wedged, cancel them and give the cancellations a bounded moment
-  ##    to unwind, so the context can be reclaimed rather than leaked.
-  var safeToRecycle = naturallyDrained
-  if not naturallyDrained:
-    for fut in ongoingProcessReq[]:
-      if not fut.finished():
-        fut.cancelSoon()
-    safeToRecycle = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
-
-  let cb = ctx.recycleCallback
-  let ud = ctx.recycleUserData
-  ctx.recycleCallback = nil
-
-  if safeToRecycle:
-    freeLib(ctx)
-    ctx.callbackState = default(FFICallbackState)
-    ongoingProcessReq[].setLen(0)
-    ctx.release()
-
-  if not cb.isNil():
-    foreignThreadGc:
-      let msg =
-        if naturallyDrained:
-          ""
-        else:
-          "recycle: in-flight requests did not finish in time"
-      let cmsg = msg.cstring
-      let retCode = if naturallyDrained: RET_OK else: RET_ERR
-      cb(retCode, unsafeAddr cmsg[0], cast[csize_t](msg.len), ud)
-
-proc ffiThreadBody[T](ctx: ptr FFIContext[T]) {.thread.} =
-  ## FFI thread body that attends library user API requests
-  ffiCurrentCallbackState = addr ctx[].callbackState
-
-  logging.setupLog(logging.LogLevel.DEBUG, logging.LogFormat.TEXT)
-
-  defer:
-    let fireRes = ctx.threadExitSignal.fireSync()
-    if fireRes.isErr():
-      error "failed to fire threadExitSignal on FFI thread exit", err = fireRes.error
-
-  let ffiRun = proc(ctx: ptr FFIContext[T]) {.async.} =
-
-    var ongoingProcessReq: seq[Future[void]]
-
-    while ctx.running.load():
-      var expected = CtxLifecycle.RecyclePending
-      if ctx.lifecycle.compareExchange(expected, CtxLifecycle.Recycling):
-        await recycleContext(ctx, addr ongoingProcessReq)
-        continue
-
-      let gotSignal = await ctx.reqSignal.wait().withTimeout(100.milliseconds)
-      if not gotSignal:
-        continue
-
-      ## Wait for a request from the ffi consumer thread
-      var request: ptr FFIThreadRequest
-      if not ctx.reqChannel.tryRecv(request):
-        continue
-
-      ongoingProcessReq.keepItIf(not it.finished())
-      ongoingProcessReq.add(processRequest(request, ctx))
-
-      let fireRes = ctx.reqReceivedSignal.fireSync()
-      if fireRes.isErr():
-        error "could not fireSync back to requester thread", error = fireRes.error
-
-  waitFor ffiRun(ctx)
-
-proc cleanUpResources[T](ctx: ptr FFIContext[T]): Result[void, string] =
-  ## Full cleanup for heap-allocated contexts: closes all resources and frees memory.
-  defer:
-    freeShared(ctx)
   ctx.lock.deinitLock()
   deinitEventRegistry(ctx[].eventRegistry)
   deinitEventQueue(ctx[].eventQueue)
@@ -460,14 +196,21 @@ proc fireOrErr(sig: ThreadSignalPtr, name: string): Result[void, string] =
     return err("failed to signal: " & name & " on time")
   ok()
 
-proc waitExitOrErr(
-    sig: ThreadSignalPtr, name: string, timeout: Duration
-): Result[void, string] =
-  let exited = sig.waitSync(timeout).valueOr:
-    return err("error waiting for exit: " & name & ": " & $error)
-  if not exited:
-    return err("did not exit in time: " & name & " (leaking ctx to avoid hang)")
-  ok()
+proc reachedExitOrTimedOut(sig: ThreadSignalPtr, timeout: Duration): bool =
+  ## Best-effort bounded pre-check before joining a stopping thread.
+  ## Returns false ONLY on a genuine timeout (the exit signal was not observed
+  ## within `timeout`, so the thread may be wedged and the caller should skip
+  ## the join to avoid hanging). Returns true otherwise — including when
+  ## `waitSync` itself errors: it uses `select()`, which returns EINVAL once a
+  ## signal fd exceeds FD_SETSIZE under load. That error is NOT evidence the
+  ## thread is stuck (it was already signaled to stop and the async event loop
+  ## that drives its exit is unaffected), so we proceed to the authoritative,
+  ## fd-free joinThread rather than spuriously failing teardown and leaking the
+  ## pool slot.
+  let waited = sig.waitSync(timeout)
+  if waited.isOk() and not waited.get():
+    return false # genuine timeout
+  true
 
 proc signalStop*[T](ctx: ptr FFIContext[T]): Result[void, string] =
   # Skip onNotResponding on error: it takes reg.lock, which a back-pressuring
@@ -490,26 +233,34 @@ proc stopAndJoinThreads*[T](ctx: ptr FFIContext[T]): Result[void, string] =
   ctx.signalStop().isOkOr:
     return err("signalStop failed: " & $error)
 
-  ?ctx.threadExitSignal.waitExitOrErr("FFI thread", ThreadExitTimeout)
+  if not ctx.threadExitSignal.reachedExitOrTimedOut(ThreadExitTimeout):
+    return err("FFI thread did not exit in time (leaking ctx to avoid hang)")
   joinThread(ctx.ffiThread)
-  ?ctx.eventThreadExitSignal.waitExitOrErr("event thread", ThreadExitTimeout)
+  if not ctx.eventThreadExitSignal.reachedExitOrTimedOut(ThreadExitTimeout):
+    return err("event thread did not exit in time (leaking ctx to avoid hang)")
   joinThread(ctx.eventThread)
   ok()
 
+proc clearContext[T](ctx: ptr FFIContext[T]): Result[void, string] =
+  ## Stops a heap-allocated FFI context.
+  ctx.stopAndJoinThreads().isOkOr:
+    return err("clearContext: " & $error)
+  ctx.cleanUpResources().isOkOr:
+    return err("cleanUpResources failed: " & $error)
+  ok()
+
 proc requestRecycle*[T](
     ctx: ptr FFIContext[T], callback: FFICallBack, userData: pointer
 ): Result[void, string] =
-  ## Starts ctx recycle process without stopping its worker, so the next
-  ## createFFIContext reuses the same threads and fds. 
-  ##
-  ## During recycling, the FFI thread drains the handlers, frees the lib and releases
-  ## the context, then fires `callback` (RET_OK drained, RET_ERR stuck).
-
+  ## Starts the context's recycle WITHOUT stopping its worker threads, so the
+  ## next createFFIContext reuses the same threads, signals and kqueue fds.
+  ## The FFI thread loop drains the in-flight handlers, frees the lib, clears the
+  ## per-context state and releases the slot, then fires `callback`
+  ## (RET_OK drained, RET_ERR stuck). Non-blocking.
   ctx.lock.acquire()
   if ctx.lifecycle.load() != CtxLifecycle.Active:
     ctx.lock.release()
     return err("requestRecycle: context is not Active (already recycling)")
-
   ctx.recycleCallback = callback
   ctx.recycleUserData = userData
   ctx.lifecycle.store(CtxLifecycle.RecyclePending)
@@ -519,18 +270,4 @@ proc requestRecycle*[T](
     return err("requestRecycle: failed to signal the FFI thread: " & $error)
   if not fired:
     return err("requestRecycle: failed to signal the FFI thread in time")
-  return ok()
-
-proc markAsActive*[T](ctx: ptr FFIContext[T]) =
-  ctx.lifecycle.store(CtxLifecycle.Active)
-
-proc tryClaim*[T](ctx: ptr FFIContext[T]): bool =
-  ## Returns true if acquired the contex, false if it was already claimed.
-  var expected = false
-  ctx.inUse.compareExchange(expected, true)
-
-proc release*[T](ctx: ptr FFIContext[T]) =
-  ctx.inUse.store(false)
-
-proc isInUse*[T](ctx: ptr FFIContext[T]): bool =
-  ctx.inUse.load()
+  ok()
diff --git a/ffi/ffi_context_pool.nim b/ffi/ffi_context_pool.nim
index 421a636..179fb8f 100644
--- a/ffi/ffi_context_pool.nim
+++ b/ffi/ffi_context_pool.nim
@@ -4,23 +4,28 @@ import ./ffi_context, ./ffi_types
 
 const MaxFFIContexts* = 32
   ## Maximum number of concurrently live FFI contexts when using FFIContextPool.
+  ## Each slot's threads, signals and dispatcher kqueue fds are created once and
+  ## reused, so this also caps the process's steady-state fd usage.
 
 type FFIContextPool*[T] = object
   contexts: array[MaxFFIContexts, FFIContext[T]]
   initialized: array[MaxFFIContexts, Atomic[bool]]
+    ## Whether the slot's worker has been built. Once true it stays true for the
+    ## process lifetime — destroy recycles the context rather than tearing it down.
 
 proc createFFIContext*[T](
     pool: var FFIContextPool[T]
 ): Result[ptr FFIContext[T], string] =
-  ## Acquires a context from the fixed pool. The context's worker is built once on
-  ## first use and reused on every later acquisition.
-
+  ## Acquires a context from the fixed pool. The worker (threads + signals +
+  ## dispatcher kqueues) is built once on first use and REUSED on every later
+  ## acquisition — chronos never frees a dispatcher's kqueue fd, so a
+  ## thread-per-context model would leak fds unboundedly.
   for i in 0 ..< MaxFFIContexts:
     let ctx = pool.contexts[i].addr
     if not ctx.tryClaim():
       continue
     if pool.initialized[i].load():
-      ## Reused context: a prior destroy drained and released it, worker still alive.
+      ## Reused slot: a prior destroy drained and parked it; worker still alive.
       ctx.markAsActive()
       return ok(ctx)
     initContextResources(ctx).isOkOr:
@@ -33,13 +38,16 @@ proc createFFIContext*[T](
 proc releaseFFIContext*[T](
     ctx: ptr FFIContext[T], callback: FFICallBack, userData: pointer
 ): Result[void, string] =
-  return ctx.requestRecycle(callback, userData)
+  ## Recycle/park the context for reuse without stopping its worker threads.
+  ## `callback` fires once the FFI thread has drained and parked it.
+  ctx.requestRecycle(callback, userData)
 
 proc destroyFFIContext*[T](
     pool: var FFIContextPool[T], ctx: ptr FFIContext[T]
 ): Result[void, string] =
   ## Full teardown: stops/joins the worker threads and returns the context to the
-  ## pool, marking it uninitialised so a later createFFIContext rebuilds it.
+  ## pool, marking it uninitialised so a later createFFIContext rebuilds it. Only
+  ## for process/pool shutdown — normal destruction uses releaseFFIContext.
   ctx.stopAndJoinThreads().isOkOr:
     return err("destroyFFIContext(pool): " & $error)
   for i in 0 ..< MaxFFIContexts:
@@ -50,8 +58,8 @@ proc destroyFFIContext*[T](
   return ok()
 
 proc isValidCtx*[T](pool: var FFIContextPool[T], ctx: pointer): bool =
-  ## Returns true only if ctx points to one of the pool's contexts that is
-  ## currently in use.
+  ## True only if ctx points to one of the pool's contexts that is currently
+  ## claimed (in use). Rejects nil / dangling / parked contexts at the API boundary.
   if ctx.isNil():
     return false
   for i in 0 ..< MaxFFIContexts:
diff --git a/ffi/ffi_thread.nim b/ffi/ffi_thread.nim
index 9685a45..5de7684 100644
--- a/ffi/ffi_thread.nim
+++ b/ffi/ffi_thread.nim
@@ -28,6 +28,13 @@ proc sendRequestToFFIThread*(
   defer:
     ctx.lock.release()
 
+  # Reject once recycling has been requested: the slot is being drained/parked
+  # and its handler table/lib are about to be torn down. requestRecycle flips
+  # this under the same lock, so the check is race-free.
+  if ctx.lifecycle.load() != CtxLifecycle.Active:
+    deleteRequest(ffiRequest)
+    return err("FFI context is not accepting requests (being recycled)")
+
   let sentOk = ctx.reqChannel.trySend(ffiRequest)
   if not sentOk:
     deleteRequest(ffiRequest)
@@ -42,10 +49,15 @@ proc sendRequestToFFIThread*(
     deleteRequest(ffiRequest)
     return err("Couldn't fireSync in time")
 
+  # The request is now owned by the FFI thread: it has been queued and the FFI
+  # thread signaled, so it WILL be received, processed, and answered through the
+  # callback via handleRes. The waitSync below only confirms prompt receipt; its
+  # failure (e.g. EINVAL/EINTR under load or signal teardown) must NOT be
+  # surfaced as an error, or the caller would fire the callback a SECOND time
+  # while handleRes also fires it — a double callback onto a freed response.
   let res = ctx.reqReceivedSignal.waitSync(timeout)
   if res.isErr():
-    # FFI thread was signaled and owns the request; don't double-free.
-    return err("Couldn't receive reqReceivedSignal signal")
+    return ok()
 
   # On ok the FFI thread's processRequest deallocShared(req)'s.
   ok()
@@ -79,6 +91,75 @@ proc processRequest[T](
   except Exception as e:
     error "Unexpected exception in handleRes", error = e.msg
 
+proc freeLib[T](ctx: ptr FFIContext[T]) {.gcsafe.} =
+  ## Frees the createShared'd library object built by the ctor. Logical resource
+  ## cleanup is the destructor body's job; this only releases the Nim storage.
+  if ctx.myLib.isNil():
+    return
+  when not defined(gcRefc):
+    {.cast(gcsafe).}:
+      `=destroy`(ctx.myLib[])
+  else:
+    discard
+  freeShared(ctx.myLib)
+  ctx.myLib = nil
+
+var RecycleTimeout* = 1500.milliseconds
+  ## Upper bound the recycle handler waits for in-flight handlers before it
+  ## cancels them and reports the ctx as stuck. Returns as soon as they finish,
+  ## so this only bounds a *stuck* handler. A `var` so tests can shorten it.
+
+proc recycleContext[T](
+    ctx: ptr FFIContext[T], ongoingProcessReq: ptr seq[Future[void]]
+) {.async.} =
+  ## Runs on the FFI thread. Drains in-flight handlers, frees the lib, clears the
+  ## per-context event state, releases the slot for reuse, and fires the recycle
+  ## callback. The worker threads (and their kqueue fds) stay alive for reuse.
+  ongoingProcessReq[].keepItIf(not it.finished())
+
+  ## 1. Let in-flight handlers finish on their own, bounded by RecycleTimeout.
+  var naturallyDrained = ongoingProcessReq[].len == 0
+  if not naturallyDrained:
+    naturallyDrained = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
+
+  ## 2. If any are wedged, cancel them and give the cancellations a bounded moment.
+  var safeToRecycle = naturallyDrained
+  if not naturallyDrained:
+    for fut in ongoingProcessReq[]:
+      if not fut.finished():
+        fut.cancelSoon()
+    safeToRecycle = await allFutures(ongoingProcessReq[]).withTimeout(RecycleTimeout)
+
+  let cb = ctx.recycleCallback
+  let ud = ctx.recycleUserData
+  ctx.recycleCallback = nil
+
+  if safeToRecycle:
+    freeLib(ctx)
+    # Reset per-context event state so a reused slot starts clean: drop listeners
+    # and drain/free any queued events so they aren't delivered to the next owner.
+    removeAllEventListeners(ctx[].eventRegistry)
+    while true:
+      let opt = ctx.eventQueue.tryDequeueEvent()
+      if opt.isNone():
+        break
+      let qe = opt.get()
+      freeEventBuffers(qe.name, qe.data)
+    ctx.eventQueueStuck.store(false)
+    ongoingProcessReq[].setLen(0)
+    ctx.release()
+
+  if not cb.isNil():
+    foreignThreadGc:
+      let msg =
+        if naturallyDrained:
+          ""
+        else:
+          "recycle: in-flight requests did not finish in time"
+      let cmsg = msg.cstring
+      let retCode = if naturallyDrained: RET_OK else: RET_ERR
+      cb(retCode, unsafeAddr cmsg[0], cast[csize_t](msg.len), ud)
+
 var ffiEventQueueSignalPtr {.threadvar.}: ThreadSignalPtr
   # Stashed so the hook has no closure env.
 
@@ -121,6 +202,14 @@ proc ffiThreadBody[T](ctx: ptr FFIContext[T]) {.thread.} =
         pending.del(i)
 
     while ctx.running.load():
+      # A destructor requested recycle: drain + free + park this slot for reuse,
+      # keeping the threads (and their kqueue fds) alive. Claim it atomically so
+      # only this loop runs the recycle.
+      var expectedRecycle = CtxLifecycle.RecyclePending
+      if ctx.lifecycle.compareExchange(expectedRecycle, CtxLifecycle.Recycling):
+        await recycleContext(ctx, addr pending)
+        continue
+
       # Freezes if a sync handler blocks the dispatcher; event thread reads to detect wedged FFI thread.
       discard ctx.ffiHeartbeat.fetchAdd(1)
 
diff --git a/ffi/internal/ffi_library.nim b/ffi/internal/ffi_library.nim
index ed04d9e..7506fbf 100644
--- a/ffi/internal/ffi_library.nim
+++ b/ffi/internal/ffi_library.nim
@@ -139,6 +139,12 @@ macro declareLibrary*(libraryName: static[string], libType: untyped): untyped =
   let addName = libraryName & "_add_event_listener"
   let addErr = "error: invalid context in " & addName
   let addBody = quote:
+    # Sets up the calling (foreign) thread's GC before any Nim allocation
+    # below ($eventName / registry table+seq ops). Request entry procs do the
+    # same; without it a foreign thread with no Nim heap segfaults in the
+    # allocator under GC pressure.
+    when declared(initializeLibrary):
+      initializeLibrary()
     var ret: uint64 = 0
     if isNil(ctx):
       echo `addErr`
@@ -173,6 +179,10 @@ macro declareLibrary*(libraryName: static[string], libType: untyped): untyped =
   let removeName = libraryName & "_remove_event_listener"
   let removeErr = "error: invalid context in " & removeName
   let removeBody = quote:
+    # See add_event_listener: removeEventListener mutates the registry's
+    # GC-allocated table/seqs, so the calling foreign thread needs a GC.
+    when declared(initializeLibrary):
+      initializeLibrary()
     var ret: cint = 1
     if isNil(ctx):
       echo `removeErr`
diff --git a/ffi/internal/ffi_macro.nim b/ffi/internal/ffi_macro.nim
index 714abd0..a1b74fb 100644
--- a/ffi/internal/ffi_macro.nim
+++ b/ffi/internal/ffi_macro.nim
@@ -750,6 +750,14 @@ macro ffi*(prc: untyped): untyped =
 
     let ffiBody = newStmtList()
 
+    # Set up the calling (foreign) thread's GC before any Nim allocation in
+    # this wrapper (e.g. `$reqTypeName` below). Without it a foreign thread
+    # with no Nim heap segfaults in the allocator under GC pressure. `ffiRaw`
+    # already does this; the `.ffi.` path must too.
+    ffiBody.add quote do:
+      when declared(initializeLibrary):
+        initializeLibrary()
+
     ffiBody.add quote do:
       if callback.isNil:
         return RET_MISSING_CALLBACK
@@ -1246,22 +1254,25 @@ macro ffiCtor*(prc: untyped): untyped =
   return stmts
 
 macro ffiDtor*(prc: untyped): untyped =
-  ## Defines a C-exported destructor that tears down the FFIContext after the
-  ## body runs.
+  ## Defines a C-exported destructor that RECYCLES the FFIContext after the
+  ## body runs — it parks the context for reuse instead of tearing down its
+  ## worker threads, because chronos never frees a dispatcher's kqueue fd, so a
+  ## thread-per-context teardown would leak fds unboundedly.
   ##
   ## The annotated proc must have exactly one parameter of the library type.
-  ## The body contains any library-level cleanup to run before context teardown.
+  ## The body contains any library-level cleanup to run before recycle.
   ##
   ## Example:
-  ##   proc mylibobj_destroy*(obj: MyLibObj) {.ffiDtor.} =
-  ##     obj.cleanup()
+  ##   proc waku_destroy*(w: Waku) {.ffiDtor.} =
+  ##     w.cleanup()
   ##
   ## The generated C-exported proc has the signature:
-  ##   cint mylibobj_destroy(void* ctx, FfiCallback callback, void* userData)
+  ##   int waku_destroy(void* ctx, FfiCallback callback, void* userData)
   ##
-  ## Recycle the context for reuse to keep fd usage bounded.
-  ## NON-BLOCKING: returns RET_OK once accepted;
-  ## the real outcome arrives via `callback`.
+  ## NON-BLOCKING: it runs the body, requests recycle, and returns RET_OK once
+  ## accepted; the real outcome (RET_OK drained / RET_ERR stuck) arrives via
+  ## `callback`. Returns RET_ERR synchronously only for a null/invalid ctx or a
+  ## rejected recycle request.
 
   let procName = prc[0]
   let formalParams = prc[3]
@@ -1270,8 +1281,8 @@ macro ffiDtor*(prc: untyped): untyped =
   if formalParams.len < 2:
     error("ffiDtor: proc must have exactly one parameter (w: LibType)")
 
-  let libParamName = formalParams[1][0] # e.g. w
-  let libTypeName = formalParams[1][1]  # e.g. MyLibObj
+  let libParamName = formalParams[1][0]
+  let libTypeName = formalParams[1][1]
 
   let procNameStr = block:
     let raw = $procName
@@ -1288,7 +1299,7 @@ macro ffiDtor*(prc: untyped): untyped =
   if procName.kind == nnkPostfix:
     cExportProcName = procName[1]
 
-  let releaseResIdent = genSym(nskLet, "destroyRes")
+  let releaseResIdent = genSym(nskLet, "releaseRes")
 
   let ffiBody = newStmtList()
 
@@ -1298,6 +1309,9 @@ macro ffiDtor*(prc: untyped): untyped =
 
   ffiBody.add quote do:
     if ctx.isNil or cast[ptr FFIContext[`libTypeName`]](ctx)[].myLib.isNil:
+      if not callback.isNil:
+        let errStr = "destroy: invalid context"
+        callback(RET_ERR, unsafeAddr errStr[0], cast[csize_t](errStr.len), userData)
       return RET_ERR
 
   ffiBody.add quote do:
@@ -1311,21 +1325,28 @@ macro ffiDtor*(prc: untyped): untyped =
   if not isNoop:
     ffiBody.add(bodyNode)
 
-  let poolIdent = ident($libTypeName & "FFIPool")
   ffiBody.add quote do:
-    let `releaseResIdent` = releaseFFIContext(
-      cast[ptr FFIContext[`libTypeName`]](ctx), callback, userData
-    )
+    # Recycle (park) the context for reuse instead of tearing down its threads.
+    # The callback fires from the recycle handler once drained.
+    let `releaseResIdent` =
+      releaseFFIContext(cast[ptr FFIContext[`libTypeName`]](ctx), callback, userData)
     if `releaseResIdent`.isErr():
-      if not callback.isNil():
-        let errStr = "release failed: " & $`releaseResIdent`.error
+      if not callback.isNil:
+        let errStr = "destroy: " & `releaseResIdent`.error
         callback(RET_ERR, unsafeAddr errStr[0], cast[csize_t](errStr.len), userData)
       return RET_ERR
     return RET_OK
 
+  let poolIdent = ident($libTypeName & "FFIPool")
+
   let ffiProc = newProc(
     name = postfix(cExportProcName, "*"),
-    params = @[ident("cint"), newIdentDefs(ident("ctx"), ident("pointer"))],
+    params = @[
+      ident("cint"),
+      newIdentDefs(ident("ctx"), ident("pointer")),
+      newIdentDefs(ident("callback"), ident("FFICallBack")),
+      newIdentDefs(ident("userData"), ident("pointer")),
+    ],
     body = ffiBody,
     pragmas = newTree(
       nnkPragma,
diff --git a/tests/test_ffi_context.nim b/tests/test_ffi_context.nim
deleted file mode 100644
index bf6989c..0000000
--- a/tests/test_ffi_context.nim
+++ /dev/null
@@ -1,892 +0,0 @@
-import std/[locks, strutils, os, osproc, sequtils]
-import unittest2
-import results
-import ../ffi
-
-type TestLib = object
-
-## Per-request callback state.  The test thread blocks on `cond` until the
-## FFI thread signals it — no polling, no CPU waste.
-type CallbackData = object
-  lock: Lock
-  cond: Cond
-  called: bool
-  retCode: cint
-  msg: array[512, char]
-  msgLen: int
-
-proc initCallbackData(d: var CallbackData) =
-  d.lock.initLock()
-  d.cond.initCond()
-
-proc deinitCallbackData(d: var CallbackData) =
-  d.cond.deinitCond()
-  d.lock.deinitLock()
-
-proc testCallback(
-    retCode: cint, msg: ptr cchar, len: csize_t, userData: pointer
-) {.cdecl, gcsafe, raises: [].} =
-  let d = cast[ptr CallbackData](userData)
-  acquire(d[].lock)
-  d[].retCode = retCode
-  let n = min(int(len), d[].msg.high)
-  if n > 0 and not msg.isNil:
-    copyMem(addr d[].msg[0], msg, n)
-  d[].msg[n] = '\0'
-  d[].msgLen = n
-  d[].called = true
-  signal(d[].cond)
-  release(d[].lock)
-
-proc waitCallback(d: var CallbackData) =
-  acquire(d.lock)
-  while not d.called:
-    wait(d.cond, d.lock)
-  release(d.lock)
-
-proc callbackMsg(d: var CallbackData): string =
-  result = newString(d.msgLen)
-  if d.msgLen > 0:
-    copyMem(addr result[0], addr d.msg[0], d.msgLen)
-
-registerReqFFI(PingRequest, lib: ptr TestLib):
-  proc(message: cstring): Future[Result[string, string]] {.async.} =
-    return ok("pong:" & $message)
-
-registerReqFFI(FailRequest, lib: ptr TestLib):
-  proc(): Future[Result[string, string]] {.async.} =
-    return err("intentional failure")
-
-registerReqFFI(EmptyOkRequest, lib: ptr TestLib):
-  proc(): Future[Result[string, string]] {.async.} =
-    return ok("")
-
-registerReqFFI(SlowRequest, lib: ptr TestLib):
-  proc(): Future[Result[string, string]] {.async.} =
-    await sleepAsync(500.milliseconds)
-    return ok("slow-done")
-
-# Coordination channel: the FFI handler signals the test thread the instant
-# it is about to block the event loop, so the test can call destroyFFIContext
-# while the event loop is truly frozen.
-var gSyncBlockStarted: Channel[bool]
-gSyncBlockStarted.open()
-
-registerReqFFI(SyncBlockingRequest, lib: ptr TestLib):
-  proc(): Future[Result[string, string]] {.async.} =
-    # Yield first so that reqReceivedSignal fires and sendRequestToFFIThread
-    # returns on the calling thread before we start the synchronous block.
-    await sleepAsync(0.milliseconds)
-    # Signal the test thread: the event loop is about to be frozen.
-    # Channel.send is annotated as raising under refc, so wrap.
-    try:
-      gSyncBlockStarted.send(true)
-    except Exception as exc:
-      return err("gSyncBlockStarted.send raised: " & exc.msg)
-    # Simulates a request that blocks the event-loop thread synchronously
-    # (e.g. w.stop() -> switch.stop() -> connManager.close() with blocking I/O).
-    # Unlike sleepAsync, os.sleep holds the OS thread and prevents Chronos from
-    # processing any callbacks -- including the reqSignal fired by destroyFFIContext.
-    os.sleep(5_000)
-    return ok("sync-blocking-done")
-
-# Approximates the heavy ref-object workload that libwaku/libp2p performs on
-# the FFI thread. The exact cell count is large enough to force several refc
-# GC cycles; under refc this stresses the heap state that, when later combined
-# with a chronos Selector allocation on the main thread (via close()), used to
-# trip the rawNewObj → signal-handler infinite recursion.
-type RefCell = ref object
-  next: RefCell
-  payload: array[64, byte]
-
-registerReqFFI(HeavyRefAllocRequest, lib: ptr TestLib):
-  proc(): Future[Result[string, string]] {.async.} =
-    var head: RefCell
-    for i in 0 ..< 50_000:
-      let n = RefCell(next: head)
-      head = n
-      if i mod 1000 == 0:
-        await sleepAsync(0.milliseconds)
-    # Break the chain iteratively before releasing head.
-    # ORC's =destroy for RefCell recurses through .next, so a 50k-node chain
-    # would produce ~50k nested =destroy calls and overflow the stack.
-    # Walking the list and unlinking each node first keeps destruction O(n)
-    # iterative instead of O(n) recursive.
-    var node = head
-    head = nil
-    while not node.isNil():
-      let nxt = node.next
-      node.next = nil  # unlink before the refcount of `node` can drop to zero
-      node = nxt
-    await sleepAsync(10.milliseconds)
-    return ok("heavy-done")
-
-suite "FFIContextPool":
-  test "create and destroy via pool succeeds":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      assert false, "createFFIContext(pool) failed: " & $error
-      return
-    check pool.destroyFFIContext(ctx).isOk()
-
-  test "context is reused after destroy":
-    var pool: FFIContextPool[TestLib]
-    let ctx1 = pool.createFFIContext().valueOr:
-      assert false, "createFFIContext(pool) failed: " & $error
-      return
-    check pool.destroyFFIContext(ctx1).isOk()
-    # After destroying, the same context must be available again
-    let ctx2 = pool.createFFIContext().valueOr:
-      assert false, "createFFIContext(pool) failed after context release: " & $error
-      return
-    check pool.destroyFFIContext(ctx2).isOk()
-    check ctx1 == ctx2 # same context reused
-
-  test "pool exhaustion returns error":
-    var pool: FFIContextPool[TestLib]
-    var ctxs: array[MaxFFIContexts, ptr FFIContext[TestLib]]
-    for i in 0 ..< MaxFFIContexts:
-      ctxs[i] = pool.createFFIContext().valueOr:
-        for j in 0 ..< i:
-          discard pool.destroyFFIContext(ctxs[j])
-        assert false, "createFFIContext(pool) failed at context " & $i & ": " & $error
-        return
-    # Pool is now full — next create must fail
-    check pool.createFFIContext().isErr()
-    for i in 0 ..< MaxFFIContexts:
-      discard pool.destroyFFIContext(ctxs[i])
-
-  test "requests are processed via pool context":
-    var pool: FFIContextPool[TestLib]
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-
-    let ctx = pool.createFFIContext().valueOr:
-      assert false, "createFFIContext(pool) failed: " & $error
-      return
-    defer:
-      discard pool.destroyFFIContext(ctx)
-
-    check sendRequestToFFIThread(
-      ctx, PingRequest.ffiNewReq(testCallback, addr d, "pool".cstring)
-    )
-      .isOk()
-    waitCallback(d)
-    check d.retCode == RET_OK
-    check callbackMsg(d) == "pong:pool"
-
-suite "createFFIContext / destroyFFIContext":
-  test "create and destroy succeeds":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      checkpoint "createFFIContext failed: " & $error
-      check false
-      return
-    check pool.destroyFFIContext(ctx).isOk()
-
-  test "double destroy is safe via running flag":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    check pool.destroyFFIContext(ctx).isOk()
-
-suite "destroyFFIContext does not hang":
-  test "destroy while a slow async request is still in-flight":
-    ## Reproduces the race where destroyFFIContext was called while a long-
-    ## running async request (e.g. stop_node / w.stop()) was still executing.
-    ## The destroy must return well within 2 seconds; before the fix it would
-    ## block forever on joinThread(ffiThread).
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-
-    var d: CallbackData
-    initCallbackData(d)
-    defer: deinitCallbackData(d)
-
-    # sendRequestToFFIThread returns as soon as the FFI thread ACKs receipt;
-    # the 500 ms work continues asynchronously on the FFI thread.
-    check sendRequestToFFIThread(
-      ctx, SlowRequest.ffiNewReq(testCallback, addr d)
-    ).isOk()
-
-    # Destroy immediately while SlowRequest is still running.
-    let t0 = Moment.now()
-    check pool.destroyFFIContext(ctx).isOk()
-    check (Moment.now() - t0) < 2.seconds
-
-suite "destroyFFIContext does not hang when event loop is blocked":
-  test "destroy while sync-blocking request is in-flight":
-    ## Reproduces the hang seen in logosdelivery_example.c:
-    ##   logosdelivery_stop_node(...)   -- triggers w.stop() on the FFI thread
-    ##   sleep(1)
-    ##   logosdelivery_destroy(...)     -- hangs forever
-    ##
-    ## Root cause: w.stop() (and similar tear-down calls) can execute a
-    ## synchronous blocking section that holds the OS thread, preventing
-    ## the Chronos event loop from processing the reqSignal fired by
-    ## destroyFFIContext.  The result is joinThread(ffiThread) never returns.
-    ##
-    ## With the fix, destroyFFIContext must complete well within the 5 s that
-    ## SyncBlockingRequest holds the event loop.
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-
-    # CallbackData and ctx are kept alive past destroyFFIContext: the leaked
-    # FFI thread is still inside os.sleep(5_000) and will eventually wake,
-    # run handleRes, fire testCallback, and exit normally. We wait for that
-    # to happen at the end of the test so the leaked thread cannot race with
-    # subsequent tests' createFFIContext on Linux/Windows. Heap allocation
-    # ensures the late callback's userData is still valid when it fires.
-    let d = createShared(CallbackData)
-    initCallbackData(d[])
-
-    check sendRequestToFFIThread(
-      ctx, SyncBlockingRequest.ffiNewReq(testCallback, d)
-    ).isOk()
-
-    # Block until the FFI handler has signalled that os.sleep is about to start.
-    # This guarantees destroyFFIContext is called while the event loop is frozen.
-    discard gSyncBlockStarted.recv()
-
-    # Destroy must return promptly even though the event loop is frozen for 5s.
-    # It deliberately returns err and leaks ctx in this scenario rather than
-    # hanging on joinThread.
-    let t0 = Moment.now()
-    check pool.destroyFFIContext(ctx).isErr()
-    check (Moment.now() - t0) < 3.seconds
-
-    # Drain the leaked thread before the test scope ends.
-    # 1. waitCallback blocks until os.sleep(5_000) returns and handleRes
-    #    invokes testCallback (~3.5s after destroy returned), which proves
-    #    the leaked thread has reached the end of processRequest.
-    # 2. Yield briefly so the thread can finish iterating its while loop,
-    #    fire threadExitSignal in its defer, and return. Without this, on
-    #    Linux/Windows the still-live thread can race with the next test's
-    #    createFFIContext under --mm:orc and segfault.
-    # ctx.cleanUpResources is intentionally NOT called: destroyFFIContext
-    # skipped it for a reason, and the signal fds are reclaimed by the OS
-    # at process exit.
-    waitCallback(d[])
-    os.sleep(200)
-    deinitCallbackData(d[])
-    freeShared(d)
-
-suite "destroyFFIContext refc workaround":
-  ## Documents the refc-specific workaround in cleanUpResources.
-  ##
-  ## Background: when the FFI thread does heavy ref-object work (the workload
-  ## that triggered the libwaku hang in production), the refc GC heap reaches
-  ## a state where the very first chronos Selector allocation on the *main*
-  ## thread — which happens lazily inside ThreadSignalPtr.close() through
-  ## getThreadDispatcher() — traps in rawNewObj. The refc signal handler
-  ## itself re-enters the same allocator and the process never returns.
-  ## Captured stack:
-  ##   close → safeUnregisterAndCloseFd → getThreadDispatcher →
-  ##   newDispatcher → Selector.new → newObj (gc.nim:488) → rawNewObj →
-  ##   _sigtramp → signalHandler → newObjNoInit → addNewObjToZCT (loop)
-  ##
-  ## The workaround in cleanUpResources is `when defined(gcRefc): discard`,
-  ## i.e. skip the close() calls under refc only. orc is unaffected and
-  ## still cleans up the signal fds normally.
-  ##
-  ## NOTE: this test is documentation more than regression: a synthetic
-  ## ref-allocation workload of ~50k cells does NOT corrupt the refc heap
-  ## the way the real libwaku/libp2p teardown does, so this test passes
-  ## even when the workaround is disabled. Reproducing the actual hang
-  ## requires the full libwaku workload (logosdelivery_example.c).
-  ## Verification of the workaround was done end-to-end against that
-  ## example: with `--mm:refc` and close() enabled it hangs forever in
-  ## the captured stack above; with `when defined(gcRefc): discard` it
-  ## returns immediately. Under `--mm:orc` it returns immediately either
-  ## way.
-  test "destroy after heavy ref-allocation workload returns promptly":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-
-    var d: CallbackData
-    initCallbackData(d)
-    defer: deinitCallbackData(d)
-
-    check sendRequestToFFIThread(
-      ctx, HeavyRefAllocRequest.ffiNewReq(testCallback, addr d)
-    ).isOk()
-    waitCallback(d)
-    check d.retCode == RET_OK
-
-    let t0 = Moment.now()
-    check pool.destroyFFIContext(ctx).isOk()
-    check (Moment.now() - t0) < 3.seconds
-
-suite "sendRequestToFFIThread":
-  test "successful request triggers RET_OK callback":
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    defer:
-      discard pool.destroyFFIContext(ctx)
-
-    check sendRequestToFFIThread(
-      ctx, PingRequest.ffiNewReq(testCallback, addr d, "hello".cstring)
-    )
-      .isOk()
-    waitCallback(d)
-    check d.retCode == RET_OK
-    check callbackMsg(d) == "pong:hello"
-
-  test "failing request triggers RET_ERR callback":
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    defer:
-      discard pool.destroyFFIContext(ctx)
-
-    check sendRequestToFFIThread(ctx, FailRequest.ffiNewReq(testCallback, addr d)).isOk()
-    waitCallback(d)
-    check d.retCode == RET_ERR
-
-  test "empty ok response delivers empty message":
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    defer:
-      discard pool.destroyFFIContext(ctx)
-
-    check sendRequestToFFIThread(ctx, EmptyOkRequest.ffiNewReq(testCallback, addr d))
-      .isOk()
-    waitCallback(d)
-    check d.retCode == RET_OK
-    check d.msgLen == 0
-
-  test "sequential requests are all processed":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    defer:
-      discard pool.destroyFFIContext(ctx)
-
-    for i in 1 .. 5:
-      var d: CallbackData
-      initCallbackData(d)
-      let msg = "msg" & $i
-      check sendRequestToFFIThread(
-        ctx, PingRequest.ffiNewReq(testCallback, addr d, msg.cstring)
-      )
-        .isOk()
-      waitCallback(d)
-      deinitCallbackData(d)
-      check d.retCode == RET_OK
-      check callbackMsg(d) == "pong:" & msg
-
-# ---------------------------------------------------------------------------
-# ffiCtor macro integration test
-# ---------------------------------------------------------------------------
-
-type SimpleLib = object
-  value: int
-
-ffiType:
-  type SimpleConfig = object
-    initialValue: int
-
-proc testlib_create*(
-    config: SimpleConfig
-): Future[Result[SimpleLib, string]] {.ffiCtor.} =
-  return ok(SimpleLib(value: config.initialValue))
-
-# Records the value of the library object the destructor body saw, so a test can
-# confirm the user cleanup body ran with the right lib state before teardown.
-var gDestroyedValue {.threadvar.}: int
-proc testlib_destroy*(lib: SimpleLib) {.ffiDtor.} =
-  gDestroyedValue = lib.value
-
-suite "ffiCtor macro":
-  test "creates context and returns pointer via callback":
-    var d: CallbackData
-    initCallbackData(d)
-    defer: deinitCallbackData(d)
-
-    let configJson = ffiSerialize(SimpleConfig(initialValue: 42))
-    let ret = testlib_create(configJson.cstring, testCallback, addr d)
-
-    check not ret.isNil()
-
-    waitCallback(d)
-
-    check d.retCode == RET_OK
-
-    # The callback message is the ctx address as a decimal string
-    let addrStr = callbackMsg(d)
-    check addrStr.len > 0
-
-    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
-    check ctxAddr != 0
-    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
-
-    # Verify the library was properly initialized
-    check not ctx[].myLib.isNil
-    check ctx[].myLib[].value == 42
-
-    check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
-
-proc createSimpleLib(initialValue: int): ptr FFIContext[SimpleLib] =
-  ## Helper: run the generated async ctor and return the live ctx.
-  var d: CallbackData
-  initCallbackData(d)
-  defer:
-    deinitCallbackData(d)
-  let ret =
-    testlib_create(ffiSerialize(SimpleConfig(initialValue: initialValue)).cstring,
-      testCallback, addr d)
-  doAssert not ret.isNil()
-  waitCallback(d)
-  doAssert d.retCode == RET_OK
-  return cast[ptr FFIContext[SimpleLib]](cast[uint](parseBiggestUInt(callbackMsg(d))))
-
-suite "ffiDtor macro (async destroy + reuse)":
-  test "destroy fires RET_OK after teardown, frees myLib, and frees the context":
-    let ctx = createSimpleLib(5)
-    check not ctx[].myLib.isNil
-    check ctx[].myLib[].value == 5
-
-    var dD: CallbackData
-    initCallbackData(dD)
-    defer:
-      deinitCallbackData(dD)
-
-    # Async destroy: the C return is just "accepted"; the real outcome arrives
-    # via the callback once the FFI thread has finished tearing the lib down.
-    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
-    waitCallback(dD)
-    check dD.retCode == RET_OK
-    check gDestroyedValue == 5 # the user cleanup body saw the live lib
-    check ctx[].myLib.isNil() # freed on the FFI thread
-
-    # The context was freed from the FFI thread, so a fresh create reclaims it.
-    let ctx2 = createSimpleLib(9)
-    check ctx2 == ctx # same context, reused worker + fds
-    check ctx2[].myLib[].value == 9
-    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
-
-  test "destroy waits for an in-flight request before reporting RET_OK":
-    let ctx = createSimpleLib(1)
-
-    # Dispatch a 500 ms handler and do NOT wait — it is in flight at destroy time.
-    var slow: CallbackData
-    initCallbackData(slow)
-    defer:
-      deinitCallbackData(slow)
-    check sendRequestToFFIThread(ctx, SlowRequest.ffiNewReq(testCallback, addr slow)).isOk()
-
-    var dD: CallbackData
-    initCallbackData(dD)
-    defer:
-      deinitCallbackData(dD)
-    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
-    waitCallback(dD)
-    check dD.retCode == RET_OK
-    # Drained: the in-flight handler ran to completion before destroy reported OK.
-    check slow.called
-    check callbackMsg(slow) == "slow-done"
-
-    let ctx2 = createSimpleLib(2)
-    check ctx2 == ctx
-    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
-
-  test "requests are rejected once a destroy closes the gate":
-    let ctx = createSimpleLib(3)
-
-    var dD: CallbackData
-    initCallbackData(dD)
-    defer:
-      deinitCallbackData(dD)
-    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
-    waitCallback(dD)
-    check dD.retCode == RET_OK
-
-    # Gate stays closed until the context is reacquired: a late request must not
-    # dispatch onto a context about to be (or already) reused.
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-    check sendRequestToFFIThread(
-      ctx, SlowRequest.ffiNewReq(testCallback, addr d)
-    ).isErr()
-
-    let ctx2 = createSimpleLib(4)
-    check ctx2 == ctx
-    check SimpleLibFFIPool.destroyFFIContext(ctx2).isOk()
-
-  test "a stuck context is reported as RET_ERR rather than hanging":
-    let ctx = createSimpleLib(8)
-
-    let savedTimeout = RecycleTimeout
-    RecycleTimeout = 150.milliseconds
-    defer:
-      RecycleTimeout = savedTimeout
-
-    # In-flight handler outlasts the (shortened) drain timeout.
-    var slow: CallbackData
-    initCallbackData(slow)
-    defer:
-      deinitCallbackData(slow)
-    check sendRequestToFFIThread(ctx, SlowRequest.ffiNewReq(testCallback, addr slow)).isOk()
-
-    var dD: CallbackData
-    initCallbackData(dD)
-    defer:
-      deinitCallbackData(dD)
-    check testlib_destroy(cast[pointer](ctx), testCallback, addr dD) == RET_OK
-    waitCallback(dD)
-    check dD.retCode == RET_ERR # drain timed out -> ctx reported stuck
-
-    # The stuck context is leaked (not reused); the handler still finishes on its
-    # own. Wait for it, then fully tear the leaked context down.
-    waitCallback(slow)
-    check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
-
-# ---------------------------------------------------------------------------
-# Simplified .ffi. macro integration test
-# ---------------------------------------------------------------------------
-
-ffiType:
-  type SendConfig = object
-    message: string
-
-proc testlib_send*(
-    lib: SimpleLib, cfg: SendConfig
-): Future[Result[string, string]] {.ffi.} =
-  return ok("echo:" & cfg.message & ":" & $lib.value)
-
-suite "simplified .ffi. macro":
-  test "sends request and gets serialized response via callback":
-    # First create a context using ffiCtor
-    var ctorD: CallbackData
-    initCallbackData(ctorD)
-    defer: deinitCallbackData(ctorD)
-
-    let configJson = ffiSerialize(SimpleConfig(initialValue: 7))
-    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
-    check not ctorRet.isNil()
-
-    waitCallback(ctorD)
-    check ctorD.retCode == RET_OK
-
-    let addrStr = callbackMsg(ctorD)
-    check addrStr.len > 0
-
-    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
-    check ctxAddr != 0
-    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
-    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
-
-    # Now call the .ffi. proc
-    var d: CallbackData
-    initCallbackData(d)
-    defer: deinitCallbackData(d)
-
-    let cfgJson = ffiSerialize(SendConfig(message: "hello"))
-    let ret = testlib_send(ctx, testCallback, addr d, cfgJson.cstring)
-    check ret == RET_OK
-
-    waitCallback(d)
-    check d.retCode == RET_OK
-
-    let receivedMsg = callbackMsg(d)
-    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
-      check false
-      ""
-    check decoded == "echo:hello:7"
-
-# ---------------------------------------------------------------------------
-# async/sync detection in .ffi. macro integration test
-# ---------------------------------------------------------------------------
-
-# Sync proc (no await in body) — macro detects this and bypasses thread machinery
-proc testlib_version*(
-    lib: SimpleLib
-): Future[Result[string, string]] {.ffi.} =
-  return ok("v" & $lib.value)
-
-suite "async/sync detection in .ffi.":
-  test "sync proc invokes callback without thread hop":
-    # Create a context using ffiCtor
-    var ctorD: CallbackData
-    initCallbackData(ctorD)
-    defer: deinitCallbackData(ctorD)
-
-    let configJson = ffiSerialize(SimpleConfig(initialValue: 3))
-    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
-    check not ctorRet.isNil()
-
-    waitCallback(ctorD)
-    check ctorD.retCode == RET_OK
-
-    let addrStr = callbackMsg(ctorD)
-    check addrStr.len > 0
-
-    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
-    check ctxAddr != 0
-    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
-    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
-
-    var d2: CallbackData
-    initCallbackData(d2)
-    defer: deinitCallbackData(d2)
-
-    # Call sync proc — callback should fire before the proc returns (no thread hop)
-    let ret = testlib_version(ctx, testCallback, addr d2)
-    # No sleep needed: sync path fires callback inline before returning
-    check ret == RET_OK
-    check d2.called   # fires synchronously — no waitCallback needed
-    check d2.retCode == RET_OK
-    let receivedMsg = callbackMsg(d2)
-    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
-      check false
-      ""
-    check decoded == "v3"
-
-# ---------------------------------------------------------------------------
-# ptr T return type in .ffi. macro integration test
-# ---------------------------------------------------------------------------
-
-type Handle = object
-  data: string
-
-ffiType:
-  type NameParam = object
-    name: string
-
-proc testlib_alloc_handle*(
-    lib: SimpleLib, np: NameParam
-): Future[Result[ptr Handle, string]] {.ffi.} =
-  let h = createShared(Handle)
-  h[] = Handle(data: np.name & ":" & $lib.value)
-  return ok(h)
-
-proc testlib_read_handle*(
-    lib: SimpleLib, handle: pointer
-): Future[Result[string, string]] {.ffi.} =
-  let h = cast[ptr Handle](handle)
-  return ok(h[].data)
-
-proc testlib_free_handle*(
-    lib: SimpleLib, handle: pointer
-): Future[Result[string, string]] {.ffi.} =
-  let h = cast[ptr Handle](handle)
-  deallocShared(h)
-  return ok("freed")
-
-suite "ptr return type in .ffi.":
-  test "returns a heap-allocated handle and reads it back":
-    # Create context via ffiCtor
-    var ctorD: CallbackData
-    initCallbackData(ctorD)
-    defer: deinitCallbackData(ctorD)
-
-    let configJson = ffiSerialize(SimpleConfig(initialValue: 5))
-    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
-    check not ctorRet.isNil()
-
-    waitCallback(ctorD)
-    check ctorD.retCode == RET_OK
-
-    let ctxAddrStr = callbackMsg(ctorD)
-    check ctxAddrStr.len > 0
-    let ctxAddr = cast[uint](parseBiggestUInt(ctxAddrStr))
-    check ctxAddr != 0
-    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
-    defer: check SimpleLibFFIPool.destroyFFIContext(ctx).isOk()
-
-    # Alloc a handle
-    var allocD: CallbackData
-    initCallbackData(allocD)
-    defer: deinitCallbackData(allocD)
-
-    let npJson = ffiSerialize(NameParam(name: "test"))
-    let allocRet = testlib_alloc_handle(ctx, testCallback, addr allocD, npJson.cstring)
-    check allocRet == RET_OK
-
-    waitCallback(allocD)
-    check allocD.retCode == RET_OK
-
-    let handleAddrStr = callbackMsg(allocD)
-    check handleAddrStr.len > 0
-    let handleAddr = parseBiggestUInt(handleAddrStr)
-    check handleAddr != 0
-
-    # Read the handle back
-    var readD: CallbackData
-    initCallbackData(readD)
-    defer: deinitCallbackData(readD)
-
-    let handleJson = ffiSerialize(cast[pointer](handleAddr))
-    let readRet = testlib_read_handle(ctx, testCallback, addr readD, handleJson.cstring)
-    check readRet == RET_OK
-
-    waitCallback(readD)
-    check readD.retCode == RET_OK
-
-    let readMsg = callbackMsg(readD)
-    let decodedStr = ffiDeserialize(readMsg.cstring, string).valueOr:
-      check false
-      ""
-    check decodedStr == "test:5"
-
-    # Free the handle
-    var freeD: CallbackData
-    initCallbackData(freeD)
-    defer: deinitCallbackData(freeD)
-
-    let freeRet = testlib_free_handle(ctx, testCallback, addr freeD, handleJson.cstring)
-    check freeRet == RET_OK
-
-    waitCallback(freeD)
-    check freeD.retCode == RET_OK
-
-# ---------------------------------------------------------------------------
-# releaseFFIContext: park & reuse (fd-leak regression)
-# ---------------------------------------------------------------------------
-
-proc countOpenFds(): int =
-  ## Number of open fds for this process, or -1 if not determinable on this
-  ## platform. On Linux we count /proc/self/fd; elsewhere we shell out to lsof
-  ## (skipped if lsof is unavailable, e.g. Windows).
-  when defined(linux):
-    var n = 0
-    for _ in walkDir("/proc/self/fd"):
-      inc n
-    return n
-  else:
-    if findExe("lsof").len == 0:
-      return -1
-    try:
-      let output =
-        execProcess("lsof", args = ["-p", $getCurrentProcessId()], options = {poUsePath})
-      return output.splitLines().countIt(it.len > 0)
-    except CatchableError:
-      return -1
-
-proc releaseAndWait[T](ctx: ptr FFIContext[T]): cint =
-  ## Test helper mirroring how a C consumer destroys a context: kick off the
-  ## (non-blocking) teardown and block on the callback, returning its retCode.
-  ## RET_OK means the lib's in-flight tasks finished and the context was parked.
-  var d: CallbackData
-  initCallbackData(d)
-  defer:
-    deinitCallbackData(d)
-  if ctx.releaseFFIContext(testCallback, addr d).isErr():
-    return RET_ERR
-  waitCallback(d)
-  return d.retCode
-
-suite "releaseFFIContext (park & reuse)":
-  test "park returns the context and reuses the same live worker":
-    var pool: FFIContextPool[TestLib]
-    let ctx1 = pool.createFFIContext().valueOr:
-      check false
-      return
-    check ctx1.releaseAndWait() == RET_OK
-
-    # Reacquire: must be the same context, with its worker still running.
-    let ctx2 = pool.createFFIContext().valueOr:
-      check false
-      return
-    check ctx1 == ctx2
-
-    var d: CallbackData
-    initCallbackData(d)
-    defer:
-      deinitCallbackData(d)
-    check sendRequestToFFIThread(
-      ctx2, PingRequest.ffiNewReq(testCallback, addr d, "reuse".cstring)
-    ).isOk()
-    waitCallback(d)
-    check d.retCode == RET_OK
-    check callbackMsg(d) == "pong:reuse" # reused worker still processes requests
-
-    check pool.destroyFFIContext(ctx2).isOk()
-
-  test "park drops the stale event callback and library pointer":
-    var pool: FFIContextPool[TestLib]
-    let ctx = pool.createFFIContext().valueOr:
-      check false
-      return
-    ctx.callbackState.callback = cast[pointer](testCallback)
-    ctx.callbackState.userData = cast[pointer](0xDEAD)
-
-    check ctx.releaseAndWait() == RET_OK
-    check ctx.callbackState.callback.isNil() # a watchdog tick can't call a freed cb
-    check ctx.callbackState.userData.isNil()
-    check ctx.myLib.isNil()
-
-    check pool.destroyFFIContext(ctx).isOk()
-
-  test "fd usage stays bounded across many park/reuse cycles":
-    if countOpenFds() < 0:
-      skip() # no fd-counting facility on this platform
-    else:
-      var pool: FFIContextPool[TestLib]
-
-      # Warm up: the first create builds the context's worker (its fds are allocated
-      # once here); parking keeps them open for reuse.
-      block:
-        let ctx = pool.createFFIContext().valueOr:
-          check false
-          return
-        check ctx.releaseAndWait() == RET_OK
-
-      let baseline = countOpenFds()
-
-      for _ in 0 ..< 20:
-        let ctx = pool.createFFIContext().valueOr:
-          check false
-          return
-        var d: CallbackData
-        initCallbackData(d)
-        check sendRequestToFFIThread(
-          ctx, PingRequest.ffiNewReq(testCallback, addr d, "x".cstring)
-        ).isOk()
-        waitCallback(d)
-        deinitCallbackData(d)
-        check ctx.releaseAndWait() == RET_OK
-
-      let afterCycles = countOpenFds()
-      # Reuse must not grow fds. Before the fix each cycle leaked ~10 fds (4
-      # ThreadSignalPtr socketpairs + 2 dispatcher kqueues); the small slack
-      # only tolerates unrelated runtime fd noise, not a per-cycle leak.
-      check afterCycles <= baseline + 5
-
-      # Tear the (still parked) context's worker down so the test leaves no threads.
-      let last = pool.createFFIContext().valueOr:
-        check false
-        return
-      check pool.destroyFFIContext(last).isOk()

From fe6749d3af604386b0a7cd96aa75e273d1307126 Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Fri, 12 Jun 2026 14:55:38 +0200
Subject: [PATCH 07/10] ci(test): pass -d:ffiAllowSignalHandler when compiling
 unit tests

The noSignalHandler compile-time guard rejects any build of the ffi
modules that doesn't set -d:noSignalHandler (embedded) or
-d:ffiAllowSignalHandler (standalone). test.yml compiled the unit tests
with a raw `nim c` missing the flag, so every per-test job failed at the
guard. The nimble test tasks already pass it; mirror that here.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c009658..7729732 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -83,4 +83,4 @@ jobs:
           if [ "$RUNNER_OS" == "Windows" ]; then
             export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH"
           fi
-          nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/unit/${{ inputs.test }}.nim
+          nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/unit/${{ inputs.test }}.nim

From 6bc626946e11954ae1ed78343ef61101b9b2db2c Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Fri, 12 Jun 2026 16:11:42 +0200
Subject: [PATCH 08/10] fix(codegen): emit 3-arg async destroy ABI in C++/Rust
 bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recycle/async-destroy work changed the Nim `ffiDtor` export from
`int destroy(ctx)` to `int destroy(ctx, callback, userData)`, but the C++
and Rust generators still emitted the 1-arg signature. Foreign callers
therefore passed only `ctx`; inside Nim, `callback`/`userData` held
uninitialised register garbage. `requestRecycle` stored the garbage
callback and the recycle handler later invoked it — a jump through a wild
pointer that segfaulted in every C++ E2E / ASan / TSan job (the crash
surfaced at teardown, after each test's assertions had already passed).

Generate the 3-arg ABI and have the destructor/Drop block on the recycle
callback via the existing sync-call helper, so the pool slot is fully
drained and parked before the handle goes away — otherwise rapid
create/destroy churn (StressShortLivedPerThreadContext, ThreadedHammer)
could outrun the recycle and exhaust the pool.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 examples/echo/cpp_bindings/echo.hpp                 | 11 +++++++++--
 examples/timer/cpp_bindings/my_timer.hpp            | 11 +++++++++--
 examples/timer/rust_bindings/src/api.rs             |  4 +++-
 examples/timer/rust_bindings/src/ffi.rs             |  2 +-
 ffi/codegen/cpp.nim                                 |  8 +++++---
 ffi/codegen/rust.nim                                | 11 ++++++++++-
 ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl |  9 ++++++++-
 7 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/examples/echo/cpp_bindings/echo.hpp b/examples/echo/cpp_bindings/echo.hpp
index efcfa2e..39824e7 100644
--- a/examples/echo/cpp_bindings/echo.hpp
+++ b/examples/echo/cpp_bindings/echo.hpp
@@ -406,7 +406,7 @@ typedef void (*FFICallback)(int ret, const char* msg, size_t len, void* user_dat
 void* echo_create(const uint8_t* req_cbor, size_t req_cbor_len, FFICallback callback, void* user_data);
 int echo_shout(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len);
 int echo_version(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len);
-int echo_destroy(void* ctx);
+int echo_destroy(void* ctx, FFICallback callback, void* user_data);
 uint64_t echo_add_event_listener(void* ctx, const char* event_name, FFICallback callback, void* user_data);
 int echo_remove_event_listener(void* ctx, uint64_t listener_id);
 } // extern "C"
@@ -516,7 +516,14 @@ public:
     // context.
     ~EchoCtx() {
         if (ptr_) {
-            echo_destroy(ptr_);
+            // `echo_destroy` is non-blocking at the C ABI: it parks the
+            // context for reuse and reports the outcome via the callback. Block
+            // here until that callback fires so the pool slot is fully drained
+            // and parked before this object goes away — otherwise a rapid
+            // create/destroy churn could outrun the recycle and exhaust the pool.
+            (void)ffi_call_([this](FFICallback cb, void* ud) {
+                return echo_destroy(ptr_, cb, ud);
+            }, timeout_);
             ptr_ = nullptr;
         }
     }
diff --git a/examples/timer/cpp_bindings/my_timer.hpp b/examples/timer/cpp_bindings/my_timer.hpp
index 8fa440b..a4a6a3a 100644
--- a/examples/timer/cpp_bindings/my_timer.hpp
+++ b/examples/timer/cpp_bindings/my_timer.hpp
@@ -706,7 +706,7 @@ int my_timer_echo(void* ctx, FFICallback callback, void* user_data, const uint8_
 int my_timer_version(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len);
 int my_timer_complex(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len);
 int my_timer_schedule(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len);
-int my_timer_destroy(void* ctx);
+int my_timer_destroy(void* ctx, FFICallback callback, void* user_data);
 uint64_t my_timer_add_event_listener(void* ctx, const char* event_name, FFICallback callback, void* user_data);
 int my_timer_remove_event_listener(void* ctx, uint64_t listener_id);
 } // extern "C"
@@ -816,7 +816,14 @@ public:
     // context.
     ~MyTimerCtx() {
         if (ptr_) {
-            my_timer_destroy(ptr_);
+            // `my_timer_destroy` is non-blocking at the C ABI: it parks the
+            // context for reuse and reports the outcome via the callback. Block
+            // here until that callback fires so the pool slot is fully drained
+            // and parked before this object goes away — otherwise a rapid
+            // create/destroy churn could outrun the recycle and exhaust the pool.
+            (void)ffi_call_([this](FFICallback cb, void* ud) {
+                return my_timer_destroy(ptr_, cb, ud);
+            }, timeout_);
             ptr_ = nullptr;
         }
     }
diff --git a/examples/timer/rust_bindings/src/api.rs b/examples/timer/rust_bindings/src/api.rs
index 709f9c1..8ad86ca 100644
--- a/examples/timer/rust_bindings/src/api.rs
+++ b/examples/timer/rust_bindings/src/api.rs
@@ -141,7 +141,9 @@ unsafe impl Sync for MyTimerCtx {}
 impl Drop for MyTimerCtx {
     fn drop(&mut self) {
         if !self.ptr.is_null() {
-            unsafe { ffi::my_timer_destroy(self.ptr); }
+            let _ = ffi_call_sync(self.timeout, |cb, ud| unsafe {
+                ffi::my_timer_destroy(self.ptr, cb, ud)
+            });
             self.ptr = std::ptr::null_mut();
         }
     }
diff --git a/examples/timer/rust_bindings/src/ffi.rs b/examples/timer/rust_bindings/src/ffi.rs
index 905acf4..6626b92 100644
--- a/examples/timer/rust_bindings/src/ffi.rs
+++ b/examples/timer/rust_bindings/src/ffi.rs
@@ -14,7 +14,7 @@ extern "C" {
     pub fn my_timer_version(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int;
     pub fn my_timer_complex(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int;
     pub fn my_timer_schedule(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int;
-    pub fn my_timer_destroy(ctx: *mut c_void) -> c_int;
+    pub fn my_timer_destroy(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void) -> c_int;
     pub fn my_timer_add_event_listener(ctx: *mut c_void, event_name: *const c_char, callback: FFICallback, user_data: *mut c_void) -> u64;
     pub fn my_timer_remove_event_listener(ctx: *mut c_void, listener_id: u64) -> c_int;
 }
diff --git a/ffi/codegen/cpp.nim b/ffi/codegen/cpp.nim
index 0ae4bff..b02b311 100644
--- a/ffi/codegen/cpp.nim
+++ b/ffi/codegen/cpp.nim
@@ -339,7 +339,9 @@ proc generateCppHeader*(
           [p.procName]
       )
     of FFIKind.DTOR:
-      lines.add("int $1(void* ctx);" % [p.procName])
+      lines.add(
+        "int $1(void* ctx, FFICallback callback, void* user_data);" % [p.procName]
+      )
   # `declareLibrary` always exports the listener-registration ABI. Declare
   # it here so the typed event-handler wiring below can call into it.
   lines.add(
@@ -570,8 +572,8 @@ proc generateCppHeader*(
   lines.add("    std::chrono::milliseconds timeout_;")
   if events.len > 0:
     # One owning entry per live listener, keyed by id. Destroyed after
-    # the destructor body runs `<lib>_destroy(ptr_)`, by which point the
-    # FFI side has joined its threads so no callback is mid-flight.
+    # the destructor blocks on `<lib>_destroy`'s recycle callback, by which
+    # point the FFI side has drained/parked the slot so no callback is mid-flight.
     lines.add(
       "    std::unordered_map<std::uint64_t, std::unique_ptr<ListenerBase>> listeners_;"
     )
diff --git a/ffi/codegen/rust.nim b/ffi/codegen/rust.nim
index f989c7e..3c2f77f 100644
--- a/ffi/codegen/rust.nim
+++ b/ffi/codegen/rust.nim
@@ -207,6 +207,8 @@ proc generateFFIRs*(procs: seq[FFIProcMeta]): string =
       lines.add("    pub fn $1($2) -> *mut c_void;" % [p.procName, params.join(", ")])
     of FFIKind.DTOR:
       params.add("ctx: *mut c_void")
+      params.add("callback: FFICallback")
+      params.add("user_data: *mut c_void")
       lines.add("    pub fn $1($2) -> c_int;" % [p.procName, params.join(", ")])
 
   # Listener-registration ABI — emitted on the Nim side by `declareLibrary`,
@@ -531,7 +533,14 @@ proc generateApiRs*(
     lines.add("impl Drop for $1 {" % [ctxTypeName])
     lines.add("    fn drop(&mut self) {")
     lines.add("        if !self.ptr.is_null() {")
-    lines.add("            unsafe { ffi::$1(self.ptr); }" % [dtorProcName])
+    # `<lib>_destroy` is non-blocking at the C ABI: it parks the context for
+    # reuse and reports the outcome via the callback. Block until that callback
+    # fires so the pool slot is fully drained and parked before this handle goes
+    # away — otherwise rapid create/destroy churn could outrun the recycle and
+    # exhaust the pool. The recycle outcome is best-effort on drop, so discard it.
+    lines.add("            let _ = ffi_call_sync(self.timeout, |cb, ud| unsafe {")
+    lines.add("                ffi::$1(self.ptr, cb, ud)" % [dtorProcName])
+    lines.add("            });")
     lines.add("            self.ptr = std::ptr::null_mut();")
     lines.add("        }")
     # `listeners` is dropped automatically after this body returns. By
diff --git a/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl b/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl
index 22ce19d..e93aa2c 100644
--- a/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl
+++ b/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl
@@ -9,7 +9,14 @@
     // context.
     ~{{CTX}}() {
         if (ptr_) {
-            {{LIB}}_destroy(ptr_);
+            // `{{LIB}}_destroy` is non-blocking at the C ABI: it parks the
+            // context for reuse and reports the outcome via the callback. Block
+            // here until that callback fires so the pool slot is fully drained
+            // and parked before this object goes away — otherwise a rapid
+            // create/destroy churn could outrun the recycle and exhaust the pool.
+            (void)ffi_call_([this](FFICallback cb, void* ud) {
+                return {{LIB}}_destroy(ptr_, cb, ud);
+            }, timeout_);
             ptr_ = nullptr;
         }
     }

From 79e5dc64c67f2519d4a06d0485f57c83e5f212dd Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Fri, 12 Jun 2026 17:30:08 +0200
Subject: [PATCH 09/10] fix(pool): deinit context resources in full teardown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

destroyFFIContext stopped/joined the worker threads and marked the slot for
rebuild, but no longer deinited the context — so the six ThreadSignalPtr fds
were orphaned every full teardown (the exact leak this path exists to prevent),
and the still-initialised Lock + event registry/queue locks were left live.

createFFIContext's rebuild path (initialized == false) reruns
initContextResources, which calls initLock / initEventRegistry / initEventQueue
and installs fresh signals over the stale handles — re-initialising a live lock
is UB. Restore the deinitContextResources() call (as the pre-recycle code did)
before marking the slot uninitialised so the rebuild starts from clean state.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ffi/ffi_context_pool.nim | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/ffi/ffi_context_pool.nim b/ffi/ffi_context_pool.nim
index 179fb8f..e39f077 100644
--- a/ffi/ffi_context_pool.nim
+++ b/ffi/ffi_context_pool.nim
@@ -50,11 +50,19 @@ proc destroyFFIContext*[T](
   ## for process/pool shutdown — normal destruction uses releaseFFIContext.
   ctx.stopAndJoinThreads().isOkOr:
     return err("destroyFFIContext(pool): " & $error)
+  # Close the ThreadSignalPtr fds and deinit the lock + event registry/queue
+  # BEFORE the slot is marked for rebuild. createFFIContext's rebuild path reruns
+  # initContextResources (initLock / initEventRegistry / initEventQueue + fresh
+  # signals); skipping deinit here would re-init still-live locks (UB) and orphan
+  # the old fds — the very leak this teardown exists to prevent.
+  let deinitRes = ctx.deinitContextResources()
   for i in 0 ..< MaxFFIContexts:
     if pool.contexts[i].addr == ctx:
       pool.initialized[i].store(false)
       break
   ctx.release()
+  deinitRes.isOkOr:
+    return err("destroyFFIContext(pool): " & $error)
   return ok()
 
 proc isValidCtx*[T](pool: var FFIContextPool[T], ctx: pointer): bool =

From b156b84ff3fe1df89b1c4e2dc034672ceded88b7 Mon Sep 17 00:00:00 2001
From: Ivan FB <ivansete@status.im>
Date: Fri, 12 Jun 2026 17:34:40 +0200
Subject: [PATCH 10/10] docs(changelog): drop premature 0.2.0; fold into
 Unreleased at 0.1.4

The branch carried a dated `[0.2.0] - 2026-06-04` release section and
`version = "0.2.0"` while the active release line is still 0.1.x, which
misrepresents unshipped work as a released version. Treat 0.1.4 as the latest
release and move everything above it back under `[Unreleased]`, merging the
0.2.0 bullets into the existing Added/Changed/Fixed groups. Set the package
version to 0.1.4 and realign the example `requires` (>= 0.2.0 -> >= 0.1.4) so
they stay satisfiable.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                | 43 ++++++++++++++-----------------------
 examples/echo/echo.nimble   |  2 +-
 examples/timer/timer.nimble |  2 +-
 ffi.nimble                  |  2 +-
 4 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52ce175..dada7b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,38 +4,12 @@ All notable changes to this project are documented in this file.
 
 ## [Unreleased]
 
-### Changed
-- Enforce `-d:noSignalHandler` at compile time: the build now fails with an
-  actionable message unless the consumer sets `-d:noSignalHandler` (libraries
-  embedded in a foreign host such as Go/Rust) or `-d:ffiAllowSignalHandler`
-  (standalone Nim binaries). Prevents the Nim runtime from installing its own
-  OS signal handlers and clobbering the host's — the cause of a real status-go
-  regression.
-- User event callbacks now run on a dedicated event thread fed by a
-  bounded SPSC queue (default capacity 1024), so a slow listener can no
-  longer block the FFI thread or concurrent `add_event_listener` /
-  `remove_event_listener` calls
-  ([#6](https://github.com/logos-messaging/nim-ffi/issues/6)).
-- Replaced the dedicated watchdog thread with a heartbeat check that
-  runs on the event thread. The FFI thread advances an atomic heartbeat
-  each loop iteration; if it stalls for more than 1s past the start-up
-  grace window, the event thread emits the `not_responding` event.
-
 ### Added
 - Queue-overflow handling: when the bounded event queue is full, the
   library sets a sticky "stuck" flag, logs an error, fires
   `not_responding` from the event thread, and rejects subsequent
   `sendRequestToFFIThread` calls with `event queue stuck - library
   cannot accept new requests`.
-
-## [0.2.0] - 2026-06-04
-
-Major release introducing the CBOR-based wire format, CBOR-backed FFI events
-with a multi-listener registry, multi-language binding generation (C++, Rust,
-CDDL), CI hardening with sanitizers, and several robustness fixes around
-context lifetime and memory safety.
-
-### Added
 - **CBOR serialization** as the FFI wire format, replacing the previous
   JSON/string-based `serial.nim`
   ([#23](https://github.com/logos-messaging/nim-ffi/pull/23)).
@@ -70,6 +44,21 @@ context lifetime and memory safety.
   ([#41](https://github.com/logos-messaging/nim-ffi/pull/41)).
 
 ### Changed
+- Enforce `-d:noSignalHandler` at compile time: the build now fails with an
+  actionable message unless the consumer sets `-d:noSignalHandler` (libraries
+  embedded in a foreign host such as Go/Rust) or `-d:ffiAllowSignalHandler`
+  (standalone Nim binaries). Prevents the Nim runtime from installing its own
+  OS signal handlers and clobbering the host's — the cause of a real status-go
+  regression.
+- User event callbacks now run on a dedicated event thread fed by a
+  bounded SPSC queue (default capacity 1024), so a slow listener can no
+  longer block the FFI thread or concurrent `add_event_listener` /
+  `remove_event_listener` calls
+  ([#6](https://github.com/logos-messaging/nim-ffi/issues/6)).
+- Replaced the dedicated watchdog thread with a heartbeat check that
+  runs on the event thread. The FFI thread advances an atomic heartbeat
+  each loop iteration; if it stalls for more than 1s past the start-up
+  grace window, the event thread emits the `not_responding` event.
 - Removed the redundant `ffiType` macro; the `ffi` macro is now the single
   authoring entry point
   ([#22](https://github.com/logos-messaging/nim-ffi/pull/22)).
@@ -97,7 +86,7 @@ context lifetime and memory safety.
   and `rust_client`, the latter with a Tokio async variant) (#15).
 - JSON/string-based FFI (de)serialization via `ffi/serial.nim`
   (`ffiSerialize`/`ffiDeserialize`), with `tests/test_serial.nim` coverage.
-  (CBOR replaced this layer later, in 0.2.0.)
+  (CBOR replaced this layer later.)
 - FFI context pool (`ffi/ffi_context_pool.nim`) using a fixed array of contexts.
 - Test suite expansion: `test_alloc.nim`, `test_ctx_validation.nim`,
   `test_ffi_context.nim`, `test_gc_compat.nim`.
diff --git a/examples/echo/echo.nimble b/examples/echo/echo.nimble
index 013a49e..8243d43 100644
--- a/examples/echo/echo.nimble
+++ b/examples/echo/echo.nimble
@@ -9,7 +9,7 @@ requires "nim >= 2.2.4"
 requires "chronos"
 requires "chronicles"
 requires "taskpools"
-requires "https://github.com/logos-messaging/nim-ffi >= 0.2.0"
+requires "https://github.com/logos-messaging/nim-ffi >= 0.1.4"
 
 const nimFlags = "--mm:orc -d:chronicles_log_level=WARN"
 
diff --git a/examples/timer/timer.nimble b/examples/timer/timer.nimble
index 0b1966d..7c23f00 100644
--- a/examples/timer/timer.nimble
+++ b/examples/timer/timer.nimble
@@ -8,7 +8,7 @@ requires "nim >= 2.2.4"
 requires "chronos"
 requires "chronicles"
 requires "taskpools"
-requires "https://github.com/logos-messaging/nim-ffi >= 0.2.0"
+requires "https://github.com/logos-messaging/nim-ffi >= 0.1.4"
 
 const nimFlags = "--mm:orc -d:chronicles_log_level=WARN"
 
diff --git a/ffi.nimble b/ffi.nimble
index edd9558..ef15405 100644
--- a/ffi.nimble
+++ b/ffi.nimble
@@ -1,6 +1,6 @@
 # ffi.nimble
 
-version = "0.2.0"
+version = "0.1.4"
 author = "Institute of Free Technology"
 description = "FFI framework with custom header generation"
 license = "MIT or Apache License 2.0"