mirror of
https://github.com/logos-messaging/nim-ffi.git
synced 2026-05-11 20:59:32 +00:00
542 lines
18 KiB
Nim
542 lines
18 KiB
Nim
import std/[locks, strutils, os]
|
|
import unittest2
|
|
import results
|
|
import ../ffi
|
|
|
|
## Field-less library type used by the tests below: it is the type argument
## of `createFFIContext[TestLib]` and the `lib: ptr TestLib` parameter of the
## handlers registered with `registerReqFFI`.
type
  TestLib = object
|
|
|
|
## State shared between a test and `testCallback` for a single request.
## The test thread sleeps on `cond` until the callback signals it, so waiting
## needs no polling and wastes no CPU.
type
  CallbackData = object
    lock: Lock ## held by the callback while it writes the fields below
    cond: Cond ## signalled once the callback has stored its result
    called: bool ## set to true by the callback
    retCode: cint ## return code the callback was invoked with
    msg: array[512, char] ## NUL-terminated copy of the callback message
    msgLen: int ## number of message bytes recorded in `msg`
|
|
|
|
proc initCallbackData(d: var CallbackData) =
  ## Initialises the lock and the condition variable of `d`.
  ## Pair every call with `deinitCallbackData`.
  initLock(d.lock)
  initCond(d.cond)
|
|
|
|
proc deinitCallbackData(d: var CallbackData) =
  ## Tears down the synchronisation primitives of `d`: the condition
  ## variable first, then the lock.
  deinitCond(d.cond)
  deinitLock(d.lock)
|
|
|
|
proc testCallback(
    retCode: cint, msg: ptr cchar, len: csize_t, userData: pointer
) {.cdecl, gcsafe, raises: [].} =
  ## C callback handed to the FFI layer by the tests.
  ##
  ## Stores `retCode` and at most `msg.high` (511) bytes of `msg`, followed by
  ## a terminating NUL, into the `CallbackData` that `userData` points to,
  ## marks it as called and signals `cond` so that `waitCallback` returns.
  ##
  ## * A nil `userData` is ignored: there is nowhere to store the result.
  ## * A nil `msg` is recorded as an empty message regardless of `len`, so
  ##   `msgLen` never claims bytes that were not actually copied.
  if userData.isNil:
    return
  let d = cast[ptr CallbackData](userData)

  # Clamp in the unsigned domain first: converting a huge `csize_t` straight
  # to `int` could overflow before `min` gets a chance to bound it.
  let n =
    if msg.isNil:
      0
    else:
      int(min(len, csize_t(d[].msg.high)))

  acquire(d[].lock)
  d[].retCode = retCode
  if n > 0:
    copyMem(addr d[].msg[0], msg, n)
  d[].msg[n] = '\0'
  d[].msgLen = n
  d[].called = true
  signal(d[].cond)
  release(d[].lock)
|
|
|
|
proc waitCallback(d: var CallbackData) =
  ## Blocks the calling thread until `d.called` has been set, sleeping on
  ## `d.cond` in between. The flag is re-tested after every wake-up.
  d.lock.acquire()
  defer:
    d.lock.release()
  while not d.called:
    d.cond.wait(d.lock)
|
|
|
|
proc callbackMsg(d: var CallbackData): string =
  ## Returns the message recorded in `d` as a Nim string of exactly
  ## `d.msgLen` bytes; the result is empty when no bytes were recorded.
  let size = d.msgLen
  result = newString(size)
  if size <= 0:
    return
  copyMem(addr result[0], addr d.msg[0], size)
|
|
|
|
# Echo handler: answers with the received message prefixed by "pong:".
registerReqFFI(PingRequest, lib: ptr TestLib):
  proc(message: cstring): Future[Result[string, string]] {.async.} =
    let reply = "pong:" & $message
    return ok(reply)
|
|
|
|
# Handler that always fails, used to exercise the error path of the callback.
registerReqFFI(FailRequest, lib: ptr TestLib):
  proc(): Future[Result[string, string]] {.async.} =
    const reason = "intentional failure"
    return err(reason)
|
|
|
|
# Handler that succeeds with an empty payload.
registerReqFFI(EmptyOkRequest, lib: ptr TestLib):
  proc(): Future[Result[string, string]] {.async.} =
    const payload = ""
    return ok(payload)
|
|
|
|
# Handler that stays in flight for half a second without blocking the event
# loop (it awaits `sleepAsync`), then succeeds.
registerReqFFI(SlowRequest, lib: ptr TestLib):
  proc(): Future[Result[string, string]] {.async.} =
    let delay = 500.milliseconds
    await sleepAsync(delay)
    return ok("slow-done")
|
|
|
|
# Hand-off channel between the SyncBlockingRequest handler and the test
# thread: the handler sends on it right before it starts blocking the event
# loop, which lets the test call destroyFFIContext while the loop is really
# frozen.
var gSyncBlockStarted: Channel[bool]
open(gSyncBlockStarted)
|
|
|
|
# Handler that freezes the event-loop thread with a synchronous sleep.
registerReqFFI(SyncBlockingRequest, lib: ptr TestLib):
  proc(): Future[Result[string, string]] {.async.} =
    const blockMs = 5_000

    # Give control back once, so that reqReceivedSignal fires and
    # sendRequestToFFIThread returns on the calling thread before the
    # synchronous block begins.
    await sleepAsync(0.milliseconds)

    # Tell the test thread that the event loop is about to freeze.
    # Channel.send is annotated as raising under refc, hence the try/except.
    try:
      send(gSyncBlockStarted, true)
    except Exception as exc:
      let reason = "gSyncBlockStarted.send raised: " & exc.msg
      return err(reason)

    # Stand-in for a request that blocks the event-loop thread synchronously
    # (e.g. w.stop() -> switch.stop() -> connManager.close() with blocking
    # I/O). In contrast to sleepAsync, os.sleep keeps hold of the OS thread,
    # so Chronos cannot run any callback meanwhile -- not even the reqSignal
    # fired by destroyFFIContext.
    os.sleep(blockMs)
    return ok("sync-blocking-done")
|
|
|
|
# Linked-list cell used to imitate the heavy ref-object workload that
# libwaku/libp2p runs on the FFI thread. Enough cells are allocated to force
# several refc GC cycles; under refc this stresses the heap state that, once
# combined with a later chronos Selector allocation on the main thread (via
# close()), used to trip the rawNewObj → signal-handler infinite recursion.
type
  RefCell = ref object
    next: RefCell
    payload: array[64, byte]
|
|
|
|
# Handler that builds a 50 000-cell RefCell chain, yielding to the event loop
# every 1000 cells, then drops the chain and succeeds.
registerReqFFI(HeavyRefAllocRequest, lib: ptr TestLib):
  proc(): Future[Result[string, string]] {.async.} =
    const
      cellCount = 50_000
      yieldEvery = 1000

    var chain: RefCell
    for idx in 0 ..< cellCount:
      # Prepend a fresh cell to the chain.
      chain = RefCell(next: chain)
      if idx mod yieldEvery == 0:
        await sleepAsync(0.milliseconds)

    # Drop the only reference so the chain becomes collectable, then yield so
    # that refc gets a chance to run.
    chain = nil
    await sleepAsync(10.milliseconds)
    return ok("heavy-done")
|
|
|
|
suite "createFFIContext / destroyFFIContext":
  test "create and destroy succeeds":
    # Plain life-cycle: a freshly created context must be destroyable.
    let ctx = createFFIContext[TestLib]().valueOr:
      checkpoint "createFFIContext failed: " & $error
      check false
      return

    let destroyed = destroyFFIContext(ctx)
    check destroyed.isOk()

  test "double destroy is safe via running flag":
    # NOTE(review): despite its name this test issues a single destroy call;
    # confirm whether a second destroyFFIContext(ctx) is meant to be exercised.
    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return

    let destroyed = destroyFFIContext(ctx)
    check destroyed.isOk()
|
|
|
|
suite "destroyFFIContext does not hang":
  test "destroy while a slow async request is still in-flight":
    ## Covers the race in which destroyFFIContext is called while a long-
    ## running async request (e.g. stop_node / w.stop()) is still executing.
    ## Destroy has to return well within 2 seconds; before the fix it blocked
    ## forever on joinThread(ffiThread).
    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return

    var cb: CallbackData
    initCallbackData(cb)
    defer:
      deinitCallbackData(cb)

    # sendRequestToFFIThread comes back as soon as the FFI thread has ACKed
    # receipt; the 500 ms of work carries on asynchronously on the FFI thread.
    let sent = sendRequestToFFIThread(
      ctx, SlowRequest.ffiNewReq(testCallback, addr cb)
    )
    check sent.isOk()

    # Destroy right away, while SlowRequest is still running.
    let startedAt = Moment.now()
    check destroyFFIContext(ctx).isOk()
    let elapsed = Moment.now() - startedAt
    check elapsed < 2.seconds
|
|
|
|
suite "destroyFFIContext does not hang when event loop is blocked":
  test "destroy while sync-blocking request is in-flight":
    ## Reproduces the hang seen in logosdelivery_example.c:
    ##   logosdelivery_stop_node(...)  -- triggers w.stop() on the FFI thread
    ##   sleep(1)
    ##   logosdelivery_destroy(...)    -- hangs forever
    ##
    ## Root cause: w.stop() (and similar tear-down calls) can execute a
    ## synchronous blocking section that holds the OS thread, preventing
    ## the Chronos event loop from processing the reqSignal fired by
    ## destroyFFIContext. The result is joinThread(ffiThread) never returns.
    ##
    ## With the fix, destroyFFIContext must complete well within the 5 s that
    ## SyncBlockingRequest holds the event loop.
    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return

    # The request is still running when this test returns: destroy gives up
    # with err and leaks ctx while the handler is inside os.sleep. Should the
    # handler later complete and invoke testCallback, a stack-allocated,
    # already deinitialised CallbackData would be a dangling pointer. The
    # callback state is therefore heap-allocated and deliberately leaked,
    # just like ctx itself in this scenario.
    let d = createShared(CallbackData)
    initCallbackData(d[])

    check sendRequestToFFIThread(
      ctx, SyncBlockingRequest.ffiNewReq(testCallback, d)
    ).isOk()

    # Block until the FFI handler has signalled that os.sleep is about to
    # start. This guarantees destroyFFIContext is called while the event loop
    # is frozen.
    discard gSyncBlockStarted.recv()

    # Destroy must return promptly even though the event loop is frozen for
    # 5 s. It deliberately returns err and leaks ctx in this scenario rather
    # than hanging on joinThread.
    let t0 = Moment.now()
    check destroyFFIContext(ctx).isErr()
    check (Moment.now() - t0) < 3.seconds
|
|
|
|
suite "destroyFFIContext refc workaround":
  ## Documents the refc-specific workaround in cleanUpResources.
  ##
  ## Background: when the FFI thread does heavy ref-object work (the workload
  ## that triggered the libwaku hang in production), the refc GC heap reaches
  ## a state where the very first chronos Selector allocation on the *main*
  ## thread — which happens lazily inside ThreadSignalPtr.close() through
  ## getThreadDispatcher() — traps in rawNewObj. The refc signal handler
  ## itself re-enters the same allocator and the process never returns.
  ## Captured stack:
  ##   close → safeUnregisterAndCloseFd → getThreadDispatcher →
  ##   newDispatcher → Selector.new → newObj (gc.nim:488) → rawNewObj →
  ##   _sigtramp → signalHandler → newObjNoInit → addNewObjToZCT (loop)
  ##
  ## The workaround in cleanUpResources is `when defined(gcRefc): discard`,
  ## i.e. skip the close() calls under refc only. orc is unaffected and
  ## still cleans up the signal fds normally.
  ##
  ## NOTE: this test is documentation more than regression: a synthetic
  ## ref-allocation workload of ~50k cells does NOT corrupt the refc heap
  ## the way the real libwaku/libp2p teardown does, so this test passes
  ## even when the workaround is disabled. Reproducing the actual hang
  ## requires the full libwaku workload (logosdelivery_example.c).
  ## Verification of the workaround was done end-to-end against that
  ## example: with `--mm:refc` and close() enabled it hangs forever in
  ## the captured stack above; with `when defined(gcRefc): discard` it
  ## returns immediately. Under `--mm:orc` it returns immediately either
  ## way.
  test "destroy after heavy ref-allocation workload returns promptly":
    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return

    var cb: CallbackData
    initCallbackData(cb)
    defer:
      deinitCallbackData(cb)

    # Run the allocation-heavy request to completion before destroying.
    let sent = sendRequestToFFIThread(
      ctx, HeavyRefAllocRequest.ffiNewReq(testCallback, addr cb)
    )
    check sent.isOk()
    waitCallback(cb)
    check cb.retCode == RET_OK

    # Destroy is expected to succeed and to take less than three seconds.
    let startedAt = Moment.now()
    check destroyFFIContext(ctx).isOk()
    let elapsed = Moment.now() - startedAt
    check elapsed < 3.seconds
|
|
|
|
suite "sendRequestToFFIThread":
  ## Round-trip tests: each one sends a registered request to the FFI thread,
  ## waits for `testCallback` to fire and inspects what it recorded.
  test "successful request triggers RET_OK callback":
    var d: CallbackData
    initCallbackData(d)
    defer:
      deinitCallbackData(d)

    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return
    defer:
      discard destroyFFIContext(ctx)

    check sendRequestToFFIThread(
      ctx, PingRequest.ffiNewReq(testCallback, addr d, "hello".cstring)
    ).isOk()
    waitCallback(d)
    check d.retCode == RET_OK
    check callbackMsg(d) == "pong:hello"

  test "failing request triggers RET_ERR callback":
    var d: CallbackData
    initCallbackData(d)
    defer:
      deinitCallbackData(d)

    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return
    defer:
      discard destroyFFIContext(ctx)

    check sendRequestToFFIThread(
      ctx, FailRequest.ffiNewReq(testCallback, addr d)
    ).isOk()
    waitCallback(d)
    check d.retCode == RET_ERR

  test "empty ok response delivers empty message":
    var d: CallbackData
    initCallbackData(d)
    defer:
      deinitCallbackData(d)

    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return
    defer:
      discard destroyFFIContext(ctx)

    check sendRequestToFFIThread(
      ctx, EmptyOkRequest.ffiNewReq(testCallback, addr d)
    ).isOk()
    waitCallback(d)
    check d.retCode == RET_OK
    check d.msgLen == 0

  test "sequential requests are all processed":
    let ctx = createFFIContext[TestLib]().valueOr:
      check false
      return
    defer:
      discard destroyFFIContext(ctx)

    for i in 1 .. 5:
      var d: CallbackData
      initCallbackData(d)
      # Tear the callback state down only after it has been inspected (and
      # also if this iteration is left early), as the sibling tests do.
      defer:
        deinitCallbackData(d)

      let msg = "msg" & $i
      check sendRequestToFFIThread(
        ctx, PingRequest.ffiNewReq(testCallback, addr d, msg.cstring)
      ).isOk()
      waitCallback(d)
      check d.retCode == RET_OK
      check callbackMsg(d) == "pong:" & msg
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ffiCtor macro integration test
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Library type for the macro integration tests below; `value` is filled in
# from `SimpleConfig.initialValue` by `testlib_create`.
type
  SimpleLib = object
    value: int
|
|
|
|
# Constructor configuration; the tests pass it to `testlib_create` in the
# form produced by `ffiSerialize`.
ffiType:
  type
    SimpleConfig = object
      initialValue: int
|
|
|
|
# Constructor exposed through the ffiCtor macro: builds a SimpleLib whose
# `value` is taken from `config.initialValue`.
proc testlib_create*(
    config: SimpleConfig
): Future[Result[SimpleLib, string]] {.ffiCtor.} =
  let created = SimpleLib(value: config.initialValue)
  return ok(created)
|
|
|
|
suite "ffiCtor macro":
  test "creates context and returns pointer via callback":
    var d: CallbackData
    initCallbackData(d)
    defer:
      deinitCallbackData(d)

    let configJson = ffiSerialize(SimpleConfig(initialValue: 42))
    let ret = testlib_create(configJson.cstring, testCallback, addr d)

    check not ret.isNil()

    waitCallback(d)

    check d.retCode == RET_OK

    # The callback message is the ctx address as a decimal string
    let addrStr = callbackMsg(d)
    check addrStr.len > 0

    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
    check ctxAddr != 0
    if ctxAddr == 0:
      # Nothing to inspect or destroy; bail out instead of dereferencing nil.
      return
    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
    # Deferred so the context is destroyed even if a step below raises.
    defer:
      check destroyFFIContext(ctx).isOk()

    # Verify the library was properly initialized
    check not ctx[].myLib.isNil
    if not ctx[].myLib.isNil:
      check ctx[].myLib[].value == 42
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Simplified .ffi. macro integration test
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Argument type of `testlib_send`; the tests pass it in the form produced by
# `ffiSerialize`.
ffiType:
  type
    SendConfig = object
      message: string
|
|
|
|
# Request exposed through the .ffi. macro: replies with
# "echo:<cfg.message>:<lib.value>".
proc testlib_send*(
    lib: SimpleLib, cfg: SendConfig
): Future[Result[string, string]] {.ffi.} =
  let reply = "echo:" & cfg.message & ":" & $lib.value
  return ok(reply)
|
|
|
|
suite "simplified .ffi. macro":
  test "sends request and gets serialized response via callback":
    # First create a context using ffiCtor
    var ctorD: CallbackData
    initCallbackData(ctorD)
    defer:
      deinitCallbackData(ctorD)

    let configJson = ffiSerialize(SimpleConfig(initialValue: 7))
    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
    check not ctorRet.isNil()

    waitCallback(ctorD)
    check ctorD.retCode == RET_OK

    # The constructor callback message is the ctx address as a decimal string
    let addrStr = callbackMsg(ctorD)
    check addrStr.len > 0

    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
    check ctxAddr != 0
    if ctxAddr == 0:
      # No usable context: stop here rather than hand a nil ctx to the FFI.
      return
    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
    defer:
      check destroyFFIContext(ctx).isOk()

    # Now call the .ffi. proc
    var d: CallbackData
    initCallbackData(d)
    defer:
      deinitCallbackData(d)

    let cfgJson = ffiSerialize(SendConfig(message: "hello"))
    let ret = testlib_send(ctx, testCallback, addr d, cfgJson.cstring)
    check ret == RET_OK

    waitCallback(d)
    check d.retCode == RET_OK

    # The response arrives serialized; decode it before comparing.
    let receivedMsg = callbackMsg(d)
    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
      check false
      ""
    check decoded == "echo:hello:7"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# async/sync detection in .ffi. macro integration test
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Sync proc (its body contains no await) — the macro detects this and
# bypasses the thread machinery. Replies with "v<lib.value>".
proc testlib_version*(
    lib: SimpleLib
): Future[Result[string, string]] {.ffi.} =
  let version = "v" & $lib.value
  return ok(version)
|
|
|
|
suite "async/sync detection in .ffi.":
  test "sync proc invokes callback without thread hop":
    # Create a context using ffiCtor
    var ctorD: CallbackData
    initCallbackData(ctorD)
    defer:
      deinitCallbackData(ctorD)

    let configJson = ffiSerialize(SimpleConfig(initialValue: 3))
    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
    check not ctorRet.isNil()

    waitCallback(ctorD)
    check ctorD.retCode == RET_OK

    # The constructor callback message is the ctx address as a decimal string
    let addrStr = callbackMsg(ctorD)
    check addrStr.len > 0

    let ctxAddr = cast[uint](parseBiggestUInt(addrStr))
    check ctxAddr != 0
    if ctxAddr == 0:
      # No usable context: stop here rather than hand a nil ctx to the FFI.
      return
    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
    defer:
      check destroyFFIContext(ctx).isOk()

    var d2: CallbackData
    initCallbackData(d2)
    defer:
      deinitCallbackData(d2)

    # Call sync proc — callback should fire before the proc returns (no
    # thread hop)
    let ret = testlib_version(ctx, testCallback, addr d2)
    # No sleep needed: sync path fires callback inline before returning
    check ret == RET_OK
    check d2.called # fires synchronously — no waitCallback needed
    check d2.retCode == RET_OK
    let receivedMsg = callbackMsg(d2)
    let decoded = ffiDeserialize(receivedMsg.cstring, string).valueOr:
      check false
      ""
    check decoded == "v3"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ptr T return type in .ffi. macro integration test
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Payload behind the raw pointer that `testlib_alloc_handle` returns and that
# `testlib_read_handle` / `testlib_free_handle` receive back.
type
  Handle = object
    data: string
|
|
|
|
# Argument type of `testlib_alloc_handle`; the tests pass it in the form
# produced by `ffiSerialize`.
ffiType:
  type
    NameParam = object
      name: string
|
|
|
|
# Allocates a Handle with createShared, stores "<np.name>:<lib.value>" in it
# and returns the raw pointer. The handle is released by
# `testlib_free_handle`.
proc testlib_alloc_handle*(
    lib: SimpleLib, np: NameParam
): Future[Result[ptr Handle, string]] {.ffi.} =
  let slot = createShared(Handle)
  slot[].data = np.name & ":" & $lib.value
  return ok(slot)
|
|
|
|
# Reinterprets `handle` as a `ptr Handle` and returns a copy of its `data`.
proc testlib_read_handle*(
    lib: SimpleLib, handle: pointer
): Future[Result[string, string]] {.ffi.} =
  let stored = cast[ptr Handle](handle)[].data
  return ok(stored)
|
|
|
|
# Releases a Handle obtained from `testlib_alloc_handle`.
#
# Returns ok("freed") on success and err(...) when `handle` is nil. The
# `data` string is released before the memory block itself: deallocShared
# only frees the raw block and would otherwise leave the string's buffer
# behind.
proc testlib_free_handle*(
    lib: SimpleLib, handle: pointer
): Future[Result[string, string]] {.ffi.} =
  if handle.isNil:
    return err("handle is nil")
  let h = cast[ptr Handle](handle)
  # Drop the managed field first, then give the block back.
  reset(h[])
  deallocShared(h)
  return ok("freed")
|
|
|
|
suite "ptr return type in .ffi.":
  test "returns a heap-allocated handle and reads it back":
    # Create context via ffiCtor
    var ctorD: CallbackData
    initCallbackData(ctorD)
    defer:
      deinitCallbackData(ctorD)

    let configJson = ffiSerialize(SimpleConfig(initialValue: 5))
    let ctorRet = testlib_create(configJson.cstring, testCallback, addr ctorD)
    check not ctorRet.isNil()

    waitCallback(ctorD)
    check ctorD.retCode == RET_OK

    # The constructor callback message is the ctx address as a decimal string
    let ctxAddrStr = callbackMsg(ctorD)
    check ctxAddrStr.len > 0
    let ctxAddr = cast[uint](parseBiggestUInt(ctxAddrStr))
    check ctxAddr != 0
    if ctxAddr == 0:
      # No usable context: stop here rather than hand a nil ctx to the FFI.
      return
    let ctx = cast[ptr FFIContext[SimpleLib]](ctxAddr)
    defer:
      check destroyFFIContext(ctx).isOk()

    # Alloc a handle
    var allocD: CallbackData
    initCallbackData(allocD)
    defer:
      deinitCallbackData(allocD)

    let npJson = ffiSerialize(NameParam(name: "test"))
    let allocRet =
      testlib_alloc_handle(ctx, testCallback, addr allocD, npJson.cstring)
    check allocRet == RET_OK

    waitCallback(allocD)
    check allocD.retCode == RET_OK

    # The alloc callback message is parsed as the handle address
    let handleAddrStr = callbackMsg(allocD)
    check handleAddrStr.len > 0
    let handleAddr = parseBiggestUInt(handleAddrStr)
    check handleAddr != 0
    if handleAddr == 0:
      # A nil handle must not be passed on to the read/free procs.
      return

    # Read the handle back
    var readD: CallbackData
    initCallbackData(readD)
    defer:
      deinitCallbackData(readD)

    let handleJson = ffiSerialize(cast[pointer](handleAddr))
    let readRet =
      testlib_read_handle(ctx, testCallback, addr readD, handleJson.cstring)
    check readRet == RET_OK

    waitCallback(readD)
    check readD.retCode == RET_OK

    let readMsg = callbackMsg(readD)
    let decodedStr = ffiDeserialize(readMsg.cstring, string).valueOr:
      check false
      ""
    check decodedStr == "test:5"

    # Free the handle
    var freeD: CallbackData
    initCallbackData(freeD)
    defer:
      deinitCallbackData(freeD)

    let freeRet =
      testlib_free_handle(ctx, testCallback, addr freeD, handleJson.cstring)
    check freeRet == RET_OK

    waitCallback(freeD)
    check freeD.retCode == RET_OK
|