diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac5f716..f2b38c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: if [ "$RUNNER_OS" == "Windows" ]; then export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH" fi - nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_alloc.nim + nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_alloc.nim - name: Run FFI context tests (${{ matrix.mm }}) shell: bash @@ -83,7 +83,7 @@ jobs: if [ "$RUNNER_OS" == "Windows" ]; then export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH" fi - nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_ffi_context.nim + nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_ffi_context.nim - name: Run GC compatibility tests (${{ matrix.mm }}) shell: bash @@ -91,4 +91,4 @@ jobs: if [ "$RUNNER_OS" == "Windows" ]; then export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH" fi - nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_gc_compat.nim + nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_gc_compat.nim diff --git a/ffi/ffi_context.nim b/ffi/ffi_context.nim index 61d806c..e0cf619 100644 --- a/ffi/ffi_context.nim +++ b/ffi/ffi_context.nim @@ -38,6 +38,11 @@ type CtxLifecycle {.pure.} = enum type FFIContext*[T] = object myLib*: ptr T # main library object (e.g., Waku, LibP2P, SDS, the one to be exposed as a library) + myLibRefd: bool + # refc only: whether we hold a GC_ref on myLib[]. myLib lives in non-GC + # shared memory, so a GC-managed lib object stored there is invisible to + # refc's cycle collector and gets reclaimed mid-use under sustained load. + # Pinned in processRequest, released in freeLib. Unused under orc/arc. ffiThread: Thread[(ptr FFIContext[T])] # represents the main FFI thread in charge of attending API consumer actions watchdogThread: Thread[(ptr FFIContext[T])] @@ -243,6 +248,15 @@ proc processRequest[T]( "Async error in processRequest for " & reqId & ": " & exc.msg ) + # Pin the lib object as a GC root once a handler has installed it. Under refc + # myLib lives in non-GC shared memory, invisible to the cycle collector, so it + # gets reclaimed mid-operation under sustained load. orc tracks it precisely. + when defined(gcRefc): + when T is ref: + if not ctx.myLibRefd and not ctx.myLib.isNil(): + GC_ref(ctx.myLib[]) + ctx.myLibRefd = true + ## handleRes may raise (OOM, GC setup) even though it is rare. try: handleRes(res, request) @@ -254,10 +268,16 @@ proc freeLib[T](ctx: ptr FFIContext[T]) {.gcsafe.} = return when not defined(gcRefc): - {.cast(gcsafe).}: - `=destroy`(ctx.myLib[]) + try: + {.cast(gcsafe).}: + `=destroy`(ctx.myLib[]) + except Exception: + discard else: - discard + when T is ref: + if ctx.myLibRefd: + GC_unref(ctx.myLib[]) + ctx.myLibRefd = false freeShared(ctx.myLib) ctx.myLib = nil diff --git a/ffi/internal/ffi_library.nim b/ffi/internal/ffi_library.nim index 7d7d11d..356ac2d 100644 --- a/ffi/internal/ffi_library.nim +++ b/ffi/internal/ffi_library.nim @@ -53,10 +53,13 @@ macro declareLibraryBase*(libraryName: static[string]): untyped = ) res.add(procDef) - # Create: var initialized: Atomic[bool] + # Create: var initialized, initDone: Atomic[bool] + # `initialized` elects the single thread that runs NimMain; `initDone` signals + # that NimMain has finished so concurrent first-time callers can safely proceed. let atomicType = nnkBracketExpr.newTree(ident("Atomic"), ident("bool")) let varStmt = nnkVarSection.newTree( - nnkIdentDefs.newTree(ident("initialized"), atomicType, newEmptyNode()) + nnkIdentDefs.newTree(ident("initialized"), atomicType, newEmptyNode()), + nnkIdentDefs.newTree(ident("initDone"), atomicType, newEmptyNode()), ) res.add(varStmt) @@ -80,6 +83,13 @@ macro declareLibraryBase*(libraryName: static[string]): untyped = ## Being `` the value given in the optional ## compilation flag --nimMainPrefix:yourprefix `nimMainName`() + initDone.store(true) + else: + ## Another thread won the election and is running (or has run) NimMain. + ## Block until it finishes: proceeding now would race a half-initialized + ## Nim runtime/GC and corrupt the heap on concurrent first-time calls. + while not initDone.load(): + discard when declared(setupForeignThreadGc): setupForeignThreadGc() when declared(nimGC_setStackBottom): diff --git a/ffi/internal/ffi_macro.nim b/ffi/internal/ffi_macro.nim index be23618..dc96565 100644 --- a/ffi/internal/ffi_macro.nim +++ b/ffi/internal/ffi_macro.nim @@ -234,12 +234,18 @@ proc buildFfiNewReqProc(reqTypeName, body: NimNode): NimNode = `reqObjIdent`[].`fieldNameIdent` = `fieldNameIdent` ) - # FFIThreadRequest.init using fnv1aHash32 + # The request id is the type name. Use the compile-time string literal (same + # value as `$T`, and as the registry key in addNewRequestToRegistry) rather + # than `$T`, which allocates a GC string on the *calling* thread — under a + # foreign host (Go) those are transient/concurrent OS threads where running + # Nim's GC corrupts memory. `cstring()` points at static data. + let reqNameLit = newLit( + if reqTypeName.kind == nnkPostfix: $reqTypeName[1] else: $reqTypeName + ) newBody.add( quote do: - let typeStr = $T var ret = - FFIThreadRequest.init(callback, userData, typeStr.cstring, `reqObjIdent`) + FFIThreadRequest.init(callback, userData, cstring(`reqNameLit`), `reqObjIdent`) proc destroyContent(content: pointer) {.nimcall.} = ffiDeleteReq(cast[ptr `reqTypeName`](content)) @@ -1113,9 +1119,14 @@ proc buildCtorFfiNewReqProc(reqTypeName: NimNode, paramNames: seq[string]): NimN newBody.add quote do: `reqObjIdent`[].`fieldName` = `fieldName`.alloc() + # Compile-time type-name literal instead of `$T` — `$T` allocates a GC string + # on the calling thread, which corrupts memory under a foreign host (Go) whose + # callers are transient/concurrent OS threads. See buildFfiNewReqProc. + let reqNameLit = newLit( + if reqTypeName.kind == nnkPostfix: $reqTypeName[1] else: $reqTypeName + ) newBody.add quote do: - let typeStr = $T - var ret = FFIThreadRequest.init(callback, userData, typeStr.cstring, `reqObjIdent`) + var ret = FFIThreadRequest.init(callback, userData, cstring(`reqNameLit`), `reqObjIdent`) proc destroyContent(content: pointer) {.nimcall.} = ffiDeleteReq(cast[ptr `reqTypeName`](content))