mirror of
https://github.com/logos-messaging/nim-ffi.git
synced 2026-06-20 16:29:31 +00:00
fix: foreign-host (Go) concurrency crashes (#83)
This commit is contained in:
parent
e86b136e79
commit
0a6bf6c49f
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -75,7 +75,7 @@ jobs:
|
||||
if [ "$RUNNER_OS" == "Windows" ]; then
|
||||
export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH"
|
||||
fi
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_alloc.nim
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_alloc.nim
|
||||
|
||||
- name: Run FFI context tests (${{ matrix.mm }})
|
||||
shell: bash
|
||||
@ -83,7 +83,7 @@ jobs:
|
||||
if [ "$RUNNER_OS" == "Windows" ]; then
|
||||
export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH"
|
||||
fi
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_ffi_context.nim
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_ffi_context.nim
|
||||
|
||||
- name: Run GC compatibility tests (${{ matrix.mm }})
|
||||
shell: bash
|
||||
@ -91,4 +91,4 @@ jobs:
|
||||
if [ "$RUNNER_OS" == "Windows" ]; then
|
||||
export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH"
|
||||
fi
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN tests/test_gc_compat.nim
|
||||
nim c -r --mm:${{ matrix.mm }} -d:chronicles_log_level=WARN -d:ffiAllowSignalHandler tests/test_gc_compat.nim
|
||||
|
||||
@ -38,6 +38,11 @@ type CtxLifecycle {.pure.} = enum
|
||||
type FFIContext*[T] = object
|
||||
myLib*: ptr T
|
||||
# main library object (e.g., Waku, LibP2P, SDS, the one to be exposed as a library)
|
||||
myLibRefd: bool
|
||||
# refc only: whether we hold a GC_ref on myLib[]. myLib lives in non-GC
|
||||
# shared memory, so a GC-managed lib object stored there is invisible to
|
||||
# refc's cycle collector and gets reclaimed mid-use under sustained load.
|
||||
# Pinned in processRequest, released in freeLib. Unused under orc/arc.
|
||||
ffiThread: Thread[(ptr FFIContext[T])]
|
||||
# represents the main FFI thread in charge of attending API consumer actions
|
||||
watchdogThread: Thread[(ptr FFIContext[T])]
|
||||
@ -243,6 +248,15 @@ proc processRequest[T](
|
||||
"Async error in processRequest for " & reqId & ": " & exc.msg
|
||||
)
|
||||
|
||||
# Pin the lib object as a GC root once a handler has installed it. Under refc
|
||||
# myLib lives in non-GC shared memory, invisible to the cycle collector, so it
|
||||
# gets reclaimed mid-operation under sustained load. orc tracks it precisely.
|
||||
when defined(gcRefc):
|
||||
when T is ref:
|
||||
if not ctx.myLibRefd and not ctx.myLib.isNil():
|
||||
GC_ref(ctx.myLib[])
|
||||
ctx.myLibRefd = true
|
||||
|
||||
## handleRes may raise (OOM, GC setup) even though it is rare.
|
||||
try:
|
||||
handleRes(res, request)
|
||||
@ -254,10 +268,16 @@ proc freeLib[T](ctx: ptr FFIContext[T]) {.gcsafe.} =
|
||||
return
|
||||
|
||||
when not defined(gcRefc):
|
||||
{.cast(gcsafe).}:
|
||||
`=destroy`(ctx.myLib[])
|
||||
try:
|
||||
{.cast(gcsafe).}:
|
||||
`=destroy`(ctx.myLib[])
|
||||
except Exception:
|
||||
discard
|
||||
else:
|
||||
discard
|
||||
when T is ref:
|
||||
if ctx.myLibRefd:
|
||||
GC_unref(ctx.myLib[])
|
||||
ctx.myLibRefd = false
|
||||
freeShared(ctx.myLib)
|
||||
ctx.myLib = nil
|
||||
|
||||
|
||||
@ -53,10 +53,13 @@ macro declareLibraryBase*(libraryName: static[string]): untyped =
|
||||
)
|
||||
res.add(procDef)
|
||||
|
||||
# Create: var initialized: Atomic[bool]
|
||||
# Create: var initialized, initDone: Atomic[bool]
|
||||
# `initialized` elects the single thread that runs NimMain; `initDone` signals
|
||||
# that NimMain has finished so concurrent first-time callers can safely proceed.
|
||||
let atomicType = nnkBracketExpr.newTree(ident("Atomic"), ident("bool"))
|
||||
let varStmt = nnkVarSection.newTree(
|
||||
nnkIdentDefs.newTree(ident("initialized"), atomicType, newEmptyNode())
|
||||
nnkIdentDefs.newTree(ident("initialized"), atomicType, newEmptyNode()),
|
||||
nnkIdentDefs.newTree(ident("initDone"), atomicType, newEmptyNode()),
|
||||
)
|
||||
res.add(varStmt)
|
||||
|
||||
@ -80,6 +83,13 @@ macro declareLibraryBase*(libraryName: static[string]): untyped =
|
||||
## Being `<yourprefix>` the value given in the optional
|
||||
## compilation flag --nimMainPrefix:yourprefix
|
||||
`nimMainName`()
|
||||
initDone.store(true)
|
||||
else:
|
||||
## Another thread won the election and is running (or has run) NimMain.
|
||||
## Block until it finishes: proceeding now would race a half-initialized
|
||||
## Nim runtime/GC and corrupt the heap on concurrent first-time calls.
|
||||
while not initDone.load():
|
||||
discard
|
||||
when declared(setupForeignThreadGc):
|
||||
setupForeignThreadGc()
|
||||
when declared(nimGC_setStackBottom):
|
||||
|
||||
@ -234,12 +234,18 @@ proc buildFfiNewReqProc(reqTypeName, body: NimNode): NimNode =
|
||||
`reqObjIdent`[].`fieldNameIdent` = `fieldNameIdent`
|
||||
)
|
||||
|
||||
# FFIThreadRequest.init using fnv1aHash32
|
||||
# The request id is the type name. Use the compile-time string literal (same
|
||||
# value as `$T`, and as the registry key in addNewRequestToRegistry) rather
|
||||
# than `$T`, which allocates a GC string on the *calling* thread — under a
|
||||
# foreign host (Go) those are transient/concurrent OS threads where running
|
||||
# Nim's GC corrupts memory. `cstring(<literal>)` points at static data.
|
||||
let reqNameLit = newLit(
|
||||
if reqTypeName.kind == nnkPostfix: $reqTypeName[1] else: $reqTypeName
|
||||
)
|
||||
newBody.add(
|
||||
quote do:
|
||||
let typeStr = $T
|
||||
var ret =
|
||||
FFIThreadRequest.init(callback, userData, typeStr.cstring, `reqObjIdent`)
|
||||
FFIThreadRequest.init(callback, userData, cstring(`reqNameLit`), `reqObjIdent`)
|
||||
proc destroyContent(content: pointer) {.nimcall.} =
|
||||
ffiDeleteReq(cast[ptr `reqTypeName`](content))
|
||||
|
||||
@ -1113,9 +1119,14 @@ proc buildCtorFfiNewReqProc(reqTypeName: NimNode, paramNames: seq[string]): NimN
|
||||
newBody.add quote do:
|
||||
`reqObjIdent`[].`fieldName` = `fieldName`.alloc()
|
||||
|
||||
# Compile-time type-name literal instead of `$T` — `$T` allocates a GC string
|
||||
# on the calling thread, which corrupts memory under a foreign host (Go) whose
|
||||
# callers are transient/concurrent OS threads. See buildFfiNewReqProc.
|
||||
let reqNameLit = newLit(
|
||||
if reqTypeName.kind == nnkPostfix: $reqTypeName[1] else: $reqTypeName
|
||||
)
|
||||
newBody.add quote do:
|
||||
let typeStr = $T
|
||||
var ret = FFIThreadRequest.init(callback, userData, typeStr.cstring, `reqObjIdent`)
|
||||
var ret = FFIThreadRequest.init(callback, userData, cstring(`reqNameLit`), `reqObjIdent`)
|
||||
proc destroyContent(content: pointer) {.nimcall.} =
|
||||
ffiDeleteReq(cast[ptr `reqTypeName`](content))
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user