nim-ffi/ffi/alloc.nim
Ivan FB f3206c30b8
feat(ffi): emit a native (zero-serialization) C ABI alongside CBOR
A single {.ffi.} definition now produces BOTH interfaces, chosen by the
caller at link time rather than by a global compile flag:

- `<name>`      — native typed-arg C export. Args travel to the FFI thread in
                  a c_malloc'd C-POD struct passed by pointer (no CBOR), and the
                  result is delivered to the callback as raw bytes. This is the
                  preferred path for same-process callers: no serialization on
                  either side.
- `<name>_cbor` — the existing CBOR-buffer dispatcher, kept for generic /
                  cross-language callers.

Both share the user's helper proc; they register distinct handlers keyed by
"<Camel>Req" (CBOR) and "<Camel>ReqNative". FFIThreadRequest gains a `cborMode`
flag and a `payloadFree` hook so the native C-POD payload (which owns duplicated
cstring fields) is released correctly and an empty native result is delivered as
a zero-length buffer instead of the CBOR null sentinel. alloc.nim gains
ffiCMalloc/ffiCFree (prefixed to avoid Nim's style-insensitive clash with
ansi_c.c_malloc/c_free).

Verified end-to-end on a scalar-param lib: native calls return raw strings
("calc v1", "sum=42"); the _cbor variant still returns CBOR.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 02:02:39 +02:00

82 lines
2.9 KiB
Nim

## Cross-thread allocation helpers backed by libc `malloc`/`free`.
##
## We deliberately avoid Nim's `allocShared`/`deallocShared` here. Under
## `--mm:orc` they delegate to the per-thread `allocator` MemRegion stored
## in TLS; freeing such a buffer from a different thread later walks
## `chunk.owner` back to that MemRegion. If the original thread has exited
## by then (e.g. a `std::async` worker that produced the FFI request and
## was destroyed before the FFI thread ran `deleteRequest`), `chunk.owner`
## dangles into reclaimed TLS and `addToSharedFreeList` segfaults — TSan on
## ARM reproduces this from `TimerE2E.ThreadedHammer`. `malloc`/`free` are
## process-global and thread-lifetime-independent, so freeing on a different
## thread is safe.
import system/ansi_c
## Raw `(data, len)` view over a contiguous buffer of `len` elements of `T`.
## Both fields are plain values (an unmanaged pointer and an `int`), so the
## tuple itself can be shared safely between threads. Buffers are produced by
## `allocSharedSeq` and released with `deallocSharedSeq`.
type SharedSeq*[T] = tuple[data: ptr UncheckedArray[T], len: int]
proc alloc*(str: cstring): cstring =
  ## Returns a freshly `c_malloc`'d, NUL-terminated duplicate of `str`.
  ## A nil `str` yields a one-byte buffer holding only the terminator.
  ## Release the result with `dealloc(cstring)`.
  if not str.isNil():
    # Copy the characters together with the trailing NUL in one go.
    let byteCount = len(str) + 1
    result = cast[cstring](c_malloc(csize_t(byteCount)))
    copyMem(result, str, byteCount)
  else:
    result = cast[cstring](c_malloc(1))
    result[0] = '\0'
proc alloc*(str: string): cstring =
  ## Allocates a fresh NUL-terminated copy of `str` via `c_malloc`. The
  ## returned pointer must be released with `dealloc(cstring)`.
  ##
  ## All `str.len` bytes are copied verbatim (embedded NULs included) and a
  ## terminator is written after them.
  let n = str.len
  result = cast[cstring](c_malloc(csize_t(n + 1)))
  if n > 0:
    # Read the bytes straight from the string; reinterpreting it as
    # `seq[char]` would depend on both types sharing a runtime layout.
    copyMem(result, unsafeAddr str[0], n)
  result[n] = '\0'
proc dealloc*(p: cstring) {.inline.} =
  ## Releases a buffer returned by one of the `alloc(...)` overloads by
  ## handing it to `c_free`. A nil `p` is ignored.
  if p.isNil():
    return
  c_free(cast[pointer](p))
proc ffiCMalloc*(T: typedesc): ptr T =
  ## Returns a pointer to one zero-filled `T` obtained from `c_malloc`, so
  ## the buffer can cross threads safely (see the module note). It is used to
  ## carry a native (non-CBOR) request payload by pointer and must be released
  ## with `ffiCFree`. The `ffi` prefix keeps the name from colliding with
  ## `ansi_c.c_malloc`/`c_free` under Nim's style-insensitive identifier
  ## rules.
  let byteCount = sizeof(T)
  result = cast[ptr T](c_malloc(csize_t(byteCount)))
  zeroMem(result, byteCount)
proc ffiCFree*(p: pointer) {.inline.} =
  ## Hands a buffer obtained from `ffiCMalloc` back to `c_free`. A nil `p`
  ## is ignored.
  if p.isNil():
    return
  c_free(p)
proc allocSharedSeq*[T](s: seq[T]): SharedSeq[T] =
  ## Copies the elements of `s` into a `c_malloc`'d buffer and returns it as
  ## a `SharedSeq[T]`. An empty `s` yields a nil `data` pointer and `len` 0.
  ## The elements are duplicated bytewise with `copyMem`.
  let count = s.len
  if count > 0:
    let byteCount = sizeof(T) * count
    let buf = c_malloc(csize_t(byteCount))
    copyMem(buf, unsafeAddr s[0], byteCount)
    result = (cast[ptr UncheckedArray[T]](buf), count)
  else:
    result = (cast[ptr UncheckedArray[T]](nil), 0)
proc deallocSharedSeq*[T](s: var SharedSeq[T]) =
  ## Frees the buffer referenced by `s.data` with `c_free` and resets `s` to
  ## the empty state (nil `data`, `len` 0). Safe to call on an already-empty
  ## or already-released `s`.
  if not s.data.isNil():
    c_free(s.data)
    # Drop the stale pointer so a repeated call (or a later `toSeq`) cannot
    # touch freed memory.
    s.data = nil
  s.len = 0
proc toSeq*[T](s: SharedSeq[T]): seq[T] =
  ## Builds a new `seq[T]` holding copies of the `s.len` elements stored at
  ## `s.data`. The returned `seq[T]` is a GC-managed type, so no explicit
  ## dealloc is required for it; `s` is only read.
  result = newSeq[T]()
  for idx in countup(0, s.len - 1):
    result.add(s.data[idx])