From 6bc626946e11954ae1ed78343ef61101b9b2db2c Mon Sep 17 00:00:00 2001 From: Ivan FB Date: Fri, 12 Jun 2026 16:11:42 +0200 Subject: [PATCH] fix(codegen): emit 3-arg async destroy ABI in C++/Rust bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recycle/async-destroy work changed the Nim `ffiDtor` export from `int destroy(ctx)` to `int destroy(ctx, callback, userData)`, but the C++ and Rust generators still emitted the 1-arg signature. Foreign callers therefore passed only `ctx`; inside Nim, `callback`/`userData` held uninitialised register garbage. `requestRecycle` stored the garbage callback and the recycle handler later invoked it — a jump through a wild pointer that segfaulted in every C++ E2E / ASan / TSan job (the crash surfaced at teardown, after each test's assertions had already passed). Generate the 3-arg ABI and have the destructor/Drop block on the recycle callback via the existing sync-call helper, so the pool slot is fully drained and parked before the handle goes away — otherwise rapid create/destroy churn (StressShortLivedPerThreadContext, ThreadedHammer) could outrun the recycle and exhaust the pool. Co-Authored-By: Claude Opus 4.8 --- examples/echo/cpp_bindings/echo.hpp | 11 +++++++++-- examples/timer/cpp_bindings/my_timer.hpp | 11 +++++++++-- examples/timer/rust_bindings/src/api.rs | 4 +++- examples/timer/rust_bindings/src/ffi.rs | 2 +- ffi/codegen/cpp.nim | 8 +++++--- ffi/codegen/rust.nim | 11 ++++++++++- ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl | 9 ++++++++- 7 files changed, 45 insertions(+), 11 deletions(-) diff --git a/examples/echo/cpp_bindings/echo.hpp b/examples/echo/cpp_bindings/echo.hpp index efcfa2e..39824e7 100644 --- a/examples/echo/cpp_bindings/echo.hpp +++ b/examples/echo/cpp_bindings/echo.hpp @@ -406,7 +406,7 @@ typedef void (*FFICallback)(int ret, const char* msg, size_t len, void* user_dat void* echo_create(const uint8_t* req_cbor, size_t req_cbor_len, FFICallback callback, void* user_data); int echo_shout(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len); int echo_version(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len); -int echo_destroy(void* ctx); +int echo_destroy(void* ctx, FFICallback callback, void* user_data); uint64_t echo_add_event_listener(void* ctx, const char* event_name, FFICallback callback, void* user_data); int echo_remove_event_listener(void* ctx, uint64_t listener_id); } // extern "C" @@ -516,7 +516,14 @@ public: // context. ~EchoCtx() { if (ptr_) { - echo_destroy(ptr_); + // `echo_destroy` is non-blocking at the C ABI: it parks the + // context for reuse and reports the outcome via the callback. Block + // here until that callback fires so the pool slot is fully drained + // and parked before this object goes away — otherwise a rapid + // create/destroy churn could outrun the recycle and exhaust the pool. + (void)ffi_call_([this](FFICallback cb, void* ud) { + return echo_destroy(ptr_, cb, ud); + }, timeout_); ptr_ = nullptr; } } diff --git a/examples/timer/cpp_bindings/my_timer.hpp b/examples/timer/cpp_bindings/my_timer.hpp index 8fa440b..a4a6a3a 100644 --- a/examples/timer/cpp_bindings/my_timer.hpp +++ b/examples/timer/cpp_bindings/my_timer.hpp @@ -706,7 +706,7 @@ int my_timer_echo(void* ctx, FFICallback callback, void* user_data, const uint8_ int my_timer_version(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len); int my_timer_complex(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len); int my_timer_schedule(void* ctx, FFICallback callback, void* user_data, const uint8_t* req_cbor, size_t req_cbor_len); -int my_timer_destroy(void* ctx); +int my_timer_destroy(void* ctx, FFICallback callback, void* user_data); uint64_t my_timer_add_event_listener(void* ctx, const char* event_name, FFICallback callback, void* user_data); int my_timer_remove_event_listener(void* ctx, uint64_t listener_id); } // extern "C" @@ -816,7 +816,14 @@ public: // context. ~MyTimerCtx() { if (ptr_) { - my_timer_destroy(ptr_); + // `my_timer_destroy` is non-blocking at the C ABI: it parks the + // context for reuse and reports the outcome via the callback. Block + // here until that callback fires so the pool slot is fully drained + // and parked before this object goes away — otherwise a rapid + // create/destroy churn could outrun the recycle and exhaust the pool. + (void)ffi_call_([this](FFICallback cb, void* ud) { + return my_timer_destroy(ptr_, cb, ud); + }, timeout_); ptr_ = nullptr; } } diff --git a/examples/timer/rust_bindings/src/api.rs b/examples/timer/rust_bindings/src/api.rs index 709f9c1..8ad86ca 100644 --- a/examples/timer/rust_bindings/src/api.rs +++ b/examples/timer/rust_bindings/src/api.rs @@ -141,7 +141,9 @@ unsafe impl Sync for MyTimerCtx {} impl Drop for MyTimerCtx { fn drop(&mut self) { if !self.ptr.is_null() { - unsafe { ffi::my_timer_destroy(self.ptr); } + let _ = ffi_call_sync(self.timeout, |cb, ud| unsafe { + ffi::my_timer_destroy(self.ptr, cb, ud) + }); self.ptr = std::ptr::null_mut(); } } diff --git a/examples/timer/rust_bindings/src/ffi.rs b/examples/timer/rust_bindings/src/ffi.rs index 905acf4..6626b92 100644 --- a/examples/timer/rust_bindings/src/ffi.rs +++ b/examples/timer/rust_bindings/src/ffi.rs @@ -14,7 +14,7 @@ extern "C" { pub fn my_timer_version(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int; pub fn my_timer_complex(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int; pub fn my_timer_schedule(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void, req_cbor: *const u8, req_cbor_len: usize) -> c_int; - pub fn my_timer_destroy(ctx: *mut c_void) -> c_int; + pub fn my_timer_destroy(ctx: *mut c_void, callback: FFICallback, user_data: *mut c_void) -> c_int; pub fn my_timer_add_event_listener(ctx: *mut c_void, event_name: *const c_char, callback: FFICallback, user_data: *mut c_void) -> u64; pub fn my_timer_remove_event_listener(ctx: *mut c_void, listener_id: u64) -> c_int; } diff --git a/ffi/codegen/cpp.nim b/ffi/codegen/cpp.nim index 0ae4bff..b02b311 100644 --- a/ffi/codegen/cpp.nim +++ b/ffi/codegen/cpp.nim @@ -339,7 +339,9 @@ proc generateCppHeader*( [p.procName] ) of FFIKind.DTOR: - lines.add("int $1(void* ctx);" % [p.procName]) + lines.add( + "int $1(void* ctx, FFICallback callback, void* user_data);" % [p.procName] + ) # `declareLibrary` always exports the listener-registration ABI. Declare # it here so the typed event-handler wiring below can call into it. lines.add( @@ -570,8 +572,8 @@ proc generateCppHeader*( lines.add(" std::chrono::milliseconds timeout_;") if events.len > 0: # One owning entry per live listener, keyed by id. Destroyed after - # the destructor body runs `_destroy(ptr_)`, by which point the - # FFI side has joined its threads so no callback is mid-flight. + # the destructor blocks on `_destroy`'s recycle callback, by which + # point the FFI side has drained/parked the slot so no callback is mid-flight. lines.add( " std::unordered_map> listeners_;" ) diff --git a/ffi/codegen/rust.nim b/ffi/codegen/rust.nim index f989c7e..3c2f77f 100644 --- a/ffi/codegen/rust.nim +++ b/ffi/codegen/rust.nim @@ -207,6 +207,8 @@ proc generateFFIRs*(procs: seq[FFIProcMeta]): string = lines.add(" pub fn $1($2) -> *mut c_void;" % [p.procName, params.join(", ")]) of FFIKind.DTOR: params.add("ctx: *mut c_void") + params.add("callback: FFICallback") + params.add("user_data: *mut c_void") lines.add(" pub fn $1($2) -> c_int;" % [p.procName, params.join(", ")]) # Listener-registration ABI — emitted on the Nim side by `declareLibrary`, @@ -531,7 +533,14 @@ proc generateApiRs*( lines.add("impl Drop for $1 {" % [ctxTypeName]) lines.add(" fn drop(&mut self) {") lines.add(" if !self.ptr.is_null() {") - lines.add(" unsafe { ffi::$1(self.ptr); }" % [dtorProcName]) + # `_destroy` is non-blocking at the C ABI: it parks the context for + # reuse and reports the outcome via the callback. Block until that callback + # fires so the pool slot is fully drained and parked before this handle goes + # away — otherwise rapid create/destroy churn could outrun the recycle and + # exhaust the pool. The recycle outcome is best-effort on drop, so discard it. + lines.add(" let _ = ffi_call_sync(self.timeout, |cb, ud| unsafe {") + lines.add(" ffi::$1(self.ptr, cb, ud)" % [dtorProcName]) + lines.add(" });") lines.add(" self.ptr = std::ptr::null_mut();") lines.add(" }") # `listeners` is dropped automatically after this body returns. By diff --git a/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl b/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl index 22ce19d..e93aa2c 100644 --- a/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl +++ b/ffi/codegen/templates/cpp/context_rule_of_5.hpp.tpl @@ -9,7 +9,14 @@ // context. ~{{CTX}}() { if (ptr_) { - {{LIB}}_destroy(ptr_); + // `{{LIB}}_destroy` is non-blocking at the C ABI: it parks the + // context for reuse and reports the outcome via the callback. Block + // here until that callback fires so the pool slot is fully drained + // and parked before this object goes away — otherwise a rapid + // create/destroy churn could outrun the recycle and exhaust the pool. + (void)ffi_call_([this](FFICallback cb, void* ud) { + return {{LIB}}_destroy(ptr_, cb, ud); + }, timeout_); ptr_ = nullptr; } }