diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f67d52f..d36234d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,51 +7,86 @@ on: branches: [master, main] jobs: + # Single source of truth for Nim / Nimble versions used by every job and + # every reusable workflow below. Values live in versions.env at the repo + # root so they're greppable and editable in one place, next to ffi.nimble. + # The job exposes them as outputs because the `with:` of a reusable-workflow + # call only accepts the `needs`, `inputs`, `vars`, and `github` contexts — + # `env` is not allowed there, which rules out plain workflow-level env vars. + versions: + runs-on: ubuntu-latest + outputs: + nim-versions: ${{ steps.load.outputs.NIM_VERSIONS }} + nimble: ${{ steps.load.outputs.NIMBLE_VERSION }} + steps: + - uses: actions/checkout@v4 + - id: load + run: cat versions.env >> "$GITHUB_OUTPUT" + alloc: name: Alloc + needs: versions uses: ./.github/workflows/test.yml with: test: test_alloc + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} ffi-context: name: FFI Context + needs: versions uses: ./.github/workflows/test.yml with: test: test_ffi_context + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} gc-compat: name: GC Compatibility + needs: versions uses: ./.github/workflows/test.yml with: test: test_gc_compat + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} serial: name: Serial + needs: versions uses: ./.github/workflows/test.yml with: test: test_serial + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} ctx-validation: name: Context Validation + needs: versions uses: ./.github/workflows/test.yml with: test: test_ctx_validation + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} cpp-e2e: name: C++ E2E + needs: versions # Codegen output doesn't vary with mm and CMake/FetchContent is most - # reliable on Linux, so we run a single config rather than the full matrix. + # reliable on Linux, so we don't matrix over OS or mm here — just Nim. + strategy: + fail-fast: false + matrix: + nim-version: ${{ fromJSON(needs.versions.outputs.nim-versions) }} runs-on: ubuntu-22.04 env: - NIMBLE_VERSION: '0.22.3' - NIM_VERSION: '2.2.4' + NIMBLE_VERSION: ${{ needs.versions.outputs.nimble }} steps: - uses: actions/checkout@v4 - name: Setup Nim uses: jiro4989/setup-nim-action@v2 with: - nim-version: ${{ env.NIM_VERSION }} + nim-version: ${{ matrix.nim-version }} repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Install Nimble ${{ env.NIMBLE_VERSION }} @@ -66,16 +101,16 @@ jobs: path: | nimbledeps/ nimble.paths - key: ${{ runner.os }}-nimbledeps-${{ env.NIM_VERSION }}-${{ hashFiles('*.nimble') }} + key: ${{ runner.os }}-nimbledeps-${{ matrix.nim-version }}-${{ hashFiles('*.nimble') }} restore-keys: | - ${{ runner.os }}-nimbledeps-${{ env.NIM_VERSION }}- + ${{ runner.os }}-nimbledeps-${{ matrix.nim-version }}- ${{ runner.os }}-nimbledeps- - name: Install nimble deps if: steps.cache-nimbledeps.outputs.cache-hit != 'true' run: nimble setup --localdeps -y - - name: Cache CMake FetchContent (GoogleTest + nlohmann_json) + - name: Cache CMake FetchContent (GoogleTest) uses: actions/cache@v4 with: path: tests/e2e/cpp/build/_deps @@ -83,3 +118,21 @@ jobs: - name: Run C++ e2e tests run: nimble test_cpp_e2e -y + + tests-asan-ubsan: + name: Tests · ASan+UBSan+LSan + needs: versions + uses: ./.github/workflows/tests-sanitized.yml + with: + sanitizer: asan-ubsan + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} + + tests-tsan: + name: Tests · TSan + needs: versions + uses: ./.github/workflows/tests-sanitized.yml + with: + sanitizer: tsan + nim-versions: ${{ needs.versions.outputs.nim-versions }} + nimble-version: ${{ needs.versions.outputs.nimble }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d854f5f..c009658 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,9 +6,13 @@ on: test: required: true type: string - -env: - NIMBLE_VERSION: '0.22.3' + nim-versions: + required: true + type: string + description: JSON array of Nim versions to run the matrix against, e.g. '["2.2.4","stable"]'. + nimble-version: + required: true + type: string jobs: run: @@ -16,7 +20,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, macos-15, windows-latest] - nim-version: ['2.2.4', 'stable'] + nim-version: ${{ fromJSON(inputs.nim-versions) }} mm: [orc, refc] include: - os: ubuntu-22.04 @@ -38,13 +42,13 @@ jobs: nim-version: ${{ matrix.nim-version }} repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install Nimble ${{ env.NIMBLE_VERSION }} + - name: Install Nimble ${{ inputs.nimble-version }} shell: bash run: | if [ "$RUNNER_OS" == "Windows" ]; then export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$PATH" fi - cd /tmp && nimble install "nimble@${{ env.NIMBLE_VERSION }}" -y + cd /tmp && nimble install "nimble@${{ inputs.nimble-version }}" -y echo "$HOME/.nimble/bin" >> $GITHUB_PATH - name: Cache nimble deps @@ -63,9 +67,6 @@ jobs: if: steps.cache-nimbledeps.outputs.cache-hit != 'true' shell: bash run: | - if [ "$RUNNER_OS" == "Windows" ]; then - export PATH="$GITHUB_WORKSPACE/.nim_runtime/bin:$HOME/.nimble/bin:$PATH" - fi nimble setup --localdeps -y - name: Build diff --git a/.github/workflows/tests-sanitized.yml b/.github/workflows/tests-sanitized.yml new file mode 100644 index 0000000..7e88d58 --- /dev/null +++ b/.github/workflows/tests-sanitized.yml @@ -0,0 +1,92 @@ +name: tests-sanitized + +on: + workflow_call: + inputs: + sanitizer: + required: true + type: string + nim-versions: + required: true + type: string + description: JSON array of Nim versions to run the matrix against, e.g. '["2.2.4","stable"]'. + nimble-version: + required: true + type: string + +jobs: + run: + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04, ubuntu-24.04-arm] + nim-version: ${{ fromJSON(inputs.nim-versions) }} + # refc's GC conservatively scans the stack via setjmp+walk, which + # intentionally reads one word past the saved-registers buffer to + # transition into scanning the rest of the thread stack. ASan flags + # that as a stack-buffer-overflow (false positive — see its "custom + # stack unwind mechanism" hint). orc doesn't conservatively scan, so + # it's the only mm we sanitize under asan-ubsan. refc keeps coverage + # in the non-sanitized matrix (test.yml) and under tsan. + # (Can't express this via `exclude` or job-level `if:` — neither + # has access to `inputs.sanitizer` in a way that gates a matrix + # combination, so we filter at the dimension itself.) + mm: ${{ fromJSON(inputs.sanitizer == 'asan-ubsan' && '["orc"]' || '["orc","refc"]') }} + + runs-on: ${{ matrix.os }} + name: ${{ inputs.sanitizer }} · ${{ matrix.os }} · Nim ${{ matrix.nim-version }} · ${{ matrix.mm }} + + env: + NIM_FFI_MM: ${{ matrix.mm }} + NIM_FFI_SAN: ${{ inputs.sanitizer }} + # Per-sanitizer runtime options. Mirrors what tests/e2e/cpp/CMakeLists.txt + # injects via gtest_discover_tests so unit and e2e runs agree. + # + # `asan-ubsan` enables leak detection inside ASan (detect_leaks=1); + # LSAN_OPTIONS is honoured for suppressions when LSan runs under ASan. + ASAN_OPTIONS: halt_on_error=1:abort_on_error=1:detect_leaks=1:strict_string_checks=1 + UBSAN_OPTIONS: halt_on_error=1:print_stacktrace=1 + LSAN_OPTIONS: suppressions=${{ github.workspace }}/tests/e2e/cpp/lsan.supp:print_suppressions=0 + TSAN_OPTIONS: halt_on_error=1:second_deadlock_stack=1:history_size=7 + + steps: + - uses: actions/checkout@v4 + + - name: Setup Nim + uses: jiro4989/setup-nim-action@v2 + with: + nim-version: ${{ matrix.nim-version }} + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install Nimble ${{ inputs.nimble-version }} + run: | + cd /tmp && nimble install "nimble@${{ inputs.nimble-version }}" -y + echo "$HOME/.nimble/bin" >> $GITHUB_PATH + + - name: Cache nimble deps + id: cache-nimbledeps + uses: actions/cache@v4 + with: + path: | + nimbledeps/ + nimble.paths + key: ${{ runner.os }}-${{ runner.arch }}-nimbledeps-${{ matrix.nim-version }}-${{ hashFiles('*.nimble') }} + restore-keys: | + ${{ runner.os }}-${{ runner.arch }}-nimbledeps-${{ matrix.nim-version }}- + ${{ runner.os }}-${{ runner.arch }}-nimbledeps- + + - name: Install nimble deps + if: steps.cache-nimbledeps.outputs.cache-hit != 'true' + run: nimble setup --localdeps -y + + - name: Cache CMake FetchContent (GoogleTest) + uses: actions/cache@v4 + with: + path: tests/e2e/cpp/build/_deps + key: ${{ runner.os }}-${{ runner.arch }}-cpp-e2e-deps-${{ inputs.sanitizer }}-${{ hashFiles('tests/e2e/cpp/CMakeLists.txt') }} + + - name: Run unit tests (${{ inputs.sanitizer }}) + run: nimble test_sanitized -y + + - name: Run C++ e2e tests (${{ inputs.sanitizer }}) + run: nimble test_cpp_e2e_sanitized -y diff --git a/config.nims b/config.nims index ccb78e3..be54fdf 100644 --- a/config.nims +++ b/config.nims @@ -3,6 +3,7 @@ switch("path", thisDir()) # begin Nimble config (version 2) +--noNimblePath when withDir(thisDir(), system.fileExists("nimble.paths")): include "nimble.paths" # end Nimble config \ No newline at end of file diff --git a/ffi.nimble b/ffi.nimble index 47b1b8b..400e03b 100644 --- a/ffi.nimble +++ b/ffi.nimble @@ -16,21 +16,55 @@ requires "cbor_serialization" const nimFlagsOrc = "--mm:orc -d:chronicles_log_level=WARN" const nimFlagsRefc = "--mm:refc -d:chronicles_log_level=WARN" +import std/[algorithm, os, strutils] + +proc discoverUnitTests(): seq[string] = + # `listFiles` returns both .nim sources and any compiled binaries left in + # the dir from prior local runs — filter to .nim so we don't run a test + # twice (and don't try to `nim c -r` a stale binary). + var names: seq[string] = @[] + for path in listFiles(thisDir() / "tests/unit"): + if path.endsWith(".nim"): + let name = path.extractFilename.changeFileExt("") + if name.startsWith("test_"): + names.add(name) + names.sort() + return names + +let unitTests = discoverUnitTests() + +proc sanFlags(san: string): string = + # Each --passC / --passL adds one literal flag to the C compiler / linker + # invocation — avoids any quoting ambiguity that arises from putting + # space-separated flags inside a single --passC argument. + # + # `asan-ubsan` enables LeakSanitizer too: ASan includes LSan, so leaks are + # reported when ASAN_OPTIONS=detect_leaks=1 (set by the sanitizer CI job). + case san + of "none", "": + "" + of "asan-ubsan": + " --passC:-fsanitize=address,undefined" & + " --passC:-fno-sanitize-recover=all" & + " --passC:-fno-omit-frame-pointer" & + " --passC:-g" & + " --passL:-fsanitize=address,undefined" + of "tsan": + " --passC:-fsanitize=thread" & + " --passC:-fno-omit-frame-pointer" & + " --passC:-g" & + " --passC:-O1" & + " --passL:-fsanitize=thread" + else: + raise newException(ValueError, "unknown NIM_FFI_SAN: " & san) + task buildffi, "Compile the library": exec "nim c " & nimFlagsOrc & " --app:lib --noMain ffi.nim" task test, "Run all tests under --mm:orc and --mm:refc": for flags in [nimFlagsOrc, nimFlagsRefc]: - exec "nim c -r " & flags & " tests/unit/test_alloc.nim" - exec "nim c -r " & flags & " tests/unit/test_ffi_context.nim" - exec "nim c -r " & flags & " tests/unit/test_gc_compat.nim" - exec "nim c -r " & flags & " tests/unit/test_serial.nim" - exec "nim c -r " & flags & " tests/unit/test_ctx_validation.nim" - exec "nim c -r " & flags & " tests/unit/test_nim_native_api.nim" - exec "nim c -r " & flags & " tests/unit/test_meta.nim" - exec "nim c -r " & flags & " tests/unit/test_string_helpers.nim" - exec "nim c -r " & flags & " tests/unit/test_wire_compat.nim" - exec "nim c -r " & flags & " tests/unit/test_cddl_codegen.nim" + for t in unitTests: + exec "nim c -r " & flags & " tests/unit/" & t & ".nim" task test_alloc, "Run alloc unit tests under --mm:orc and --mm:refc": exec "nim c -r " & nimFlagsOrc & " tests/unit/test_alloc.nim" @@ -51,6 +85,35 @@ task test_cpp_e2e, "Build and run the C++ end-to-end tests for the timer example exec "cmake --build tests/e2e/cpp/build" exec "ctest --test-dir tests/e2e/cpp/build --output-on-failure" +task test_sanitized, "Run all unit tests under a sanitizer (NIM_FFI_SAN) and mm (NIM_FFI_MM)": + let san = getEnv("NIM_FFI_SAN", "none") + let mm = getEnv("NIM_FFI_MM", "") + let extra = sanFlags(san) + let modes = + if mm == "orc": @[nimFlagsOrc] + elif mm == "refc": @[nimFlagsRefc] + else: @[nimFlagsOrc, nimFlagsRefc] + if san == "tsan": + let suppPath = thisDir() & "/tsan.supp" + let existing = getEnv("TSAN_OPTIONS") + if existing == "": + putEnv("TSAN_OPTIONS", "suppressions=" & suppPath) + elif "suppressions=" notin existing: + putEnv("TSAN_OPTIONS", existing & ":suppressions=" & suppPath) + for flags in modes: + for t in unitTests: + exec "nim c -r " & flags & extra & " tests/unit/" & t & ".nim" + +task test_cpp_e2e_sanitized, "Build and run the C++ e2e tests with a sanitizer (NIM_FFI_SAN) and mm (NIM_FFI_MM)": + let mm = getEnv("NIM_FFI_MM", "orc") + let san = getEnv("NIM_FFI_SAN", "none") + exec "nimble genbindings_cpp" + exec "cmake -S tests/e2e/cpp -B tests/e2e/cpp/build" & + " -DNIM_FFI_MM=" & mm & + " -DNIM_FFI_SANITIZER=" & san + exec "cmake --build tests/e2e/cpp/build -j" + exec "ctest --test-dir tests/e2e/cpp/build --output-on-failure" + task genbindings_example, "Generate Rust bindings for the timer example": exec "nim c " & nimFlagsOrc & " --app:lib --noMain --nimMainPrefix:libmy_timer -d:ffiGenBindings -o:/dev/null examples/timer/timer.nim" exec "nim c " & nimFlagsRefc & " --app:lib --noMain --nimMainPrefix:libmy_timer -d:ffiGenBindings -o:/dev/null examples/timer/timer.nim" diff --git a/ffi/alloc.nim b/ffi/alloc.nim index a12d5f9..66e688e 100644 --- a/ffi/alloc.nim +++ b/ffi/alloc.nim @@ -1,40 +1,60 @@ +## Cross-thread allocation helpers backed by libc `malloc`/`free`. +## +## We deliberately avoid Nim's `allocShared`/`deallocShared` here. Under +## `--mm:orc` they delegate to the per-thread `allocator` MemRegion stored +## in TLS; freeing such a buffer from a different thread later walks +## `chunk.owner` back to that MemRegion. If the original thread has exited +## by then (e.g. a `std::async` worker that produced the FFI request and +## was destroyed before the FFI thread ran `deleteRequest`), `chunk.owner` +## dangles into reclaimed TLS and `addToSharedFreeList` segfaults — TSan on +## ARM reproduces this from `TimerE2E.ThreadedHammer`. `malloc`/`free` are +## process-global and thread-lifetime-independent, so freeing on a different +## thread is safe. + +import system/ansi_c + ## Can be shared safely between threads type SharedSeq*[T] = tuple[data: ptr UncheckedArray[T], len: int] proc alloc*(str: cstring): cstring = - # Byte allocation from the given address. - # There should be the corresponding manual deallocation with deallocShared ! + ## Allocates a fresh null-terminated copy of `str` via `c_malloc`. The + ## returned pointer must be released with `dealloc(cstring)`. if str.isNil(): - var ret = cast[cstring](allocShared(1)) # Allocate memory for the null terminator - ret[0] = '\0' # Set the null terminator + var ret = cast[cstring](c_malloc(1)) + ret[0] = '\0' return ret - let ret = cast[cstring](allocShared(len(str) + 1)) + let ret = cast[cstring](c_malloc(csize_t(len(str) + 1))) copyMem(ret, str, len(str) + 1) return ret proc alloc*(str: string): cstring = - ## Byte allocation from the given address. - ## There should be the corresponding manual deallocation with deallocShared ! - var ret = cast[cstring](allocShared(str.len + 1)) + ## Allocates a fresh null-terminated copy of `str` via `c_malloc`. The + ## returned pointer must be released with `dealloc(cstring)`. + var ret = cast[cstring](c_malloc(csize_t(str.len + 1))) let s = cast[seq[char]](str) for i in 0 ..< str.len: ret[i] = s[i] ret[str.len] = '\0' return ret -proc allocSharedSeq*[T](s: seq[T]): SharedSeq[T] = - let data = allocShared(sizeof(T) * s.len) +proc dealloc*(p: cstring) {.inline.} = + ## Frees a buffer obtained from one of the `alloc(...)` overloads above. + ## Nil-safe. + if not p.isNil(): + c_free(cast[pointer](p)) +proc allocSharedSeq*[T](s: seq[T]): SharedSeq[T] = if s.len == 0: return (cast[ptr UncheckedArray[T]](nil), 0) + let data = c_malloc(csize_t(sizeof(T) * s.len)) copyMem(data, unsafeAddr s[0], sizeof(T) * s.len) return (cast[ptr UncheckedArray[T]](data), s.len) proc deallocSharedSeq*[T](s: var SharedSeq[T]) = if not s.data.isNil(): - deallocShared(s.data) + c_free(s.data) s.len = 0 proc toSeq*[T](s: SharedSeq[T]): seq[T] = diff --git a/ffi/cbor_serial.nim b/ffi/cbor_serial.nim index a8a5621..2651631 100644 --- a/ffi/cbor_serial.nim +++ b/ffi/cbor_serial.nim @@ -3,8 +3,11 @@ ## FFI plumbing expects, and adds the few transport-only details the FFI layer ## needs on top: ## -## - `cborEncodeShared` writes into an `allocShared` buffer so the FFI thread -## can take ownership of the bytes without a second copy. +## - `cborEncodeShared` writes into a `c_malloc` buffer so the FFI thread +## can take ownership of the bytes without a second copy. `c_malloc` +## (not `allocShared`) because the buffer must be freeable from the FFI +## thread after the producing thread may have exited — see the note in +## `ffi/ffi_thread_request.nim`. ## - `CborNullByte` is the canonical "successful but no value" wire sentinel. ## ## `cborEncode` / `cborDecode` are the public API the macros and tests use. @@ -24,6 +27,7 @@ ## `.ffiCtor.`, which is validated against `FFIContextPool` on every ## re-entry. Arbitrary user pointers would lack that validation. +import system/ansi_c import cbor_serialization, cbor_serialization/std/options, results export cbor_serialization, options, results @@ -41,15 +45,15 @@ proc cborEncode*[T](x: T): seq[byte] = return Cbor.encode(x) proc cborEncodeShared*[T](x: T): tuple[data: ptr UncheckedArray[byte], len: int] = - ## Encodes `x` into a shared-memory buffer (`allocShared`). + ## Encodes `x` into a `c_malloc` buffer. ## - ## The returned `data` is owned by the caller and must be freed exactly once - ## via `deallocShared` (the FFIThreadRequest `deleteRequest` path does this + ## The returned `data` is owned by the caller and must be freed exactly + ## once via `c_free` (the FFIThreadRequest `deleteRequest` path does this ## automatically). Empty payloads return `(nil, 0)` without allocating. let bytes = Cbor.encode(x) if bytes.len == 0: return (nil, 0) - let buf = cast[ptr UncheckedArray[byte]](allocShared(bytes.len)) + let buf = cast[ptr UncheckedArray[byte]](c_malloc(csize_t(bytes.len))) copyMem(buf, unsafeAddr bytes[0], bytes.len) return (buf, bytes.len) diff --git a/ffi/ffi_context.nim b/ffi/ffi_context.nim index bd9477c..b99777a 100644 --- a/ffi/ffi_context.nim +++ b/ffi/ffi_context.nim @@ -218,12 +218,16 @@ proc processRequest[T]( else: ctx[].registeredRequests[][reqIdCs](cast[pointer](request), ctx) + ## Catch every catchable exception (including CancelledError raised by + ## the shutdown drain in ffiRun) so handleRes — and its `deleteRequest` + ## defer — always runs. Otherwise an abandoned in-flight handler would + ## leak its request envelope, reqId copy, and CBOR payload. let res = try: await retFut - except AsyncError as exc: + except CatchableError as exc: Result[seq[byte], string].err( - "Async error in processRequest for " & reqId & ": " & exc.msg + "Error in processRequest for " & reqId & ": " & exc.msg ) ## handleRes may raise (OOM, GC setup) even though it is rare. Catching here @@ -254,7 +258,23 @@ proc ffiThreadBody[T](ctx: ptr FFIContext[T]) {.thread.} = ## Holds the main library object, i.e., in charge of handling the ffi requests. ## e.g., Waku, LibP2P, SDS, etc. + ## In-flight processRequest futures. Tracked so they can be drained on + ## shutdown — otherwise destroying the context while a handler is + ## awaiting (e.g. sleepAsync) abandons the future and leaks the + ## request's envelope/reqId/payload allocations. + var pending: seq[Future[void]] = @[] + + proc reapCompleted() = + var i = 0 + while i < pending.len: + if pending[i].finished(): + pending.del(i) + else: + inc i + while ctx.running.load(): + reapCompleted() + let gotSignal = await ctx.reqSignal.wait().withTimeout(100.milliseconds) if not gotSignal: continue @@ -268,12 +288,24 @@ proc ffiThreadBody[T](ctx: ptr FFIContext[T]) {.thread.} = ctx.myLib = addr ffiReqHandler ## Handle the request - asyncSpawn processRequest(request, ctx) + pending.add processRequest(request, ctx) let fireRes = ctx.reqReceivedSignal.fireSync() if fireRes.isErr(): error "could not fireSync back to requester thread", error = fireRes.error + ## Drain in-flight handlers so each request's `deleteRequest` runs + ## before we exit. Without this, abandoning a future mid-await would + ## leak the request allocations (visible to LSan; previously hidden + ## because Nim's pool allocator kept the chunks alive in the process). + reapCompleted() + if pending.len > 0: + try: + await allFutures(pending) + except CatchableError as exc: + error "draining pending FFI requests on shutdown raised", + error = exc.msg + waitFor ffiRun(ctx) proc cleanUpResources[T](ctx: ptr FFIContext[T]): Result[void, string] = diff --git a/ffi/ffi_thread_request.nim b/ffi/ffi_thread_request.nim index 735e0cf..25a9632 100644 --- a/ffi/ffi_thread_request.nim +++ b/ffi/ffi_thread_request.nim @@ -1,7 +1,16 @@ ## Carries one CBOR-encoded request blob between the main thread and the FFI -## thread. The main thread allocates the request (in shared memory), the FFI -## thread frees it after invoking the user callback. +## thread. The main thread allocates the request, the FFI thread frees it +## after invoking the user callback. +## +## All three pieces (envelope, reqId copy, payload buffer) are obtained from +## libc `malloc` and released by libc `free`. Nim's `allocShared` under +## `--mm:orc` is backed by a per-thread `MemRegion` stored in TLS; if the +## producer thread (commonly a transient `std::async` worker on the foreign +## side) has exited by the time the FFI thread runs `deleteRequest`, the +## chunk's `owner` pointer dangles into reclaimed TLS and the deallocator +## segfaults. `malloc`/`free` are process-global and immune to that. +import system/ansi_c import results import chronos import ./ffi_types, ./alloc, ./cbor_serial @@ -21,10 +30,10 @@ type FFIThreadRequest* = object proc allocBaseRequest( callback: FFICallBack, userData: pointer, reqId: cstring ): ptr FFIThreadRequest = - ## Allocates the request envelope in shared memory and populates the - ## routing fields. Payload setup is delegated to one of the payload helpers - ## below depending on whether the bytes need to be copied or adopted. - var ret = createShared(FFIThreadRequest) + ## Allocates the request envelope via `c_malloc` and populates the routing + ## fields. Payload setup is delegated to one of the payload helpers below + ## depending on whether the bytes need to be copied or adopted. + var ret = cast[ptr FFIThreadRequest](c_malloc(csize_t(sizeof(FFIThreadRequest)))) ret[].callback = callback ret[].userData = userData ret[].reqId = reqId.alloc() @@ -33,25 +42,26 @@ proc allocBaseRequest( return ret proc copySharedPayload(req: ptr FFIThreadRequest, data: ptr byte, dataLen: int) = - ## Allocates a fresh shared buffer and copies `dataLen` bytes from `data` - ## into `req`. Empty payloads (non-positive `dataLen` or nil `data`) leave - ## the request's payload fields at their zero-initialised state. + ## Allocates a fresh `c_malloc` buffer and copies `dataLen` bytes from + ## `data` into `req`. Empty payloads (non-positive `dataLen` or nil + ## `data`) leave the request's payload fields at their zero-initialised + ## state. if dataLen > 0 and not data.isNil(): - req[].data = cast[ptr UncheckedArray[byte]](allocShared(dataLen)) + req[].data = cast[ptr UncheckedArray[byte]](c_malloc(csize_t(dataLen))) copyMem(req[].data, data, dataLen) req[].dataLen = dataLen proc adoptOwnedSharedPayload( req: ptr FFIThreadRequest, data: ptr UncheckedArray[byte], dataLen: int ) = - ## Embeds an already-`allocShared` buffer into `req` without copying. + ## Embeds an already-`c_malloc`'d buffer into `req` without copying. ## `(nil, 0)` is the empty-payload contract; a zero-length-but-non-nil ## buffer is treated as empty and disposed here so it doesn't leak. if dataLen > 0 and not data.isNil(): req[].data = data req[].dataLen = dataLen elif not data.isNil(): - deallocShared(data) + c_free(data) proc initFromPtr*( T: typedesc[FFIThreadRequest], @@ -91,24 +101,24 @@ proc initFromOwnedShared*( data: ptr UncheckedArray[byte], dataLen: int, ): ptr type T = - ## Takes ownership of an already-allocated shared-memory buffer (`data`) - ## and embeds it in the request without copying. Pair with `cborEncodeShared` - ## so the request payload travels from encoder to FFI thread with a single - ## allocation instead of seq → allocShared + copyMem. + ## Takes ownership of an already-allocated buffer (`data`) and embeds it + ## in the request without copying. Pair with `cborEncodeShared` so the + ## request payload travels from encoder to FFI thread with a single + ## allocation instead of seq → c_malloc + copyMem. ## - ## Ownership: `data` must have been allocated via `allocShared` / grown via - ## `reallocShared`. After this call, `deleteRequest` will `deallocShared` it. - ## Pass `(nil, 0)` for an empty payload. + ## Ownership: `data` must have been allocated via `c_malloc`. After this + ## call, `deleteRequest` will `c_free` it. Pass `(nil, 0)` for an empty + ## payload. var ret = allocBaseRequest(callback, userData, reqId) adoptOwnedSharedPayload(ret, data, dataLen) return ret proc deleteRequest*(request: ptr FFIThreadRequest) = if not request[].data.isNil: - deallocShared(request[].data) + c_free(request[].data) if not request[].reqId.isNil: - deallocShared(request[].reqId) - deallocShared(request) + c_free(cast[pointer](request[].reqId)) + c_free(request) proc handleRes*(res: Result[seq[byte], string], request: ptr FFIThreadRequest) = ## Fires the registered callback exactly once and frees the request. diff --git a/nimble.lock b/nimble.lock new file mode 100644 index 0000000..07a572f --- /dev/null +++ b/nimble.lock @@ -0,0 +1,174 @@ +{ + "version": 2, + "packages": { + "unittest2": { + "version": "0.2.5", + "vcsRevision": "26f2ef3ae0ec72a2a75bfe557e02e88f6a31c189", + "url": "https://github.com/status-im/nim-unittest2", + "downloadMethod": "git", + "dependencies": [], + "checksums": { + "sha1": "02bb3751ba9ddc3c17bfd89f2e41cb6bfb8fc0c9" + } + }, + "bearssl": { + "version": "0.2.8", + "vcsRevision": "22c6a76ce015bc07e011562bdcfc51d9446c1e82", + "url": "https://github.com/status-im/nim-bearssl", + "downloadMethod": "git", + "dependencies": [ + "unittest2" + ], + "checksums": { + "sha1": "da4dd7ae96d536bdaf42dca9c85d7aed024b6a86" + } + }, + "results": { + "version": "0.5.1", + "vcsRevision": "df8113dda4c2d74d460a8fa98252b0b771bf1f27", + "url": "https://github.com/arnetheduck/nim-results", + "downloadMethod": "git", + "dependencies": [], + "checksums": { + "sha1": "a9c011f74bc9ed5c91103917b9f382b12e82a9e7" + } + }, + "stew": { + "version": "0.5.0", + "vcsRevision": "4382b18f04b3c43c8409bfcd6b62063773b2bbaa", + "url": "https://github.com/status-im/nim-stew", + "downloadMethod": "git", + "dependencies": [ + "results", + "unittest2" + ], + "checksums": { + "sha1": "db22942939773ab7d5a0f2b2668c237240c67dd6" + } + }, + "faststreams": { + "version": "0.5.1", + "vcsRevision": "50889cd16ec8771106cdd0eeea460039e8571e06", + "url": "https://github.com/status-im/nim-faststreams", + "downloadMethod": "git", + "dependencies": [ + "stew", + "unittest2" + ], + "checksums": { + "sha1": "969ceb3666e807db8fe5c8df63466749822367a9" + } + }, + "serialization": { + "version": "0.5.2", + "vcsRevision": "b0f2fa32960ea532a184394b0f27be37bd80248b", + "url": "https://github.com/status-im/nim-serialization", + "downloadMethod": "git", + "dependencies": [ + "faststreams", + "unittest2", + "stew" + ], + "checksums": { + "sha1": "fa35c1bb76a0a02a2379fe86eaae0957c7527cb8" + } + }, + "cbor_serialization": { + "version": "0.3.0", + "vcsRevision": "1664160e04d153573373afddc552b9cbf6fbe4dc", + "url": "https://github.com/vacp2p/nim-cbor-serialization", + "downloadMethod": "git", + "dependencies": [ + "serialization", + "stew", + "results" + ], + "checksums": { + "sha1": "ab126eae09a6e39c72972a6a0b83cb06a2ffe8f0" + } + }, + "json_serialization": { + "version": "0.4.4", + "vcsRevision": "c343b0e243d9e17e2c40f3a8a24340f7c4a71d44", + "url": "https://github.com/status-im/nim-json-serialization", + "downloadMethod": "git", + "dependencies": [ + "faststreams", + "serialization", + "stew", + "results" + ], + "checksums": { + "sha1": "8b3115354104858a0ac9019356fb29720529c2bd" + } + }, + "testutils": { + "version": "0.8.1", + "vcsRevision": "6ce5e5e2301ccbc04b09d27ff78741ff4d352b4d", + "url": "https://github.com/status-im/nim-testutils", + "downloadMethod": "git", + "dependencies": [ + "unittest2" + ], + "checksums": { + "sha1": "96a11cf8b84fa9bd12d4a553afa1cc4b7f9df4e3" + } + }, + "chronicles": { + "version": "0.12.2", + "vcsRevision": "27ec507429a4eb81edc20f28292ee8ec420be05b", + "url": "https://github.com/status-im/nim-chronicles", + "downloadMethod": "git", + "dependencies": [ + "faststreams", + "serialization", + "json_serialization", + "testutils" + ], + "checksums": { + "sha1": "02febb20d088120b2836d3306cfa21f434f88f65" + } + }, + "httputils": { + "version": "0.4.1", + "vcsRevision": "f142cb2e8bd812dd002a6493b6082827bb248592", + "url": "https://github.com/status-im/nim-http-utils", + "downloadMethod": "git", + "dependencies": [ + "stew", + "results", + "unittest2" + ], + "checksums": { + "sha1": "016774ab31c3afff9a423f7d80584905ee59c570" + } + }, + "chronos": { + "version": "4.2.2", + "vcsRevision": "45f43a9ad8bd8bcf5903b42f365c1c879bd54240", + "url": "https://github.com/status-im/nim-chronos", + "downloadMethod": "git", + "dependencies": [ + "results", + "stew", + "bearssl", + "httputils", + "unittest2" + ], + "checksums": { + "sha1": "3a4c9477df8cef20a04e4f1b54a2d74fdfc2a3d0" + } + }, + "taskpools": { + "version": "0.1.0", + "vcsRevision": "9e8ccc754631ac55ac2fd495e167e74e86293edb", + "url": "https://github.com/status-im/nim-taskpools", + "downloadMethod": "git", + "dependencies": [], + "checksums": { + "sha1": "09e1b2fdad55b973724d61227971afc0df0b7a81" + } + } + }, + "tasks": {} +} diff --git a/tests/e2e/cpp/CMakeLists.txt b/tests/e2e/cpp/CMakeLists.txt index 41b2940..7b68a3d 100644 --- a/tests/e2e/cpp/CMakeLists.txt +++ b/tests/e2e/cpp/CMakeLists.txt @@ -28,6 +28,11 @@ add_executable(timer_e2e_tests test_timer_e2e.cpp) target_link_libraries(timer_e2e_tests PRIVATE my_timer_headers GTest::gtest_main) add_dependencies(timer_e2e_tests nim_lib) +if(NIM_FFI_SAN_CFLAGS) + target_compile_options(timer_e2e_tests PRIVATE ${NIM_FFI_SAN_CFLAGS}) + target_link_options(timer_e2e_tests PRIVATE ${NIM_FFI_SAN_LFLAGS}) +endif() + # The Nim-built shared library has install_name `@rpath/libmy_timer.dylib` # (set by `declareLibrary` on macOS for portability). The test binary must # therefore know where to find that dylib at load time — embed the build-tree @@ -38,5 +43,27 @@ set_target_properties(timer_e2e_tests PROPERTIES BUILD_RPATH "${_my_timer_dir}" INSTALL_RPATH "${_my_timer_dir}") +# Per-sanitizer runtime options: halt and exit non-zero on any report so +# ctest fails the job. The matching ASAN_OPTIONS / UBSAN_OPTIONS / +# LSAN_OPTIONS / TSAN_OPTIONS in CI provide the same defaults; we set them +# here too so local `ctest` runs behave identically. +# +# `asan-ubsan` runs LSan as part of ASan (detect_leaks=1). LSAN_OPTIONS +# is still honoured for suppressions when LSan runs under ASan's runtime. +set(_san_test_env "") +if(NIM_FFI_SANITIZER STREQUAL "asan-ubsan") + list(APPEND _san_test_env + "ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:detect_leaks=1:strict_string_checks=1" + "UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1" + "LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_SOURCE_DIR}/lsan.supp:print_suppressions=0") +elseif(NIM_FFI_SANITIZER STREQUAL "tsan") + list(APPEND _san_test_env + "TSAN_OPTIONS=halt_on_error=1:second_deadlock_stack=1:history_size=7") +endif() + include(GoogleTest) -gtest_discover_tests(timer_e2e_tests) +if(_san_test_env) + gtest_discover_tests(timer_e2e_tests PROPERTIES ENVIRONMENT "${_san_test_env}") +else() + gtest_discover_tests(timer_e2e_tests) +endif() diff --git a/tests/e2e/cpp/lsan.supp b/tests/e2e/cpp/lsan.supp new file mode 100644 index 0000000..94add2f --- /dev/null +++ b/tests/e2e/cpp/lsan.supp @@ -0,0 +1,22 @@ +# LeakSanitizer suppressions for the Nim runtime. +# +# These are process-lifetime allocations freed implicitly at exit — +# not real leaks. Add new entries here only with a comment justifying +# why the leak is unavoidable, and only for symbols inside the Nim +# standard library, chronos, or chronicles. Anything in our own code +# (ffi/*) or the generated bindings must be fixed, not suppressed. + +# Nim runtime initialisation — allocates global state freed at exit. +leak:NimMain +leak:PreMain +leak:systemDatInit + +# GC bootstrap — registers stack bottom / TLS slots once per thread. +leak:nimGC_setStackBottom +leak:initStackBottomWith +leak:setupForeignThreadGc + +# Async / logging library globals (event loop singletons, logger +# registries) — owned for the process lifetime. +leak:chronos +leak:chronicles diff --git a/tests/e2e/cpp/test_timer_e2e.cpp b/tests/e2e/cpp/test_timer_e2e.cpp index 79e9f6b..4941f63 100644 --- a/tests/e2e/cpp/test_timer_e2e.cpp +++ b/tests/e2e/cpp/test_timer_e2e.cpp @@ -8,9 +8,11 @@ #include "my_timer.hpp" +#include #include #include #include +#include #include #include @@ -128,3 +130,36 @@ TEST(TimerE2E, IndependentContextsKeepTheirOwnState) { EXPECT_EQ(rA.timerName, "alpha"); EXPECT_EQ(rB.timerName, "beta"); } + +// Concurrency workload for ThreadSanitizer: many threads hammering both a +// shared context (multi-producer into the same SPSC request queue — where +// producer-side races would live) and per-thread contexts (validates +// independent FFI threads stay isolated). Mixes sync and async paths so +// both code paths are exercised. +TEST(TimerE2E, ThreadedHammer) { + constexpr int kThreads = 8; + constexpr int kIters = 50; + + auto shared = makeCtx("hammer-shared"); + + std::vector workers; + std::atomic errors{0}; + workers.reserve(kThreads); + + for (int t = 0; t < kThreads; ++t) { + workers.emplace_back([&, t] { + auto own = makeCtx("hammer-t" + std::to_string(t)); + for (int i = 0; i < kIters; ++i) { + if ((i & 1) == 0) { + const auto r = shared.echo(EchoRequest{"s", 0}); + if (r.echoed != "s") ++errors; + } else { + auto f = own.echoAsync(EchoRequest{"a", 1}); + if (f.get().echoed != "a") ++errors; + } + } + }); + } + for (auto& w : workers) w.join(); + EXPECT_EQ(errors.load(), 0); +} diff --git a/tests/unit/test_alloc.nim b/tests/unit/test_alloc.nim index cd56e4c..64ce618 100644 --- a/tests/unit/test_alloc.nim +++ b/tests/unit/test_alloc.nim @@ -7,33 +7,33 @@ suite "alloc(cstring)": let res = alloc(s) check res != nil check res[0] == '\0' - deallocShared(res) + dealloc(res) test "copies content": let res = alloc("hello world".cstring) check $res == "hello world" - deallocShared(res) + dealloc(res) test "empty cstring": let res = alloc("".cstring) check len(res) == 0 - deallocShared(res) + dealloc(res) suite "alloc(string)": test "copies content": let res = alloc("test string") check $res == "test string" - deallocShared(res) + dealloc(res) test "empty string": let res = alloc("") check len(res) == 0 - deallocShared(res) + dealloc(res) test "string with special characters": let res = alloc("abc\0xyz") check res[0] == 'a' - deallocShared(res) + dealloc(res) suite "allocSharedSeq / deallocSharedSeq / toSeq": test "roundtrip int seq": diff --git a/tests/unit/test_cddl_codegen.nim b/tests/unit/test_cddl_codegen.nim index 32fba20..df32825 100644 --- a/tests/unit/test_cddl_codegen.nim +++ b/tests/unit/test_cddl_codegen.nim @@ -4,16 +4,16 @@ import std/strutils import unittest2 -import ../ffi/codegen/[meta, cddl] +import ffi/codegen/[meta, cddl] proc fieldsOf(pairs: openArray[(string, string)]): seq[FFIFieldMeta] = - var res = @[] + var res: seq[FFIFieldMeta] = @[] for p in pairs: res.add(FFIFieldMeta(name: p[0], typeName: p[1])) return res proc paramsOf(triples: openArray[(string, string, bool)]): seq[FFIParamMeta] = - var res = @[] + var res: seq[FFIParamMeta] = @[] for t in triples: res.add(FFIParamMeta(name: t[0], typeName: t[1], isPtr: t[2])) return res @@ -35,9 +35,6 @@ suite "nimTypeToCddl primitive mapping": check nimTypeToCddl("cstring") == "tstr" check nimTypeToCddl("pointer") == "uint" - test "pointer types map to uint": - check nimTypeToCddl("ptr Foo") == "uint" - test "seq[T] becomes [* T]": check nimTypeToCddl("seq[int]") == "[* int]" check nimTypeToCddl("seq[string]") == "[* tstr]" @@ -66,32 +63,29 @@ suite "generateCddlSchema": FFIProcMeta( procName: "nimtimer_create", libName: "nimtimer", - kind: ffiCtorKind, + kind: FFIKind.CTOR, libTypeName: "NimTimer", extraParams: @[param("config", "TimerConfig")], returnTypeName: "NimTimer", returnIsPtr: false, - isAsync: true, ), FFIProcMeta( procName: "nimtimer_echo", libName: "nimtimer", - kind: ffiFfiKind, + kind: FFIKind.FFI, libTypeName: "NimTimer", extraParams: @[param("req", "EchoRequest")], returnTypeName: "EchoResponse", returnIsPtr: false, - isAsync: true, ), FFIProcMeta( procName: "nimtimer_destroy", libName: "nimtimer", - kind: ffiDtorKind, + kind: FFIKind.DTOR, libTypeName: "NimTimer", extraParams: @[], returnTypeName: "", returnIsPtr: false, - isAsync: false, ), ] @@ -121,5 +115,5 @@ suite "generateCddlSchema": test "kind tags appear in proc comments": check "; nimtimer_create (ctor)" in cddl - check "; nimtimer_echo (async)" in cddl + check "; nimtimer_echo (ffi)" in cddl check "; nimtimer_destroy (dtor)" in cddl diff --git a/tests/unit/test_serial.nim b/tests/unit/test_serial.nim index c2dd54d..62d518a 100644 --- a/tests/unit/test_serial.nim +++ b/tests/unit/test_serial.nim @@ -187,18 +187,20 @@ suite "CBOR error handling": # --------------------------------------------------------------------------- # Regression for PR #23 review item 9: cborEncodeShared writes directly into -# a shared-memory buffer (allocShared), letting the FFI thread request take -# ownership without an intermediate seq[byte] copy. The shared-encoder must -# produce byte-for-byte the same output as the seq-encoder. +# a c_malloc buffer, letting the FFI thread request take ownership without +# an intermediate seq[byte] copy. The shared-encoder must produce +# byte-for-byte the same output as the seq-encoder. # --------------------------------------------------------------------------- +import system/ansi_c + suite "cborEncodeShared": test "object payload round-trips": let n = Nested(label: "", point: Point(x: 0, y: 0)) let (sd, sl) = cborEncodeShared(n) defer: if not sd.isNil: - deallocShared(sd) + c_free(sd) check sl > 0 let back = cborDecodePtr(sd, sl, Nested).value check back.label == "" @@ -211,7 +213,7 @@ suite "cborEncodeShared": let (sd, sl) = cborEncodeShared(n) defer: if not sd.isNil: - deallocShared(sd) + c_free(sd) check sl == seqBytes.len for i in 0 ..< sl: check sd[i] == seqBytes[i] @@ -220,8 +222,8 @@ suite "cborEncodeShared": check back.point.x == 3 check back.point.y == 4 - test "large string growth (exercises reallocShared)": - # Larger than the initial 16-byte cap so reallocShared must run several + test "large string growth": + # Larger than the initial 16-byte cap so the encoder must grow several # times; verifies the shared-mode grower handles repeated reallocations. var big = newString(4096) for i in 0 ..< big.len: @@ -229,7 +231,7 @@ suite "cborEncodeShared": let (sd, sl) = cborEncodeShared(big) defer: if not sd.isNil: - deallocShared(sd) + c_free(sd) let back = cborDecodePtr(sd, sl, string).value check back == big @@ -237,6 +239,6 @@ suite "cborEncodeShared": let (sd, sl) = cborEncodeShared("") defer: if not sd.isNil: - deallocShared(sd) + c_free(sd) check sl == 1 check sd[0] == 0x60'u8 diff --git a/tsan.supp b/tsan.supp new file mode 100644 index 0000000..abeb79e --- /dev/null +++ b/tsan.supp @@ -0,0 +1,18 @@ +# ThreadSanitizer suppressions for the FFI test suite. +# +# Format reference: https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions +# +# Suppress the intentional thread leak from +# `destroyFFIContext does not hang when event loop is blocked`: +# when the FFI event loop is wedged by a synchronous handler, clearContext +# in ffi/ffi_context.nim deliberately skips joinThread on ThreadExitTimeout +# rather than hang the caller forever. The thread terminates cleanly, but +# was never joined — TSan reports that as a thread leak. +thread:*initContextResources* + +# Same intentional-leak path: because the FFI thread is never joined, +# TSan cannot prove happens-before between its writes (inside Nim's +# threadProcWrapper / the embedded Thread[T] struct on the test's stack) +# and the next test's reuse of the same stack region. Reported as a +# data race; root cause is the documented leak above. +race:*threadProcWrapper* diff --git a/versions.env b/versions.env new file mode 100644 index 0000000..49a48cf --- /dev/null +++ b/versions.env @@ -0,0 +1,2 @@ +NIM_VERSIONS=["2.2.4","stable"] +NIMBLE_VERSION=0.22.3