nim-sds/tests/test_bloom.nim

import unittest, results, strutils
import sds/bloom
from random import rand, randomize

suite "bloom filter":
  # `setup` runs before every test below: it builds a filter sized for
  # `nElementsToTest` items at a 0.1% target error rate and fills it with
  # seeded pseudo-random 8-character keys.
  setup:
    let nElementsToTest = 10000
    let bfResult =
      initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001)
    check bfResult.isOk
    var bf = bfResult.get
    randomize(2882) # Fixed seed keeps the generated keys reproducible
    let
      sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
      # `rand(max)` is inclusive, so only indices 0..51 (the 52 letters) are
      # ever drawn; the trailing digits of `sampleChars` are unused. The bound
      # is kept as-is so the seeded key sequence, and therefore the observed
      # false-positive count in "error rate", stays unchanged.
      maxCharIdx = 51
    var testElements = newSeq[string](nElementsToTest)

    for i in 0 ..< nElementsToTest:
      var newString = ""
      for j in 0 .. 7: # 8 characters per inserted key
        newString.add(sampleChars[rand(maxCharIdx)])
      testElements[i] = newString

    for item in testElements:
      bf.insert(item)

  test "initialization parameters":
    # Values the filter derives for capacity 10000 at a 0.001 error rate.
    check bf.capacity == nElementsToTest
    check bf.errorRate == 0.001
    check bf.kHashes == 10
    check bf.mBits div bf.capacity == 15 # bits per element

  test "basic operations":
    # `bf` was already filled by `setup`, so this probes an absent key on a
    # populated filter (not an empty one).
    check not bf.lookup("nonexistent")

    let bf2Result = initializeBloomFilter(100, 0.01)
    check bf2Result.isOk
    var bf2 = bf2Result.get
    # Genuinely empty filter: nothing has been inserted yet.
    check not bf2.lookup("test string")
    bf2.insert("test string")
    check bf2.lookup("test string")
    check not bf2.lookup("different string")

  test "error rate":
    # Probe with keys that were never inserted and compare the observed
    # false-positive rate against the configured target.
    let testSize = nElementsToTest div 2
    var falsePositives = 0
    for i in 0 ..< testSize:
      var testString = ""
      # 9 characters, so a probe can never equal an 8-character inserted key.
      for j in 0 .. 8:
        testString.add(sampleChars[rand(maxCharIdx)])
      if bf.lookup(testString):
        falsePositives.inc()

    let actualErrorRate = falsePositives.float / testSize.float
    check actualErrorRate < bf.errorRate * 1.5 # Allow some margin

  test "perfect recall":
    # Every inserted key must be reported as present (no false negatives).
    var lookupErrors = 0
    for item in testElements:
      if not bf.lookup(item):
        lookupErrors.inc()
    check lookupErrors == 0

  test "k/m bits specification":
    # Test error case for k > 12
    let errorCase = getMOverNBitsForK(k = 13, targetError = 0.01)
    check errorCase.isErr
    check errorCase.error ==
      "K must be <= 12 if forceNBitsPerElem is not also specified."

    # Test error case for unachievable error rate
    let errorCase2 = getMOverNBitsForK(k = 2, targetError = 0.00001)
    check errorCase2.isErr
    check errorCase2.error ==
      "Specified value of k and error rate not achievable using less than 4 bytes / element."

    # Test success cases
    let case1 = getMOverNBitsForK(k = 2, targetError = 0.1)
    check case1.isOk
    check case1.value == 6

    let case2 = getMOverNBitsForK(k = 7, targetError = 0.01)
    check case2.isOk
    check case2.value == 10

    let case3 = getMOverNBitsForK(k = 7, targetError = 0.001)
    check case3.isOk
    check case3.value == 16

    # Explicit k and bits-per-element: 10000 elements * 20 bits = 200000 bits.
    let bf2Result =
      initializeBloomFilter(10000, 0.001, k = 4, forceNBitsPerElem = 20)
    check bf2Result.isOk
    let bf2 = bf2Result.get
    check bf2.kHashes == 4
    check bf2.mBits == 200000

  test "string representation":
    # `$` output is expected to mention capacity, hash count and error rate.
    let bf3Result = initializeBloomFilter(1000, 0.01, k = 4)
    check bf3Result.isOk
    let bf3 = bf3Result.get
    let str = $bf3
    check str.contains("1000") # Capacity
    check str.contains("4 hash") # Hash functions
    check str.contains("1.0e-02") # Error rate in scientific notation

suite "bloom filter special cases":
  test "different patterns of strings":
    # Inserts a handful of unusual strings plus `testSize` generated keys,
    # then verifies recall and the observed false-positive rate.
    const testSize = 10_000

    # Seed the global RNG here so this test is reproducible on its own
    # instead of depending on RNG state left behind by earlier tests.
    randomize(2882)

    let patterns = @[
      "shortstr",
      repeat("a", 1000), # Very long string
      "special@#$%^&*()", # Special characters
      "unicode→★∑≈", # Unicode characters
      repeat("pattern", 10), # Repeating pattern
    ]

    let bfResult = initializeBloomFilter(testSize, 0.01)
    check bfResult.isOk
    var bf = bfResult.get
    var inserted = newSeq[string](testSize)

    # Test pattern handling. Failures are reported through unittest rather
    # than `assert`, which is compiled out under `-d:danger` and would bypass
    # the test runner's failure reporting.
    for pattern in patterns:
      bf.insert(pattern)
      if not bf.lookup(pattern):
        checkpoint("failed lookup pattern: " & pattern)
        fail()

    # Test general insertion and lookup. The `$i` prefix makes every key
    # unique regardless of the random suffix.
    for i in 0 ..< testSize:
      inserted[i] = $i & "test" & $rand(1000)
      bf.insert(inserted[i])

    # Verify all insertions
    var lookupErrors = 0
    for item in inserted:
      if not bf.lookup(item):
        lookupErrors.inc()
    check lookupErrors == 0

    # Check false positive rate using keys with a prefix that was never
    # inserted.
    var falsePositives = 0
    let fpTestSize = testSize div 2
    for i in 0 ..< fpTestSize:
      let testItem = "notpresent" & $i & $rand(1000)
      if bf.lookup(testItem):
        falsePositives.inc()

    let fpRate = falsePositives.float / fpTestSize.float
    check fpRate < bf.errorRate * 1.5 # Allow some margin but should be close to target
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`import unittest, results, strutils`
feat: refactor to support building with Nimble (#52) Changes include: - Removing all submodules from vendor folder. - Updating sds.nimble with required depndencies. - Generating a nimble.lock file using Nimble. - Updated Nim code to reference depndencies correctly. - Added nix/deps.nix fixed output derivation that calls Nimble. - Updated nixpkgs to use 25.11 commit which provides Nimbe 0.20.1. - Disabled Nix Android builds on MacOS due to Nimble segfault. Signed-off-by: Jakub Sokołowski <jakub@status.im> 2026-02-11 16:32:02 +01:00			`import sds/bloom`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`from random import rand, randomize`

			`suite "bloom filter":`
			`setup:`
			`let nElementsToTest = 10000`
			`let bfResult = initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001)`
			`check bfResult.isOk`
			`var bf = bfResult.get`
			`randomize(2882) # Seed the RNG`
			`var`
			`sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"`
			`testElements = newSeq[string](nElementsToTest)`

feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for i in 0 ..< nElementsToTest:`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`var newString = ""`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for j in 0 .. 7:`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`newString.add(sampleChars[rand(51)])`
			`testElements[i] = newString`

			`for item in testElements:`
			`bf.insert(item)`

			`test "initialization parameters":`
			`check bf.capacity == nElementsToTest`
			`check bf.errorRate == 0.001`
			`check bf.kHashes == 10`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check bf.mBits div bf.capacity == 15 # bits per element`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00
			`test "basic operations":`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check bf.lookup("nonexistent") == false # Test empty lookup`

feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`let bf2Result = initializeBloomFilter(100, 0.01)`
			`check bf2Result.isOk`
			`var bf2 = bf2Result.get`
			`bf2.insert("test string")`
			`check bf2.lookup("test string") == true`
			`check bf2.lookup("different string") == false`

			`test "error rate":`
			`var falsePositives = 0`
			`let testSize = nElementsToTest div 2`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for i in 0 ..< testSize:`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`var testString = ""`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for j in 0 .. 8: # Different length than setup`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`testString.add(sampleChars[rand(51)])`
			`if bf.lookup(testString):`
			`falsePositives.inc()`

			`let actualErrorRate = falsePositives.float / testSize.float`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check actualErrorRate < bf.errorRate * 1.5 # Allow some margin`

feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`test "perfect recall":`
			`var lookupErrors = 0`
			`for item in testElements:`
			`if not bf.lookup(item):`
			`lookupErrors.inc()`
			`check lookupErrors == 0`

			`test "k/m bits specification":`
			`# Test error case for k > 12`
			`let errorCase = getMOverNBitsForK(k = 13, targetError = 0.01)`
			`check errorCase.isErr`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check errorCase.error ==`
			`"K must be <= 12 if forceNBitsPerElem is not also specified."`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00
			`# Test error case for unachievable error rate`
			`let errorCase2 = getMOverNBitsForK(k = 2, targetError = 0.00001)`
			`check errorCase2.isErr`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check errorCase2.error ==`
			`"Specified value of k and error rate not achievable using less than 4 bytes / element."`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00
			`# Test success cases`
			`let case1 = getMOverNBitsForK(k = 2, targetError = 0.1)`
			`check case1.isOk`
			`check case1.value == 6`

			`let case2 = getMOverNBitsForK(k = 7, targetError = 0.01)`
			`check case2.isOk`
			`check case2.value == 10`

			`let case3 = getMOverNBitsForK(k = 7, targetError = 0.001)`
			`check case3.isOk`
			`check case3.value == 16`

			`let bf2Result = initializeBloomFilter(10000, 0.001, k = 4, forceNBitsPerElem = 20)`
			`check bf2Result.isOk`
			`let bf2 = bf2Result.get`
			`check bf2.kHashes == 4`
			`check bf2.mBits == 200000`

			`test "string representation":`
			`let bf3Result = initializeBloomFilter(1000, 0.01, k = 4)`
			`check bf3Result.isOk`
			`let bf3 = bf3Result.get`
			`let str = $bf3`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check str.contains("1000") # Capacity`
			`check str.contains("4 hash") # Hash functions`
			`check str.contains("1.0e-02") # Error rate in scientific notation`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00
			`suite "bloom filter special cases":`
			`test "different patterns of strings":`
			`const testSize = 10_000`
feat: make Persistence interface async (#69) * feat: make Persistence interface async The 14 Persistence proc fields now return Future[...] with {.async: (raises: []), gcsafe.}, allowing real I/O backends (SQLite, encrypted file, network) to suspend rather than block the Chronos event loop the manager runs on. Propagates through: - ReliabilityManager.lock: system.Lock -> chronos.AsyncLock. Acquired across awaits cleanly; matches the single-threaded Chronos worker the FFI uses. Multi-OS-thread use is now explicitly the caller's responsibility. - sds_utils + sds.nim public API procs (wrapOutgoingMessage, unwrapReceivedMessage, markDependenciesMet, setCallbacks, resetReliabilityManager, cleanup, ensureChannel, removeChannel, the getter snapshots, etc.) are now async. - FFI request handlers in library/sds_thread/... await the new API. - Tests converted via an asyncTest template that wraps each test body in an async proc; setup/teardown use waitFor for their single async call (ensureChannel / cleanup). Lock scope is preserved exactly: the same call sites that held the kernel Lock today hold AsyncLock now -- no new locking added. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * refactor: drop asyncSpawn, add asyncSetup/asyncTeardown Three asyncSpawn usages removed: - sds.nim startPeriodicTasks: stored the periodic-task futures on ReliabilityManager (new field `periodicTasks: seq[FutureBase]`) so cleanup can cancel them on shutdown instead of leaking the loops against a cleared manager. - library/sds_thread/sds_thread.nim: fireSync moved BEFORE processing, then `await SdsThreadRequest.process(...)` instead of asyncSpawn'ing it. Aligns the worker with the SP-channel + lock assumption that there are no concurrent requests; caller throughput is unchanged because the caller only waits for receipt (fireSync), not processing. - tests TestBus repair callback: replaced asyncSpawn(deliverExcept...) with an explicit pending-delivery queue drained by `bus.drain()`. 
Integration tests no longer rely on `sleepAsync(10ms)` to let spawned deliveries finish — they await drain instead. Tests also pick up an asyncSetup/asyncTeardown pair (tests/async_unittest.nim) so suite fixtures can `await` directly. All `waitFor` in setup/teardown blocks is gone; only the top-level asyncTest wrapper still uses waitFor (once, to drive the async proc to completion). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * Correctly propagate error hidden by new async move * Correctly handle future cancellation exceptions, +some housekeeping * Apply suggestion from @Ivansete-status Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> * Stylistics, async default implication addressed, nph style run * Remove leaking CancelledFuture from public facing + as a consequence it is tuneled into handling CatchableError everywhere --------- Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com> Co-authored-by: Ivan FB <128452529+Ivansete-status@users.noreply.github.com> 2026-05-25 22:30:15 +02:00			`let patterns = @[`
			`"shortstr",`
			`repeat("a", 1000), # Very long string`
			`"special@#$%^&*()", # Special characters`
			`"unicode→★∑≈", # Unicode characters`
			`repeat("pattern", 10), # Repeating pattern`
			`]`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`let bfResult = initializeBloomFilter(testSize, 0.01)`
			`check bfResult.isOk`
			`var bf = bfResult.get`
			`var inserted = newSeq[string](testSize)`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`# Test pattern handling`
			`for pattern in patterns:`
			`bf.insert(pattern)`
			`assert bf.lookup(pattern), "failed lookup pattern: " & pattern`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`# Test general insertion and lookup`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for i in 0 ..< testSize:`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`inserted[i] = $i & "test" & $rand(1000)`
			`bf.insert(inserted[i])`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`# Verify all insertions`
			`var lookupErrors = 0`
			`for item in inserted:`
			`if not bf.lookup(item):`
			`lookupErrors.inc()`
			`check lookupErrors == 0`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`# Check false positive rate`
			`var falsePositives = 0`
			`let fpTestSize = testSize div 2`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`for i in 0 ..< fpTestSize:`
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`let testItem = "notpresent" & $i & $rand(1000)`
			`if bf.lookup(testItem):`
			`falsePositives.inc()`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30
feat: add bloom filter (#3) 2025-01-13 13:49:28 +04:00			`let fpRate = falsePositives.float / fpTestSize.float`
feat: add rolling bloom filter, reliability utils and protobuf (#4) 2025-02-11 13:23:19 +05:30			`check fpRate < bf.errorRate * 1.5 # Allow some margin but should be close to target`