2025-01-13 13:49:28 +04:00
|
|
|
from math import ceil, ln, pow, round
|
|
|
|
|
import hashes
|
|
|
|
|
import strutils
|
|
|
|
|
import results
|
|
|
|
|
import private/probabilities
|
2026-04-24 09:50:18 +02:00
|
|
|
import ./types/bloom_filter
|
|
|
|
|
export bloom_filter
|
2025-01-13 13:49:28 +04:00
|
|
|
|
2025-02-11 13:23:19 +05:30
|
|
|
{.push overflowChecks: off.} # hashing arithmetic below is allowed to wrap

proc hashN(item: string, n: int, maxValue: int): int =
  ## Produces the ``n``-th hash of ``item``, reduced modulo ``maxValue``.
  ##
  ## Two base hashes are taken with Nim's built-in ``hash`` (one of
  ## ``item`` itself, one of ``item`` with the suffix ``" b"``) and
  ## combined as ``hashA + n * hashB``, i.e. the double hashing technique
  ## from Kirsch and Mitzenmacher, 2008:
  ## http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
  # `abs` folds negative hash values before each modulo reduction.
  let primary = abs(hash(item)) mod maxValue
  # The second, independent-looking hash comes from a suffixed copy.
  let secondary = abs(hash(item & " b")) mod maxValue
  result = abs(primary + n * secondary) mod maxValue

{.pop.}
|
|
|
|
|
|
2025-02-11 13:23:19 +05:30
|
|
|
proc getMOverNBitsForK*(
    k: int, targetError: float, probabilityTable = kErrors
): Result[int, string] =
  ## Looks up the m/n ratio (bits per element) to use with ``k`` hash
  ## functions.
  ##
  ## Row ``k`` of ``probabilityTable`` is scanned upwards from index 2 and
  ## the first index whose entry is below ``targetError`` is returned.
  ## An error is returned when ``k`` lies outside ``0 .. 12`` or when no
  ## entry in the row is below ``targetError``.
  if k < 0 or k > 12:
    return err("K must be <= 12 if forceNBitsPerElem is not also specified.")

  var candidate = 2
  while candidate <= probabilityTable[k].high:
    if targetError > probabilityTable[k][candidate]:
      return ok(candidate)
    inc candidate

  return err(
    "Specified value of k and error rate not achievable using less than 4 bytes / element."
  )
|
2025-01-13 13:49:28 +04:00
|
|
|
|
2025-02-11 13:23:19 +05:30
|
|
|
proc initializeBloomFilter*(
    capacity: int, errorRate: float, k = 0, forceNBitsPerElem = 0
): Result[BloomFilter, string] =
  ## Initializes a Bloom filter with specified parameters.
  ##
  ## Parameters:
  ## - capacity: Expected number of elements to be inserted. Must be >= 1.
  ## - errorRate: Desired false positive rate (e.g., 0.01 for 1%). Must lie
  ##   strictly between 0 and 1 when ``k`` is calculated automatically.
  ## - k: Optional number of hash functions. If 0, calculated optimally.
  ##   See http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html for
  ##   useful tables on k and m/n (n bits per element) combinations.
  ## - forceNBitsPerElem: Optional override for bits per element. Only
  ##   consulted when ``k`` >= 1; otherwise the value is derived from
  ##   ``errorRate``.
  ##
  ## Returns the configured filter, or an error string when the parameters
  ## are invalid or the requested ``k`` / ``errorRate`` combination cannot
  ## be satisfied from the lookup table.
  if capacity < 1:
    # A non-positive capacity gives mBits <= 0, which would make the modulo
    # in hashN divide by zero or size the bit array negatively.
    return err("Capacity must be at least 1.")

  var
    kHashes: int
    nBitsPerElem: int

  if k < 1: # Calculate optimal k and use that
    # ln() is finite and negative only on (0, 1). Anything else (NaN
    # included) would produce a zero, infinite or NaN bit count below.
    if not (errorRate > 0.0 and errorRate < 1.0):
      return err("Error rate must be strictly between 0 and 1.")
    let bitsPerElem = ceil(-1.0 * (ln(errorRate) / (pow(ln(2.float), 2))))
    kHashes = round(ln(2.float) * bitsPerElem).int
    nBitsPerElem = bitsPerElem.int # already integral thanks to ceil
  else: # Use specified k if possible
    if forceNBitsPerElem < 1: # Use lookup table
      let mOverNRes = getMOverNBitsForK(k = k, targetError = errorRate)
      if mOverNRes.isErr:
        return err(mOverNRes.error)
      nBitsPerElem = mOverNRes.value
    else:
      nBitsPerElem = forceNBitsPerElem
    kHashes = k

  let
    mBits = capacity * nBitsPerElem
    # One extra word so that the last partial word of bits is always backed.
    mInts = 1 + mBits div (sizeof(int) * 8)

  return ok(
    BloomFilter.init(
      capacity = capacity,
      errorRate = errorRate,
      kHashes = kHashes,
      mBits = mBits,
      intArray = newSeq[int](mInts),
    )
  )
|
2025-01-13 13:49:28 +04:00
|
|
|
|
|
|
|
|
proc `$`*(bf: BloomFilter): string =
  ## Prints the configuration of the Bloom filter: capacity, error rate
  ## (scientific notation, one digit of precision), number of hash
  ## functions and ``mBits div capacity``.
  ##
  ## A filter whose ``capacity`` is 0 reports 0 for the last figure instead
  ## of raising a division-by-zero defect.
  # NOTE(review): the value printed for $4 is bits *per element*
  # (mBits div capacity), while the label reads "bits of memory" --
  # confirm which one is intended before changing the text.
  let bitsPerElem =
    if bf.capacity != 0:
      bf.mBits div bf.capacity
    else:
      0
  return
    "Bloom filter with $1 capacity, $2 error rate, $3 hash functions, and requiring $4 bits of memory." %
    [
      $bf.capacity,
      formatFloat(bf.errorRate, format = ffScientific, precision = 1),
      $bf.kHashes,
      $bitsPerElem,
    ]
|
2025-01-13 13:49:28 +04:00
|
|
|
|
|
|
|
|
proc computeHashes(bf: BloomFilter, item: string): seq[int] =
  ## Returns the ``bf.kHashes`` hash values of ``item``, each obtained from
  ## ``hashN`` with ``bf.mBits`` as the modulus; entry ``i`` holds the
  ## ``i``-th hash.
  result = newSeq[int](bf.kHashes)
  for idx, slot in result.mpairs:
    slot = hashN(item, idx, bf.mBits)
|
2025-01-13 13:49:28 +04:00
|
|
|
|
|
|
|
|
proc insert*(bf: var BloomFilter, item: string) =
  ## Adds ``item`` (a string) to the Bloom filter by setting, for every
  ## hash of the item, the corresponding bit in ``bf.intArray``.
  const wordBits = sizeof(int) * 8
  let positions = bf.computeHashes(item)
  for bitIndex in positions:
    # Split the flat bit index into a word index and a single-bit mask.
    let
      word = bitIndex div wordBits
      mask = 1 shl (bitIndex mod wordBits)
    bf.intArray[word] = bf.intArray[word] or mask
|
|
|
|
|
|
|
|
|
|
proc lookup*(bf: BloomFilter, item: string): bool =
  ## Tests whether ``item`` (a string) may be in the Bloom filter.
  ##
  ## Returns ``true`` only when every bit selected by the item's hashes is
  ## set, so an item that was inserted is always reported as present.
  ## For an item that was not inserted the intended probability of getting
  ## ``false`` is 1 - ``bf.errorRate``.
  const wordBits = sizeof(int) * 8
  result = true
  let positions = bf.computeHashes(item)
  for bitIndex in positions:
    let
      word = bf.intArray[bitIndex div wordBits]
      mask = 1 shl (bitIndex mod wordBits)
    # A cleared bit proves the item was never inserted.
    if (word and mask) == 0:
      return false
|