diff --git a/nim-bloom/.DS_Store b/nim-bloom/.DS_Store deleted file mode 100644 index 8c4340f..0000000 Binary files a/nim-bloom/.DS_Store and /dev/null differ diff --git a/nim-bloom/.github/workflows/main.yml b/nim-bloom/.github/workflows/main.yml deleted file mode 100644 index f366b37..0000000 --- a/nim-bloom/.github/workflows/main.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: website - -on: [push] # debugging only -#on: -# push: -# tags: -# - 'v*.*.*' - -jobs: - publish: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v1 - - name: Set output - id: vars - run: echo ::set-output name=tag::${GITHUB_REF:10} - - name: Cache choosenim - id: cache-choosenim - uses: actions/cache@v1 - with: - path: ~/.choosenim - key: ${{ runner.os }}-choosenim-stable - - name: Cache nimble - id: cache-nimble - uses: actions/cache@v1 - with: - path: ~/.nimble - key: ${{ runner.os }}-nimble-stable - - uses: jiro4989/setup-nim-action@v1.0.2 - with: - nim-version: 'stable' - - name: Build and test - env: - RELEASE_VERSION: ${{ steps.vars.outputs.tag }} - run: | - nimble test -Y - - name: Build doc - env: - RELEASE_VERSION: ${{ steps.vars.outputs.tag }} - run: | - # Due to bug https://github.com/nim-lang/Nim/issues/14281, compile the documentation separately. - nimble doc --git.url:https://github.com/$GITHUB_REPOSITORY --git.commit:$RELEASE_VERSION bloom.nim - nimble doc --git.url:https://github.com/$GITHUB_REPOSITORY --git.commit:$RELEASE_VERSION private/probabilities.nim - find . - mkdir -p ./public - mv bloom.html probabilities.html nimdoc.out.css ./public/ - cd ./public/ - ln -s ./bloom.html index.html - cd ../ - - name: Deploy - if: success() - uses: crazy-max/ghaction-github-pages@v1.3.0 - with: - target_branch: gh-pages - build_dir: ./public - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/nim-bloom/.gitignore b/nim-bloom/.gitignore deleted file mode 100644 index dd8e2f7..0000000 --- a/nim-bloom/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -nimcache -nimcache/* -tests/test -bloom -*.html -*.css diff --git a/nim-bloom/LICENSE b/nim-bloom/LICENSE deleted file mode 100644 index 10ea866..0000000 --- a/nim-bloom/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2013 Nick Greenfield - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/nim-bloom/README.md b/nim-bloom/README.md deleted file mode 100644 index 339228a..0000000 --- a/nim-bloom/README.md +++ /dev/null @@ -1,41 +0,0 @@ -nim-bloom -============ - -Bloom filter implementation in Nim. Uses a C implementation of MurmurHash3 for optimal speed and numeric distribution. - -On a 10 year old Macbook Pro Retina the test case for 10M insertions executes in ~4.0 seconds and 10M lookups in ~3.5 seconds for a Bloom filter with a 1 in 1000 error rate (0.001). This is ~2.5M insertions/sec and ~2.9M lookups/sec on a single thread (but passing the `-d:release` flag to the Nim compiler and thus activating the C compiler's optimizations). If k is lowered to 5 or 6 vs. a larger "optimal" number, performance further increases to ~4M ops/sec. Note that this test is for a Bloom filter ~20-25MB in size and thus accurately reflects the cost of main memory accesses (vs. a smaller filter that might fit solely in L3 cache, for example, and can achieve several million additional ops/sec). - - -Currently supports inserting and looking up string elements. Forthcoming features include: -* Support for other types beyond strings -* Support for iterables in the insert method -* Persistence - - -quickstart -==== -Quick functionality demo: -``` -import bloom -var bf = initializeBloomFilter(capacity = 10000, errorRate = 0.001) -echo bf # Get characteristics of the Bloom filter -echo bf.lookup("An element not in the Bloom filter") # Prints 'false' -bf.insert("Here we go...") -assert(bf.lookup("Here we go...")) -``` - - -By default, the Bloom filter will use a mathematically optimal number of k hash functions, which minimizes the amount of error per bit of storage required. In many cases, however, it may be advantageous to specify a smaller value of k in order to save time hashing. This is supported by passing an explicit `k` parameter, which will then either create an optimal Bloom filter for the specified error rate.[1] - -[1] If `k` <= 12 and the number of required bytes per element is <= 4. If either of these conditions doesn't hold, a fully manual Bloom filter can be constructed by passing both `k` and `force_n_bits_per_elem`. - -Example: -``` -var bf2 = initializeBloomFilter(capacity = 10000, errorRate = 0.001, k = 5) -assert bf2.kHashes == 5 -assert bf2.nBitsPerElem == 18 - -var bf3 = initializeBloomFilter(capacity = 10000, errorRate = 0.001, k = 5, forceNBitsPerElem = 12) -assert bf3.kHashes == 5 -assert bf3.nBitsPerElem == 12 # But note, however, that bf.errorRate will *not* be correct -``` diff --git a/nim-bloom/bloom.nimble b/nim-bloom/bloom.nimble deleted file mode 100644 index 3398bd2..0000000 --- a/nim-bloom/bloom.nimble +++ /dev/null @@ -1,9 +0,0 @@ -# Package -version = "0.1.0" -author = "Boyd Greenfield" -description = "Efficient Bloom filter implementation for Nim using MurmurHash3." -license = "MIT" -srcDir = "src" - -# Dependencies -requires "nim >= 1.0.0" diff --git a/nim-bloom/src/bloom.nim b/nim-bloom/src/bloom.nim deleted file mode 100644 index d5f8f71..0000000 --- a/nim-bloom/src/bloom.nim +++ /dev/null @@ -1,244 +0,0 @@ -from math import ceil, ln, pow, round -import hashes -import strutils -import private/probabilities - -# Import MurmurHash3 code and compile at the same time as Nim code -{.compile: "murmur3.c".} - -type - BloomFilterError* = object of CatchableError - MurmurHashes = array[0..1, int] - BloomFilter* = object - capacity*: int - errorRate*: float - kHashes*: int - mBits*: int - intArray: seq[int] - nBitsPerElem*: int - useMurmurHash*: bool - -proc rawMurmurHash(key: cstring, len: int, seed: uint32, - outHashes: var MurmurHashes): void {. - importc: "MurmurHash3_x64_128".} - -proc murmurHash(key: string, seed = 0'u32): MurmurHashes = - rawMurmurHash(key, key.len, seed, outHashes = result) - -proc hashA(item: string, maxValue: int): int = - hash(item) mod maxValue - -proc hashB(item: string, maxValue: int): int = - hash(item & " b") mod maxValue - -proc hashN(item: string, n: int, maxValue: int): int = - ## Get the nth hash of a string using the formula hashA + n * hashB - ## which uses 2 hash functions vs. k and has comparable properties - ## See Kirsch and Mitzenmacher, 2008: - ## http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf - abs((hashA(item, maxValue) + n * hashB(item, maxValue))) mod maxValue - -proc getMOverNBitsForK(k: int, targetError: float, - probabilityTable = kErrors): int = - ## Returns the optimal number of m/n bits for a given k. - if k notin 0..12: - raise newException(BloomFilterError, - "K must be <= 12 if forceNBitsPerElem is not also specified.") - - for mOverN in 2..probabilityTable[k].high: - if probabilityTable[k][mOverN] < targetError: - return mOverN - - raise newException(BloomFilterError, - "Specified value of k and error rate for which is not achievable using less than 4 bytes / element.") - -proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0, - forceNBitsPerElem = 0, - useMurmurHash = true): BloomFilter = - ## Initializes a Bloom filter, using a specified ``capacity``, - ## ``errorRate``, and – optionally – specific number of k hash functions. - ## If ``kHashes`` is < 1 (default argument is 0), ``kHashes`` will be - ## optimally calculated on the fly. Otherwise, ``kHashes`` will be set to - ## the passed integer, which requires that ``forceNBitsPerElem`` is - ## also set to be greater than 0. Otherwise a ``BloomFilterError`` - ## exception is raised. - ## See http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html for - ## useful tables on k and m/n (n bits per element) combinations. - ## - ## The Bloom filter uses the MurmurHash3 implementation by default, - ## though it can fall back to using the built-in nim ``hash`` function - ## if ``useMurmurHash = false``. This is compiled alongside the Nim - ## code using the ``{.compile.}`` pragma. - var - kHashes: int - bitsPerElem: float - nBitsPerElem: int - - if k < 1: # Calculate optimal k and use that - bitsPerElem = ceil(-1.0 * (ln(errorRate) / (pow(ln(2.float), 2)))) - kHashes = round(ln(2.float) * bitsPerElem).int - nBitsPerElem = round(bitsPerElem).int - else: # Use specified k if possible - if forceNBitsPerElem < 1: # Use lookup table - nBitsPerElem = getMOverNBitsForK(k = k, targetError = errorRate) - else: - nBitsPerElem = forceNBitsPerElem - kHashes = k - - let - mBits = capacity * nBitsPerElem - mInts = 1 + mBits div (sizeof(int) * 8) - - BloomFilter(capacity: capacity, errorRate: errorRate, kHashes: kHashes, - mBits: mBits, intArray: newSeq[int](mInts), nBitsPerElem: nBitsPerElem, - useMurmurHash: useMurmurHash) - -proc `$`*(bf: BloomFilter): string = - ## Prints the capacity, set error rate, number of k hash functions, - ## and total bits of memory allocated by the Bloom filter. - "Bloom filter with $1 capacity, $2 error rate, $3 hash functions, and requiring $4 bits per stored element." % - [$bf.capacity, - formatFloat(bf.errorRate, format = ffScientific, precision = 1), - $bf.kHashes, $bf.nBitsPerElem] - -{.push overflowChecks: off.} - -proc hashMurmur(bf: BloomFilter, key: string): seq[int] = - result.newSeq(bf.kHashes) - let murmurHashes = murmurHash(key, seed = 0'u32) - for i in 0..> (32 - r)); -} - -static inline FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r ) -{ - return (x << r) | (x >> (64 - r)); -} - -#define ROTL32(x,y) rotl32(x,y) -#define ROTL64(x,y) rotl64(x,y) - -#define BIG_CONSTANT(x) (x##LLU) - -//----------------------------------------------------------------------------- -// Block read - if your platform needs to do endian-swapping or can only -// handle aligned reads, do the conversion here - -#define getblock(p, i) (p[i]) - -//----------------------------------------------------------------------------- -// Finalization mix - force all bits of a hash block to avalanche - -static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h ) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -//---------- - -static inline FORCE_INLINE uint64_t fmix64 ( uint64_t k ) -{ - k ^= k >> 33; - k *= BIG_CONSTANT(0xff51afd7ed558ccd); - k ^= k >> 33; - k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); - k ^= k >> 33; - - return k; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_32 ( const void * key, int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; - int i; - - uint32_t h1 = seed; - - uint32_t c1 = 0xcc9e2d51; - uint32_t c2 = 0x1b873593; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); - - for(i = -nblocks; i; i++) - { - uint32_t k1 = getblock(blocks,i); - - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1,13); - h1 = h1*5+0xe6546b64; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*4); - - uint32_t k1 = 0; - - switch(len & 3) - { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0]; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; - - h1 = fmix32(h1); - - *(uint32_t*)out = h1; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_128 ( const void * key, const int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - int i; - - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - - uint32_t c1 = 0x239b961b; - uint32_t c2 = 0xab0e9789; - uint32_t c3 = 0x38b34ae5; - uint32_t c4 = 0xa1e38b93; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); - - for(i = -nblocks; i; i++) - { - uint32_t k1 = getblock(blocks,i*4+0); - uint32_t k2 = getblock(blocks,i*4+1); - uint32_t k3 = getblock(blocks,i*4+2); - uint32_t k4 = getblock(blocks,i*4+3); - - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - - h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; - - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; - - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; - - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch(len & 15) - { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = fmix32(h1); - h2 = fmix32(h2); - h3 = fmix32(h3); - h4 = fmix32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - ((uint32_t*)out)[0] = h1; - ((uint32_t*)out)[1] = h2; - ((uint32_t*)out)[2] = h3; - ((uint32_t*)out)[3] = h4; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x64_128 ( const void * key, const int len, - const uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - int i; - - uint64_t h1 = seed; - uint64_t h2 = seed; - - uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); - uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); - - //---------- - // body - - const uint64_t * blocks = (const uint64_t *)(data); - - for(i = 0; i < nblocks; i++) - { - uint64_t k1 = getblock(blocks,i*2+0); - uint64_t k2 = getblock(blocks,i*2+1); - - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - - h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; - - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - - uint64_t k1 = 0; - uint64_t k2 = 0; - - switch(len & 15) - { - case 15: k2 ^= (uint64_t)(tail[14]) << 48; - case 14: k2 ^= (uint64_t)(tail[13]) << 40; - case 13: k2 ^= (uint64_t)(tail[12]) << 32; - case 12: k2 ^= (uint64_t)(tail[11]) << 24; - case 11: k2 ^= (uint64_t)(tail[10]) << 16; - case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; - case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; - case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; - case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; - case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; - case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; - case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; - case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; - case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - ((uint64_t*)out)[0] = h1; - ((uint64_t*)out)[1] = h2; -} - -//----------------------------------------------------------------------------- diff --git a/nim-bloom/src/murmur3.h b/nim-bloom/src/murmur3.h deleted file mode 100644 index 6928384..0000000 --- a/nim-bloom/src/murmur3.h +++ /dev/null @@ -1,21 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the -// public domain. The author hereby disclaims copyright to this source -// code. - -#ifndef _MURMURHASH3_H_ -#define _MURMURHASH3_H_ - -#include - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out); - -void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out); - -void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out); - -//----------------------------------------------------------------------------- - -#endif // _MURMURHASH3_H_ \ No newline at end of file diff --git a/nim-bloom/src/private/probabilities.nim b/nim-bloom/src/private/probabilities.nim deleted file mode 100644 index 59175f2..0000000 --- a/nim-bloom/src/private/probabilities.nim +++ /dev/null @@ -1,103 +0,0 @@ -# -# ### Probability table declaration, in private/ for readability ### -# Table for k hashes from 1..12 from http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html -# Iterate along the sequence at position [k] until the error rate is < specified, otherwise -# raise an error. -# - -type - TErrorForK = seq[float] - TAllErrorRates* = array[0..12, TErrorForK] - -var kErrors*: TAllErrorRates - -kErrors[0] = @[1.0] -kErrors[1] = @[1.0, 1.0, - 0.3930000000, 0.2830000000, 0.2210000000, 0.1810000000, 0.1540000000, - 0.1330000000, 0.1180000000, 0.1050000000, 0.0952000000, 0.0869000000, - 0.0800000000, 0.0740000000, 0.0689000000, 0.0645000000, 0.0606000000, - 0.0571000000, 0.0540000000, 0.0513000000, 0.0488000000, 0.0465000000, - 0.0444000000, 0.0425000000, 0.0408000000, 0.0392000000, 0.0377000000, - 0.0364000000, 0.0351000000, 0.0339000000, 0.0328000000, 0.0317000000, - 0.0308000000 ] - -kErrors[2] = @[1.0, 1.0, - 0.4000000000, 0.2370000000, 0.1550000000, 0.1090000000, 0.0804000000, - 0.0618000000, 0.0489000000, 0.0397000000, 0.0329000000, 0.0276000000, - 0.0236000000, 0.0203000000, 0.0177000000, 0.0156000000, 0.0138000000, - 0.0123000000, 0.0111000000, 0.0099800000, 0.0090600000, 0.0082500000, - 0.0075500000, 0.0069400000, 0.0063900000, 0.0059100000, 0.0054800000, - 0.0051000000, 0.0047500000, 0.0044400000, 0.0041600000, 0.0039000000, - 0.0036700000 ] - -kErrors[3] = @[1.0, 1.0, 1.0, - 0.2530000000, 0.1470000000, 0.0920000000, 0.0609000000, 0.0423000000, - 0.0306000000, 0.0228000000, 0.0174000000, 0.0136000000, 0.0108000000, - 0.0087500000, 0.0071800000, 0.0059600000, 0.0050000000, 0.0042300000, - 0.0036200000, 0.0031200000, 0.0027000000, 0.0023600000, 0.0020700000, - 0.0018300000, 0.0016200000, 0.0014500000, 0.0012900000, 0.0011600000, - 0.0010500000, 0.0009490000, 0.0008620000, 0.0007850000, 0.0007170000 ] - -kErrors[4] = @[1.0, 1.0, 1.0, 1.0, - 0.1600000000, 0.0920000000, 0.0561000000, 0.0359000000, 0.0240000000, - 0.0166000000, 0.0118000000, 0.0086400000, 0.0064600000, 0.0049200000, - 0.0038100000, 0.0030000000, 0.0023900000, 0.0019300000, 0.0015800000, - 0.0013000000, 0.0010800000, 0.0009050000, 0.0007640000, 0.0006490000, - 0.0005550000, 0.0004780000, 0.0004130000, 0.0003590000, 0.0003140000, - 0.0002760000, 0.0002430000, 0.0002150000, 0.0001910000 ] - -kErrors[5] = @[1.0, 1.0, 1.0, 1.0, 1.0, - 0.1010000000, 0.0578000000, 0.0347000000, 0.0217000000, 0.0141000000, - 0.0094300000, 0.0065000000, 0.0045900000, 0.0033200000, 0.0024400000, - 0.0018300000, 0.0013900000, 0.0010700000, 0.0008390000, 0.0006630000, - 0.0005300000, 0.0004270000, 0.0003470000, 0.0002850000, 0.0002350000, - 0.0001960000, 0.0001640000, 0.0001380000, 0.0001170000, 0.0000996000, - 0.0000853000, 0.0000733000, 0.0000633000 ] - -kErrors[6] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0638000000, 0.0364000000, 0.0216000000, 0.0133000000, 0.0084400000, - 0.0055200000, 0.0037100000, 0.0025500000, 0.0017900000, 0.0012800000, - 0.0009350000, 0.0006920000, 0.0005190000, 0.0003940000, 0.0003030000, - 0.0002360000, 0.0001850000, 0.0001470000, 0.0001170000, 0.0000944000, - 0.0000766000, 0.0000626000, 0.0000515000, 0.0000426000, 0.0000355000, - 0.0000297000, 0.0000250000 ] - -kErrors[7] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0229000000, 0.0135000000, 0.0081900000, 0.0051300000, 0.0032900000, - 0.0021700000, 0.0014600000, 0.0010000000, 0.0007020000, 0.0004990000, - 0.0003600000, 0.0002640000, 0.0001960000, 0.0001470000, 0.0001120000, - 0.0000856000, 0.0000663000, 0.0000518000, 0.0000408000, 0.0000324000, - 0.0000259000, 0.0000209000, 0.0000169000, 0.0000138000, 0.0000113000 ] - -kErrors[8] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0145000000, 0.0084600000, 0.0050900000, 0.0031400000, 0.0019900000, - 0.0012900000, 0.0008520000, 0.0005740000, 0.0003940000, 0.0002750000, - 0.0001940000, 0.0001400000, 0.0001010000, 0.0000746000, 0.0000555000, - 0.0000417000, 0.0000316000, 0.0000242000, 0.0000187000, 0.0000146000, - 0.0000114000, 0.0000090100, 0.0000071600, 0.0000057300 ] - -kErrors[9] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0053100000, 0.0031700000, 0.0019400000, 0.0012100000, 0.0007750000, - 0.0005050000, 0.0003350000, 0.0002260000, 0.0001550000, 0.0001080000, - 0.0000759000, 0.0000542000, 0.0000392000, 0.0000286000, 0.0000211000, - 0.0000157000, 0.0000118000, 0.0000089600, 0.0000068500, 0.0000052800, - 0.0000041000, 0.0000032000] - -kErrors[10] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0033400000, 0.0019800000, 0.0012000000, 0.0007440000, 0.0004700000, - 0.0003020000, 0.0001980000, 0.0001320000, 0.0000889000, 0.0000609000, - 0.0000423000, 0.0000297000, 0.0000211000, 0.0000152000, 0.0000110000, - 0.0000080700, 0.0000059700, 0.0000044500, 0.0000033500, 0.0000025400, - 0.0000019400] - -kErrors[11] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0021000000, 0.0012400000, 0.0007470000, 0.0004590000, 0.0002870000, - 0.0001830000, 0.0001180000, 0.0000777000, 0.0000518000, 0.0000350000, - 0.0000240000, 0.0000166000, 0.0000116000, 0.0000082300, 0.0000058900, - 0.0000042500, 0.0000031000, 0.0000022800, 0.0000016900, 0.0000012600] - -kErrors[12] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.0007780000, 0.0004660000, 0.0002840000, 0.0001760000, 0.0001110000, - 0.0000712000, 0.0000463000, 0.0000305000, 0.0000204000, 0.0000138000, - 0.0000094200, 0.0000065200, 0.0000045600, 0.0000032200, 0.0000022900, - 0.0000016500, 0.0000012000, 0.0000008740] diff --git a/nim-bloom/tests/config.nims b/nim-bloom/tests/config.nims deleted file mode 100644 index 80091ff..0000000 --- a/nim-bloom/tests/config.nims +++ /dev/null @@ -1 +0,0 @@ -switch("path", "$projectDir/../src") diff --git a/nim-bloom/tests/test.nim b/nim-bloom/tests/test.nim deleted file mode 100644 index 53c7e35..0000000 --- a/nim-bloom/tests/test.nim +++ /dev/null @@ -1,102 +0,0 @@ -import unittest -include bloom -from random import rand, randomize -import times - -suite "murmur": - # Test murmurhash 3 - setup: - var hashOutputs: MurmurHashes - hashOutputs = [0, 0] - rawMurmurHash("hello", 5, 0, hashOutputs) - - test "raw": - check int(hashOutputs[0]) == -3758069500696749310 # Correct murmur outputs (cast to int64) - check int(hashOutputs[1]) == 6565844092913065241 - - test "wrapped": - let hashOutputs2 = murmurHash("hello", 0) - check hashOutputs2[0] == hashOutputs[0] - check hashOutputs2[1] == hashOutputs[1] - - test "seed": - let hashOutputs3 = murmurHash("hello", 10) - check hashOutputs3[0] != hashOutputs[0] - check hashOutputs3[1] != hashOutputs[1] - - -suite "bloom": - - setup: - let nElementsToTest = 100000 - var bf = initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001) - randomize(2882) # Seed the RNG - var - sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - kTestElements, sampleLetters: seq[string] - kTestElements = newSeq[string](nElementsToTest) - sampleLetters = newSeq[string](62) - - for i in 0.. rm.config.resendInterval: + # Time to attempt resend + if unackMsg.resendAttempts < rm.config.maxResendAttempts: + var updatedMsg = unackMsg + updatedMsg.resendAttempts += 1 + updatedMsg.sendTime = now + newOutgoingBuffer.add(updatedMsg) + else: + if rm.onMessageSent != nil: + rm.onMessageSent(unackMsg.message.messageId) + else: + newOutgoingBuffer.add(unackMsg) + rm.outgoingBuffer = newOutgoingBuffer - except: - discard + except Exception as e: + logError("Error in checking unacknowledged messages: " & e.msg) proc periodicBufferSweep(rm: ReliabilityManager) {.async: (raises: [CancelledError]).} = ## Periodically sweeps the buffer to clean up and check unacknowledged messages. @@ -351,7 +355,8 @@ proc periodicBufferSweep(rm: ReliabilityManager) {.async: (raises: [CancelledErr rm.cleanBloomFilter() except Exception as e: logError("Error in periodic buffer sweep: " & e.msg) - await sleepAsync(chronos.seconds(rm.config.bufferSweepInterval.inSeconds)) + + await sleepAsync(chronos.milliseconds(rm.config.bufferSweepInterval.inMilliseconds)) proc periodicSyncMessage(rm: ReliabilityManager) {.async: (raises: [CancelledError]).} = ## Periodically notifies to send a sync message to maintain connectivity. diff --git a/src/utils.nim b/src/utils.nim index e5b3ccf..773d391 100644 --- a/src/utils.nim +++ b/src/utils.nim @@ -1,6 +1,6 @@ import std/[times, locks] import chronos, chronicles -import "../nim-bloom/src/bloom" +import ./bloom import ./common proc logError*(msg: string) = @@ -11,25 +11,35 @@ proc logInfo*(msg: string) = proc newRollingBloomFilter*(capacity: int, errorRate: float, window: times.Duration): RollingBloomFilter {.gcsafe.} = try: - var filter: BloomFilter + var filterResult: Result[BloomFilter, string] {.gcsafe.}: - filter = initializeBloomFilter(capacity, errorRate) - logInfo("Successfully initialized bloom filter") - RollingBloomFilter( - filter: filter, - window: window, - messages: @[] - ) + filterResult = initializeBloomFilter(capacity, errorRate) + + if filterResult.isOk: + logInfo("Successfully initialized bloom filter") + return RollingBloomFilter( + filter: filterResult.get(), # Extract the BloomFilter from Result + window: window, + messages: @[] + ) + else: + logError("Failed to initialize bloom filter: " & filterResult.error) + # Fall through to default case below + except: logError("Failed to initialize bloom filter") - var filter: BloomFilter - {.gcsafe.}: - filter = initializeBloomFilter(DefaultBloomFilterCapacity, DefaultBloomFilterErrorRate) - RollingBloomFilter( - filter: filter, + + # Default fallback case + let defaultResult = initializeBloomFilter(DefaultBloomFilterCapacity, DefaultBloomFilterErrorRate) + if defaultResult.isOk: + return RollingBloomFilter( + filter: defaultResult.get(), window: window, messages: @[] ) + else: + # If even default initialization fails, raise an exception + logError("Failed to initialize bloom filter with default parameters") proc add*(rbf: var RollingBloomFilter, messageId: MessageID) {.gcsafe.} = ## Adds a message ID to the rolling bloom filter. @@ -54,9 +64,14 @@ proc clean*(rbf: var RollingBloomFilter) {.gcsafe.} = let now = getTime() let cutoff = now - rbf.window var newMessages: seq[TimestampedMessageID] = @[] - var newFilter: BloomFilter - {.gcsafe.}: - newFilter = initializeBloomFilter(rbf.filter.capacity, rbf.filter.errorRate) + + # Initialize new filter + let newFilterResult = initializeBloomFilter(rbf.filter.capacity, rbf.filter.errorRate) + if newFilterResult.isErr: + logError("Failed to create new bloom filter: " & newFilterResult.error) + return + + var newFilter = newFilterResult.get() for msg in rbf.messages: if msg.timestamp > cutoff: diff --git a/tests/test_bloom.nim b/tests/test_bloom.nim new file mode 100644 index 0000000..7da555c --- /dev/null +++ b/tests/test_bloom.nim @@ -0,0 +1,142 @@ +import unittest, results, strutils +import ../src/bloom +from random import rand, randomize + +suite "bloom filter": + setup: + let nElementsToTest = 10000 + let bfResult = initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001) + check bfResult.isOk + var bf = bfResult.get + randomize(2882) # Seed the RNG + var + sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + testElements = newSeq[string](nElementsToTest) + + for i in 0.. 12 + let errorCase = getMOverNBitsForK(k = 13, targetError = 0.01) + check errorCase.isErr + check errorCase.error == "K must be <= 12 if forceNBitsPerElem is not also specified." + + # Test error case for unachievable error rate + let errorCase2 = getMOverNBitsForK(k = 2, targetError = 0.00001) + check errorCase2.isErr + check errorCase2.error == "Specified value of k and error rate not achievable using less than 4 bytes / element." + + # Test success cases + let case1 = getMOverNBitsForK(k = 2, targetError = 0.1) + check case1.isOk + check case1.value == 6 + + let case2 = getMOverNBitsForK(k = 7, targetError = 0.01) + check case2.isOk + check case2.value == 10 + + let case3 = getMOverNBitsForK(k = 7, targetError = 0.001) + check case3.isOk + check case3.value == 16 + + let bf2Result = initializeBloomFilter(10000, 0.001, k = 4, forceNBitsPerElem = 20) + check bf2Result.isOk + let bf2 = bf2Result.get + check bf2.kHashes == 4 + check bf2.mBits == 200000 + + test "string representation": + let bf3Result = initializeBloomFilter(1000, 0.01, k = 4) + check bf3Result.isOk + let bf3 = bf3Result.get + let str = $bf3 + check str.contains("1000") # Capacity + check str.contains("4 hash") # Hash functions + check str.contains("1.0e-02") # Error rate in scientific notation + +suite "bloom filter special cases": + test "different patterns of strings": + const testSize = 10_000 + let patterns = @[ + "shortstr", + repeat("a", 1000), # Very long string + "special@#$%^&*()", # Special characters + "unicode→★∑≈", # Unicode characters + repeat("pattern", 10) # Repeating pattern + ] + + let bfResult = initializeBloomFilter(testSize, 0.01) + check bfResult.isOk + var bf = bfResult.get + var inserted = newSeq[string](testSize) + + # Test pattern handling + for pattern in patterns: + bf.insert(pattern) + assert bf.lookup(pattern), "failed lookup pattern: " & pattern + + # Test general insertion and lookup + for i in 0.. 0 + rm.cleanup() + test "periodic sync": var syncCallCount = 0 rm.setCallbacks(