add nim bloom pkg

2026-07-15 20:29:44 +00:00 · 2024-11-09 10:13:34 +05:30 · 2024-11-09 10:13:34 +05:30 · 5d065c168b
commit 5d065c168b
parent a83dcc0331
11 changed files with 862 additions and 0 deletions
--- a/nim-bloom/.gitignore
+++ b/nim-bloom/.gitignore
@ -0,0 +1,7 @@
+nimcache
+nimcache/*
+tests/test
+bloom
+*.html
+*.css
+/.DS_Store
--- a/nim-bloom/LICENSE
+++ b/nim-bloom/LICENSE
@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Nick Greenfield
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/nim-bloom/README.md
+++ b/nim-bloom/README.md
@ -0,0 +1,41 @@
+nim-bloom
+============
+
+Bloom filter implementation in Nim. Uses a C implementation of MurmurHash3 for optimal speed and numeric distribution.
+
+On a 10-year-old MacBook Pro Retina, the test case for 10M insertions executes in ~4.0 seconds and 10M lookups in ~3.5 seconds for a Bloom filter with a 1 in 1000 error rate (0.001). This is ~2.5M insertions/sec and ~2.9M lookups/sec on a single thread, measured with the `-d:release` flag passed to the Nim compiler (which also activates the C compiler's optimizations). If k is lowered to 5 or 6 instead of the larger "optimal" number, performance further increases to ~4M ops/sec. Note that this test is for a Bloom filter ~20-25MB in size and thus accurately reflects the cost of main memory accesses (a smaller filter that fits entirely in L3 cache, for example, can achieve several million additional ops/sec).
+
+
+Currently supports inserting and looking up string elements. Forthcoming features include:
+* Support for other types beyond strings
+* Support for iterables in the insert method
+* Persistence
+
+
+quickstart
+====
+Quick functionality demo:
+```
+import bloom
+var bf = initializeBloomFilter(capacity = 10000, errorRate = 0.001)
+echo bf # Get characteristics of the Bloom filter
+echo bf.lookup("An element not in the Bloom filter")  # Prints 'false'
+bf.insert("Here we go...")
+assert(bf.lookup("Here we go..."))
+```
+
+
+By default, the Bloom filter will use a mathematically optimal number of k hash functions, which minimizes the amount of error per bit of storage required. In many cases, however, it may be advantageous to specify a smaller value of k in order to save time hashing. This is supported by passing an explicit `k` parameter; the filter is then sized with the smallest number of bits per element that achieves the specified error rate for that `k`.[1]
+
+[1] This requires `k` <= 12 and a required storage per element of <= 4 bytes (32 bits). If either of these conditions doesn't hold, a fully manual Bloom filter can be constructed by passing both `k` and `forceNBitsPerElem`.
+
+Example:
+```
+var bf2 = initializeBloomFilter(capacity = 10000, errorRate = 0.001, k = 5)
+assert bf2.kHashes == 5
+assert bf2.nBitsPerElem == 18
+
+var bf3 = initializeBloomFilter(capacity = 10000, errorRate = 0.001, k = 5, forceNBitsPerElem = 12)
+assert bf3.kHashes == 5
+assert bf3.nBitsPerElem == 12   # But note, however, that bf3.errorRate will *not* be correct
+```
--- a/nim-bloom/bloom.nimble
+++ b/nim-bloom/bloom.nimble
@ -0,0 +1,9 @@
+# Package
+version       = "0.1.0"
+author        = "Waku Team"
+description   = "Efficient Bloom filter implementation for Nim"
+license       = "MIT"
+srcDir        = "src"
+
+# Dependencies
+requires "nim >= 1.0.0"
--- a/nim-bloom/src/.DS_Store
+++ b/nim-bloom/src/.DS_Store
--- a/nim-bloom/src/bloom.nim
+++ b/nim-bloom/src/bloom.nim
@ -0,0 +1,244 @@
+from math import ceil, ln, pow, round
+import hashes
+import strutils
+import private/probabilities
+
+# Import MurmurHash3 code and compile at the same time as Nim code
+{.compile: "murmur3.c".}
+
+type
+  # Exported because ``initializeBloomFilter`` raises it; without the ``*``
+  # code that imports this module could not name it in an ``except`` branch.
+  BloomFilterError* = object of CatchableError ## Unsatisfiable parameters.
+  # Receives the two words written by ``MurmurHash3_x64_128``. The C routine
+  # stores two 64-bit values, so this layout assumes a 64-bit ``int``.
+  MurmurHashes = array[0..1, int]
+  BloomFilter* = object
+    ## A Bloom filter over strings, stored as bits packed into ``int`` words.
+    capacity*: int       ## Capacity passed to ``initializeBloomFilter``.
+    errorRate*: float    ## Error rate passed to ``initializeBloomFilter``.
+    kHashes*: int        ## Number of bit positions derived per item.
+    mBits*: int          ## Total bits addressed: ``capacity * nBitsPerElem``.
+    intArray: seq[int]   ## Bit storage; ``sizeof(int) * 8`` bits per entry.
+    nBitsPerElem*: int   ## Bits of storage budgeted per element.
+    useMurmurHash*: bool ## ``true``: MurmurHash3; ``false``: Nim's ``hash``.
+
+# Binding for ``MurmurHash3_x64_128`` in murmur3.c. The C prototype takes
+# ``int len``, so the length is declared as ``cint`` (not Nim's word-sized
+# ``int``), and ``cdecl`` makes the calling convention match the C compiler's
+# default. The routine writes two 64-bit words through ``outHashes``.
+proc rawMurmurHash(key: cstring, len: cint, seed: uint32,
+                     outHashes: var MurmurHashes): void {.
+  importc: "MurmurHash3_x64_128", cdecl.}
+
+proc murmurHash(key: string, seed = 0'u32): MurmurHashes =
+  ## Hashes all bytes of ``key`` with MurmurHash3 (x64, 128-bit variant) and
+  ## returns the two resulting words.
+  ##
+  ## The length is range-checked when converted to ``cint``, so a string too
+  ## long for a C ``int`` fails loudly instead of being silently truncated.
+  rawMurmurHash(key.cstring, key.len.cint, seed, outHashes = result)
+
+proc hashA(item: string, maxValue: int): int =
+  ## First base hash: Nim's built-in ``hash`` of ``item`` reduced with
+  ## ``mod maxValue``. The result keeps the sign of the raw hash, so it may
+  ## be negative.
+  let rawHash = hash(item)
+  result = rawHash mod maxValue
+
+proc hashB(item: string, maxValue: int): int =
+  ## Second base hash: Nim's built-in ``hash`` of ``item`` with the suffix
+  ## " b" appended, reduced with ``mod maxValue``. May be negative.
+  let salted = item & " b"
+  result = hash(salted) mod maxValue
+
+proc hashN(item: string, n: int, maxValue: int): int =
+  ## Returns the ``n``-th derived hash of ``item``, computed as
+  ## ``hashA + n * hashB`` and folded into ``0 ..< maxValue``.
+  ## Two base hashes stand in for k independent ones with comparable
+  ## properties; see Kirsch and Mitzenmacher, 2008:
+  ## http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
+  let
+    baseHash = hashA(item, maxValue)
+    stepHash = hashB(item, maxValue)
+    combined = baseHash + n * stepHash
+  # Either base hash may be negative, hence the abs before the final mod.
+  result = abs(combined) mod maxValue
+
+proc getMOverNBitsForK(k: int, targetError: float,
+    probabilityTable = kErrors): int =
+  ## Returns the smallest m/n (bits per element) at which ``k`` hash
+  ## functions give a tabulated false-positive rate strictly below
+  ## ``targetError``.
+  ##
+  ## Raises ``BloomFilterError`` when ``k`` is outside ``0..12`` (the rows
+  ## present in ``probabilityTable``) or when no entry in row ``k`` is below
+  ## ``targetError``.
+  if k notin 0..12:
+    raise newException(BloomFilterError,
+      "k must be between 0 and 12 if forceNBitsPerElem is not also specified.")
+
+  # Indices 0 and 1 hold 1.0 in every row, so the scan starts at m/n = 2.
+  for mOverN in 2..probabilityTable[k].high:
+    if probabilityTable[k][mOverN] < targetError:
+      return mOverN
+
+  raise newException(BloomFilterError,
+    "The specified error rate is not achievable for this value of k using " &
+    "at most 4 bytes (32 bits) per element.")
+
+proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0,
+                              forceNBitsPerElem = 0,
+                              useMurmurHash = true): BloomFilter =
+  ## Initializes a Bloom filter sized for ``capacity`` elements.
+  ##
+  ## * ``k < 1`` (the default): the bits per element and the number of hash
+  ##   functions are both computed from ``errorRate``; ``forceNBitsPerElem``
+  ##   is ignored.
+  ## * ``k >= 1`` and ``forceNBitsPerElem < 1``: ``k`` hash functions are
+  ##   used and the bits per element are taken from the lookup table for
+  ##   ``errorRate`` (this requires ``k <= 12``).
+  ## * ``k >= 1`` and ``forceNBitsPerElem >= 1``: both values are used as
+  ##   given; ``errorRate`` is only recorded, not verified.
+  ##
+  ## Raises ``BloomFilterError`` if ``capacity < 1``, if ``k < 1`` and
+  ## ``errorRate`` is not strictly between 0 and 1, or if the lookup table
+  ## cannot satisfy the requested ``k`` / ``errorRate`` combination.
+  ## See http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html for
+  ## useful tables on k and m/n (n bits per element) combinations.
+  ##
+  ## The Bloom filter uses the MurmurHash3 implementation by default,
+  ## though it can fall back to using the built-in nim ``hash`` function
+  ## if ``useMurmurHash = false``. MurmurHash3 is compiled alongside the Nim
+  ## code using the ``{.compile.}`` pragma.
+  if capacity < 1:
+    # A filter with zero bits would divide by zero on the first hash.
+    raise newException(BloomFilterError, "capacity must be at least 1.")
+
+  var
+    kHashes: int
+    nBitsPerElem: int
+
+  if k < 1: # Calculate optimal k and use that
+    # Written as a negated conjunction so that NaN is rejected as well.
+    if not (errorRate > 0.0 and errorRate < 1.0):
+      raise newException(BloomFilterError,
+        "errorRate must be strictly between 0 and 1.")
+    let bitsPerElem = ceil(-1.0 * (ln(errorRate) / (pow(ln(2.float), 2))))
+    kHashes = round(ln(2.float) * bitsPerElem).int
+    nBitsPerElem = round(bitsPerElem).int
+  else: # Use specified k if possible
+    if forceNBitsPerElem < 1: # Use lookup table
+      nBitsPerElem = getMOverNBitsForK(k = k, targetError = errorRate)
+    else:
+      nBitsPerElem = forceNBitsPerElem
+    kHashes = k
+
+  let
+    mBits = capacity * nBitsPerElem
+    # One extra word so that a partial trailing word is always available.
+    mInts = 1 + mBits div (sizeof(int) * 8)
+
+  BloomFilter(capacity: capacity, errorRate: errorRate, kHashes: kHashes,
+    mBits: mBits, intArray: newSeq[int](mInts), nBitsPerElem: nBitsPerElem,
+    useMurmurHash: useMurmurHash)
+
+proc `$`*(bf: BloomFilter): string =
+  ## Human-readable summary of the filter: its capacity, the configured
+  ## error rate (scientific notation, one digit of precision), the number
+  ## of hash functions and the bits used per stored element.
+  let
+    shownRate = formatFloat(bf.errorRate, format = ffScientific, precision = 1)
+    fields = [$bf.capacity, shownRate, $bf.kHashes, $bf.nBitsPerElem]
+  result = "Bloom filter with $1 capacity, $2 error rate, $3 hash functions, and requiring $4 bits per stored element." % fields
+
+{.push overflowChecks: off.}
+
+proc hashMurmur(bf: BloomFilter, key: string): seq[int] =
+  ## Derives ``bf.kHashes`` bit positions in ``0 ..< bf.mBits`` for ``key``
+  ## from the two MurmurHash3 words ``h0`` and ``h1`` as ``h0 + i * h1``.
+  ## Overflow checks are disabled around this proc because the combination
+  ## is meant to wrap.
+  result.newSeq(bf.kHashes)
+  let murmurHashes = murmurHash(key, seed = 0'u32)
+  for i in 0..<bf.kHashes:
+    # Reduce first, then take abs: the remainder lies strictly between
+    # -mBits and mBits, so abs can never see low(int) (whose negation does
+    # not fit in an int and would leave a negative index). For every other
+    # input this equals abs(x) mod mBits.
+    result[i] = abs((murmurHashes[0] + i * murmurHashes[1]) mod bf.mBits)
+
+{.pop.}
+
+proc hashNim(bf: BloomFilter, key: string): seq[int] =
+  ## Derives ``bf.kHashes`` bit positions for ``key`` using only Nim's
+  ## built-in ``hash``, via ``hashN`` with ``bf.mBits`` as the upper bound.
+  result = newSeq[int](bf.kHashes)
+  for n, slot in result.mpairs:
+    slot = hashN(key, n, bf.mBits)
+
+proc hash(bf: BloomFilter, key: string): seq[int] =
+  ## Returns the bit positions for ``key``, using whichever hashing scheme
+  ## the filter was configured with (``bf.useMurmurHash``).
+  result =
+    if bf.useMurmurHash: hashMurmur(bf, key)
+    else: hashNim(bf, key)
+
+proc insert*(bf: var BloomFilter, item: string) =
+  ## Adds ``item`` (a string) to the Bloom filter by setting every bit that
+  ## its hashes select.
+  const bitsPerInt = sizeof(int) * 8
+  for bitIndex in bf.hash(item):
+    let
+      wordIdx = bitIndex div bitsPerInt         # which int holds the bit
+      mask = 1 shl (bitIndex mod bitsPerInt)    # the bit within that int
+    bf.intArray[wordIdx] = bf.intArray[wordIdx] or mask
+
+proc lookup*(bf: BloomFilter, item: string): bool =
+  ## Tests whether ``item`` (a string) may have been inserted.
+  ## If the item was inserted, ``lookup`` is guaranteed to return ``true``.
+  ## If it was not, ``lookup`` returns ``false`` with a probability of
+  ## 1 - ``bf.errorRate``.
+  const bitsPerInt = sizeof(int) * 8
+  result = true
+  for bitIndex in bf.hash(item):
+    let
+      word = bf.intArray[bitIndex div bitsPerInt]
+      mask = 1 shl (bitIndex mod bitsPerInt)
+    # A single cleared bit proves the item was never inserted.
+    if (word and mask) == 0:
+      return false
+
+when isMainModule:
+  # Self-test and micro-benchmark, run when this module is compiled directly.
+  from random import rand, randomize
+  import times
+
+  # Test murmurhash 3 against known outputs for "hello" with seed 0
+  echo("Testing MurmurHash3 code...")
+  var hashOutputs: MurmurHashes = [0, 0]
+  rawMurmurHash("hello", 5, 0, hashOutputs)
+  assert int(hashOutputs[0]) == -3758069500696749310 # Correct murmur outputs (cast to int64)
+  assert int(hashOutputs[1]) == 6565844092913065241
+
+  # The wrapper must agree with the raw call, and the seed must matter
+  let hashOutputs2 = murmurHash("hello", 0)
+  assert hashOutputs2[0] == hashOutputs[0]
+  assert hashOutputs2[1] == hashOutputs[1]
+  let hashOutputs3 = murmurHash("hello", 10)
+  assert hashOutputs3[0] != hashOutputs[0]
+  assert hashOutputs3[1] != hashOutputs[1]
+
+  # Some quick and dirty tests (not complete)
+  let nElementsToTest = 100000
+  var bf = initializeBloomFilter(nElementsToTest, 0.001)
+  assert(bf of BloomFilter)
+  echo(bf)
+
+  var bf2 = initializeBloomFilter(10000, 0.001, k = 4,
+      forceNBitsPerElem = 20)
+  assert(bf2 of BloomFilter)
+  echo(bf2)
+
+  echo("Testing insertions and lookups...")
+  echo("Test element in BF2?: ", bf2.lookup("testing"))
+  echo("Inserting element.")
+  bf2.insert("testing")
+  echo("Test element in BF2?: ", bf2.lookup("testing"))
+  assert(bf2.lookup("testing"))
+
+  # Now test for speed with bf
+  randomize(2882) # Seed the RNG
+  let sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+  var kTestElements = newSeq[string](nElementsToTest)
+
+  for i in 0..<nElementsToTest:
+    var newString = ""
+    for j in 0..7:
+      # rand(51) draws from the first 52 characters only (the letters)
+      newString.add(sampleChars[rand(51)])
+    kTestElements[i] = newString
+
+  let insertStart = cpuTime()
+  for i in 0..<nElementsToTest:
+    bf.insert(kTestElements[i])
+  let insertEnd = cpuTime()
+  echo("Took ", formatFloat(insertEnd - insertStart, format = ffDecimal,
+      precision = 4), " seconds to insert ", nElementsToTest, " items.")
+
+  var falsePositives = 0
+  for i in 0..<nElementsToTest:
+    var falsePositiveString = ""
+    for j in 0..8: # By definition not in bf as 9 chars not 8
+      falsePositiveString.add(sampleChars[rand(51)])
+    if bf.lookup(falsePositiveString):
+      falsePositives += 1
+
+  echo("N false positives (of ", nElementsToTest, " lookups): ", falsePositives)
+  echo("False positive rate ", formatFloat(falsePositives / nElementsToTest,
+      format = ffDecimal, precision = 4))
+
+  var lookupErrors = 0
+  let lookupStart = cpuTime()
+  for i in 0..<nElementsToTest:
+    if not bf.lookup(kTestElements[i]):
+      lookupErrors += 1
+  let lookupEnd = cpuTime()
+  echo("Took ", formatFloat(lookupEnd - lookupStart, format = ffDecimal,
+      precision = 4), " seconds to lookup ", nElementsToTest, " items.")
+
+  echo("N lookup errors (should be 0): ", lookupErrors)
+
+  # Finally test correct k / mOverN specification,
+  # first case raises an error, the following ones work
+  doAssertRaises(BloomFilterError):
+    discard getMOverNBitsForK(k = 2, targetError = 0.00001)
+
+  assert getMOverNBitsForK(k = 2, targetError = 0.1) == 6
+  assert getMOverNBitsForK(k = 7, targetError = 0.01) == 10
+  assert getMOverNBitsForK(k = 7, targetError = 0.001) == 16
+
+  let bf3 = initializeBloomFilter(1000, 0.01, k = 4)
+  assert bf3.nBitsPerElem == 11
--- a/nim-bloom/src/murmur3.c
+++ b/nim-bloom/src/murmur3.c
@ -0,0 +1,314 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "murmur3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+#ifdef __GNUC__
+#define FORCE_INLINE __attribute__((always_inline)) inline
+#else
+#define FORCE_INLINE
+#endif
+
+// Rotate the 32-bit value x left by r bits. Both shift counts are masked to
+// 0..31 so that r == 0 no longer requests an undefined shift by 32; for r in
+// 1..31 the result is unchanged.
+static inline FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << (r & 31)) | (x >> ((32 - r) & 31));
+}
+
+// Rotate the 64-bit value x left by r bits. Both shift counts are masked to
+// 0..63 so that r == 0 no longer requests an undefined shift by 64; for r in
+// 1..63 the result is unchanged.
+static inline FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << (r & 63)) | (x >> ((64 - r) & 63));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+#define getblock(p, i) (p[i])
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+// 32-bit finalization mix: alternating xor-shift and multiply steps.
+static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h )
+{
+  h = (h ^ (h >> 16)) * 0x85ebca6b;
+  h = (h ^ (h >> 13)) * 0xc2b2ae35;
+
+  return h ^ (h >> 16);
+}
+
+//----------
+
+// 64-bit finalization mix: alternating xor-shift and multiply steps.
+static inline FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xff51afd7ed558ccd);
+  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+
+  return k ^ (k >> 33);
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{ // hashes the len bytes at key, starting from seed; stores one uint32_t through out
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4; // number of complete 4-byte blocks
+  int i;
+
+  uint32_t h1 = seed; // running hash state
+
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body: mix every complete 4-byte block into h1
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); // one past the last complete block
+
+  for(i = -nblocks; i; i++) // negative indices run from the first block up to blocks[-1]
+  {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+
+    h1 ^= k1;
+    h1 = ROTL32(h1,13);
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail: fold in the len & 3 bytes that follow the last complete block
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3) // no break statements: each case also runs the cases below it
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization: xor in the length, then run the fmix32 mix
+
+  h1 ^= len;
+
+  h1 = fmix32(h1);
+
+  *(uint32_t*)out = h1; // out must have room for 4 bytes
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{ // hashes the len bytes at key, starting from seed; stores four uint32_t through out
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16; // number of complete 16-byte blocks
+  int i;
+
+  uint32_t h1 = seed; // four 32-bit lanes of hash state, all seeded alike
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b;
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5;
+  uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body: mix every complete 16-byte block, one 32-bit word per lane
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); // one past the last complete block
+
+  for(i = -nblocks; i; i++) // negative indices run from the first block up to the last
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail: fold in the len & 15 bytes that follow the last complete block
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15) // no break statements: each case also runs the cases below it
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization: xor in the length, cross-add the lanes, run fmix32, cross-add again
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix32(h1);
+  h2 = fmix32(h2);
+  h3 = fmix32(h3);
+  h4 = fmix32(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1; // out must have room for 16 bytes
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{ // hashes the len bytes at key, starting from seed; stores two uint64_t through out
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16; // number of complete 16-byte blocks
+  int i;
+
+  uint64_t h1 = seed; // two 64-bit lanes of hash state, both seeded alike
+  uint64_t h2 = seed;
+
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body: mix every complete 16-byte block, one 64-bit word per lane
+
+  const uint64_t * blocks = (const uint64_t *)(data); // data is read directly as 64-bit words; getblock does no alignment or endian handling
+
+  for(i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail: fold in the len & 15 bytes that follow the last complete block
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15) // no break statements: each case also runs the cases below it
+  {
+  case 15: k2 ^= (uint64_t)(tail[14]) << 48;
+  case 14: k2 ^= (uint64_t)(tail[13]) << 40;
+  case 13: k2 ^= (uint64_t)(tail[12]) << 32;
+  case 12: k2 ^= (uint64_t)(tail[11]) << 24;
+  case 11: k2 ^= (uint64_t)(tail[10]) << 16;
+  case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
+  case  9: k2 ^= (uint64_t)(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= (uint64_t)(tail[ 7]) << 56;
+  case  7: k1 ^= (uint64_t)(tail[ 6]) << 48;
+  case  6: k1 ^= (uint64_t)(tail[ 5]) << 40;
+  case  5: k1 ^= (uint64_t)(tail[ 4]) << 32;
+  case  4: k1 ^= (uint64_t)(tail[ 3]) << 24;
+  case  3: k1 ^= (uint64_t)(tail[ 2]) << 16;
+  case  2: k1 ^= (uint64_t)(tail[ 1]) << 8;
+  case  1: k1 ^= (uint64_t)(tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization: xor in the length, cross-add the lanes, run fmix64, cross-add again
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1; // out must have room for 16 bytes
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
--- a/nim-bloom/src/murmur3.h
+++ b/nim-bloom/src/murmur3.h
@ -0,0 +1,21 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the
+// public domain. The author hereby disclaims copyright to this source
+// code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+#include <stdint.h>
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out);
+
+void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out);
+
+void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out);
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
--- a/nim-bloom/src/private/probabilities.nim
+++ b/nim-bloom/src/private/probabilities.nim
@ -0,0 +1,103 @@
+#
+# ### Probability table declaration, in private/ for readability ###
+# Table for k hashes from 1..12 from http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+# Iterate along the sequence at position [k] until the error rate is < specified, otherwise
+# raise an error.
+#
+
+type
+  TErrorForK = seq[float] ## Error rates for one k, indexed by m/n (bits per element).
+  TAllErrorRates* = array[0..12, TErrorForK] ## One ``TErrorForK`` row per k, for k in 0..12.
+
+var kErrors*: TAllErrorRates ## The table itself; its rows are assigned below.
+
+kErrors[0] = @[1.0]
+kErrors[1] = @[1.0, 1.0,
+              0.3930000000, 0.2830000000, 0.2210000000, 0.1810000000, 0.1540000000,
+              0.1330000000, 0.1180000000, 0.1050000000, 0.0952000000, 0.0869000000,
+              0.0800000000, 0.0740000000, 0.0689000000, 0.0645000000, 0.0606000000,
+              0.0571000000, 0.0540000000, 0.0513000000, 0.0488000000, 0.0465000000,
+              0.0444000000, 0.0425000000, 0.0408000000, 0.0392000000, 0.0377000000,
+              0.0364000000, 0.0351000000, 0.0339000000, 0.0328000000, 0.0317000000,
+              0.0308000000 ]
+
+kErrors[2] = @[1.0, 1.0,
+              0.4000000000, 0.2370000000, 0.1550000000, 0.1090000000, 0.0804000000,
+              0.0618000000, 0.0489000000, 0.0397000000, 0.0329000000, 0.0276000000,
+              0.0236000000, 0.0203000000, 0.0177000000, 0.0156000000, 0.0138000000,
+              0.0123000000, 0.0111000000, 0.0099800000, 0.0090600000, 0.0082500000,
+              0.0075500000, 0.0069400000, 0.0063900000, 0.0059100000, 0.0054800000,
+              0.0051000000, 0.0047500000, 0.0044400000, 0.0041600000, 0.0039000000,
+              0.0036700000 ]
+
+kErrors[3] = @[1.0, 1.0, 1.0,
+              0.2530000000, 0.1470000000, 0.0920000000, 0.0609000000, 0.0423000000,
+              0.0306000000, 0.0228000000, 0.0174000000, 0.0136000000, 0.0108000000,
+              0.0087500000, 0.0071800000, 0.0059600000, 0.0050000000, 0.0042300000,
+              0.0036200000, 0.0031200000, 0.0027000000, 0.0023600000, 0.0020700000,
+              0.0018300000, 0.0016200000, 0.0014500000, 0.0012900000, 0.0011600000,
+              0.0010500000, 0.0009490000, 0.0008620000, 0.0007850000, 0.0007170000 ]
+
+kErrors[4] = @[1.0, 1.0, 1.0, 1.0,
+              0.1600000000, 0.0920000000, 0.0561000000, 0.0359000000, 0.0240000000,
+              0.0166000000, 0.0118000000, 0.0086400000, 0.0064600000, 0.0049200000,
+              0.0038100000, 0.0030000000, 0.0023900000, 0.0019300000, 0.0015800000,
+              0.0013000000, 0.0010800000, 0.0009050000, 0.0007640000, 0.0006490000,
+              0.0005550000, 0.0004780000, 0.0004130000, 0.0003590000, 0.0003140000,
+              0.0002760000, 0.0002430000, 0.0002150000, 0.0001910000 ]
+
+kErrors[5] = @[1.0, 1.0, 1.0, 1.0, 1.0,
+              0.1010000000, 0.0578000000, 0.0347000000, 0.0217000000, 0.0141000000,
+              0.0094300000, 0.0065000000, 0.0045900000, 0.0033200000, 0.0024400000,
+              0.0018300000, 0.0013900000, 0.0010700000, 0.0008390000, 0.0006630000,
+              0.0005300000, 0.0004270000, 0.0003470000, 0.0002850000, 0.0002350000,
+              0.0001960000, 0.0001640000, 0.0001380000, 0.0001170000, 0.0000996000,
+              0.0000853000, 0.0000733000, 0.0000633000 ]
+
+kErrors[6] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0638000000, 0.0364000000, 0.0216000000, 0.0133000000, 0.0084400000,
+              0.0055200000, 0.0037100000, 0.0025500000, 0.0017900000, 0.0012800000,
+              0.0009350000, 0.0006920000, 0.0005190000, 0.0003940000, 0.0003030000,
+              0.0002360000, 0.0001850000, 0.0001470000, 0.0001170000, 0.0000944000,
+              0.0000766000, 0.0000626000, 0.0000515000, 0.0000426000, 0.0000355000,
+              0.0000297000, 0.0000250000 ]
+
+kErrors[7] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0229000000, 0.0135000000, 0.0081900000, 0.0051300000, 0.0032900000,
+              0.0021700000, 0.0014600000, 0.0010000000, 0.0007020000, 0.0004990000,
+              0.0003600000, 0.0002640000, 0.0001960000, 0.0001470000, 0.0001120000,
+              0.0000856000, 0.0000663000, 0.0000518000, 0.0000408000, 0.0000324000,
+              0.0000259000, 0.0000209000, 0.0000169000, 0.0000138000, 0.0000113000 ]
+
+kErrors[8] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0145000000, 0.0084600000, 0.0050900000, 0.0031400000, 0.0019900000,
+              0.0012900000, 0.0008520000, 0.0005740000, 0.0003940000, 0.0002750000,
+              0.0001940000, 0.0001400000, 0.0001010000, 0.0000746000, 0.0000555000,
+              0.0000417000, 0.0000316000, 0.0000242000, 0.0000187000, 0.0000146000,
+              0.0000114000, 0.0000090100, 0.0000071600, 0.0000057300 ]
+
+kErrors[9] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0053100000, 0.0031700000, 0.0019400000, 0.0012100000, 0.0007750000,
+              0.0005050000, 0.0003350000, 0.0002260000, 0.0001550000, 0.0001080000,
+              0.0000759000, 0.0000542000, 0.0000392000, 0.0000286000, 0.0000211000,
+              0.0000157000, 0.0000118000, 0.0000089600, 0.0000068500, 0.0000052800,
+              0.0000041000, 0.0000032000]
+
+kErrors[10] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0033400000, 0.0019800000, 0.0012000000, 0.0007440000, 0.0004700000,
+              0.0003020000, 0.0001980000, 0.0001320000, 0.0000889000, 0.0000609000,
+              0.0000423000, 0.0000297000, 0.0000211000, 0.0000152000, 0.0000110000,
+              0.0000080700, 0.0000059700, 0.0000044500, 0.0000033500, 0.0000025400,
+              0.0000019400]
+
+kErrors[11] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0021000000, 0.0012400000, 0.0007470000, 0.0004590000, 0.0002870000,
+              0.0001830000, 0.0001180000, 0.0000777000, 0.0000518000, 0.0000350000,
+              0.0000240000, 0.0000166000, 0.0000116000, 0.0000082300, 0.0000058900,
+              0.0000042500, 0.0000031000, 0.0000022800, 0.0000016900, 0.0000012600]
+
+kErrors[12] = @[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+              0.0007780000, 0.0004660000, 0.0002840000, 0.0001760000, 0.0001110000,
+              0.0000712000, 0.0000463000, 0.0000305000, 0.0000204000, 0.0000138000,
+              0.0000094200, 0.0000065200, 0.0000045600, 0.0000032200, 0.0000022900,
+              0.0000016500, 0.0000012000, 0.0000008740]
--- a/nim-bloom/tests/config.nims
+++ b/nim-bloom/tests/config.nims
@ -0,0 +1 @@
+switch("path", "$projectDir/../src")
--- a/nim-bloom/tests/test.nim
+++ b/nim-bloom/tests/test.nim
@ -0,0 +1,102 @@
+import unittest
+include bloom
+from random import rand, randomize
+import times
+
+suite "murmur":
+  # Checks the MurmurHash3 binding using the key "hello"
+  setup:
+    var hashOutputs: MurmurHashes = [0, 0]
+    rawMurmurHash("hello", 5, 0, hashOutputs)
+
+  test "raw":
+    check:
+      int(hashOutputs[0]) == -3758069500696749310 # Correct murmur outputs (cast to int64)
+      int(hashOutputs[1]) == 6565844092913065241
+
+  test "wrapped":
+    # The Nim wrapper must return exactly what the raw call produced
+    let wrapped = murmurHash("hello", 0)
+    check:
+      wrapped[0] == hashOutputs[0]
+      wrapped[1] == hashOutputs[1]
+
+  test "seed":
+    # A different seed must change both output words
+    let reseeded = murmurHash("hello", 10)
+    check:
+      reseeded[0] != hashOutputs[0]
+      reseeded[1] != hashOutputs[1]
+
+
+suite "bloom":
+
+  setup:
+    # Runs before every test: builds a filter for nElementsToTest items and
+    # fills it with that many random 8-character strings (seeded RNG, so the
+    # data is the same on every run).
+    let nElementsToTest = 100000
+    var bf = initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001)
+    randomize(2882) # Seed the RNG
+    let sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+    var kTestElements = newSeq[string](nElementsToTest)
+
+    for i in 0..<nElementsToTest:
+      var newString = ""
+      for j in 0..7:
+        # rand(51) draws from the first 52 characters only (the letters)
+        newString.add(sampleChars[rand(51)])
+      kTestElements[i] = newString
+
+    for i in 0..<nElementsToTest:
+      bf.insert(kTestElements[i])
+
+  test "params":
+    check(bf.capacity == nElementsToTest)
+    check(bf.errorRate == 0.001)
+    check(bf.kHashes == 10)
+    check(bf.nBitsPerElem == 15)
+    check(bf.mBits == 15 * nElementsToTest)
+    check(bf.useMurmurHash == true)
+
+  test "not hit":
+    check(bf.lookup("nothing") == false)
+
+  test "hit":
+    bf.insert("hit")
+    check(bf.lookup("hit") == true)
+
+  test "force params":
+    let bf2 = initializeBloomFilter(10000, 0.001, k = 4, forceNBitsPerElem = 20)
+    check(bf2.capacity == 10000)
+    check(bf2.errorRate == 0.001)
+    check(bf2.kHashes == 4)
+    check(bf2.nBitsPerElem == 20)
+    check(bf2.mBits == 200000)
+    check(bf2.useMurmurHash == true)
+
+  test "error rate":
+    var falsePositives = 0
+    for i in 0..<nElementsToTest:
+      var falsePositiveString = ""
+      for j in 0..8: # By definition not in bf as 9 chars not 8
+        falsePositiveString.add(sampleChars[rand(51)])
+      if bf.lookup(falsePositiveString):
+        falsePositives += 1
+
+    check falsePositives / nElementsToTest < bf.errorRate
+
+  test "lookup errors":
+    # Every inserted element must be found: no false negatives
+    var lookupErrors = 0
+    for i in 0..<nElementsToTest:
+      if not bf.lookup(kTestElements[i]):
+        lookupErrors += 1
+
+    check lookupErrors == 0
+
+  # Finally test correct k / mOverN specification,
+  test "k/(m/n) spec":
+    expect(BloomFilterError):
+      discard getMOverNBitsForK(k = 2, targetError = 0.00001)
+
+    check getMOverNBitsForK(k = 2, targetError = 0.1) == 6
+    check getMOverNBitsForK(k = 7, targetError = 0.01) == 10
+    check getMOverNBitsForK(k = 7, targetError = 0.001) == 16
+
+    let bf3 = initializeBloomFilter(1000, 0.01, k = 4)
+    check bf3.nBitsPerElem == 11