feat: simplify with only using nim hash

This commit is contained in:
shash256 2024-12-09 14:22:50 +04:00
parent 326cb1ea68
commit 3fbd89e868
16 changed files with 55 additions and 1756 deletions

6
.gitignore vendored
View File

@ -1,2 +1,6 @@
nimcache
nimcache/*
tests/bloom
nim-bloom/bloom
.DS_Store
src/.DS_Store

View File

@ -3,47 +3,18 @@ import hashes
import strutils
import private/probabilities
# Import MurmurHash3 code with both 128-bit and 32-bit implementations
{.compile: "murmur3.c".}
type
  # Selects which hashing strategy a filter uses to derive its bit positions.
  HashType* = enum
    htMurmur128, # Default: MurmurHash3_x64_128
    htMurmur32, # MurmurHash3_x86_32
    htNimHash # Nim's Hash (currently Farm Hash)
  # Module-specific exception type; derives from CatchableError.
  BloomFilterError* = object of CatchableError
  # Two ints that receive the output of the 128-bit MurmurHash3 binding.
  MurmurHashes = array[0..1, int]
  # Bloom filter state together with the parameters it was configured with.
  BloomFilter* = object
    capacity*: int # presumably the expected number of items - TODO confirm
    errorRate*: float # error rate handed to initializeBloomFilter
    kHashes*: int # number of bit positions computed for each item
    mBits*: int # modulus applied to every computed hash position
    intArray: seq[int] # backing storage, allocated as a seq of ints
    hashType*: HashType # hashing strategy chosen at construction time
{.push overflowChecks: off.} # Turn off overflow checks for hashing operations
# FFI declaration bound to the C symbol `MurmurHash3_x64_128`.
# The wrapper in this file passes a string, its length and a seed, and reads
# the hash back from `outHashes`.
# NOTE(review): the exact C-side semantics of `len` and `seed` are not visible
# here - confirm against murmur3.c.
proc rawMurmurHash128(key: cstring, len: int, seed: uint32,
                      outHashes: var MurmurHashes): void {.
  importc: "MurmurHash3_x64_128".}
# FFI declaration bound to the C symbol `MurmurHash3_x86_32`.
# The wrapper in this file passes the address of a single uint32 as
# `outHashes` and returns that value as the hash.
# NOTE(review): the exact C-side semantics of `len` and `seed` are not visible
# here - confirm against murmur3.c.
proc rawMurmurHash32(key: cstring, len: int, seed: uint32,
                     outHashes: ptr uint32): void {.
  importc: "MurmurHash3_x86_32".}
proc murmurHash128(key: string, seed = 0'u32): MurmurHashes =
  ## Runs the 128-bit MurmurHash3 binding over `key` with the given `seed`
  ## and returns the two ints it produced.
  # The binding fills its `var` argument, so the implicit (zero-initialised)
  # `result` can be handed over directly.
  rawMurmurHash128(key.cstring, len(key), seed, result)
proc murmurHash32(key: string, seed = 0'u32): uint32 =
  ## Runs the 32-bit MurmurHash3 binding over `key` with the given `seed`
  ## and returns the resulting hash value.
  # Write straight into the implicit `result` variable. The previous version
  # declared its own `var result`, which shadowed the implicit one and
  # triggered the compiler's "result shadowed" warning.
  rawMurmurHash32(key.cstring, key.len, seed, addr result)
proc hashN(item: string, n: int, maxValue: int): int =
## Get the nth hash using Nim's built-in hash function using
## the double hashing technique from Kirsch and Mitzenmacher, 2008:
@ -76,8 +47,7 @@ proc getMOverNBitsForK(k: int, targetError: float,
"Specified value of k and error rate not achievable using less than 4 bytes / element.")
proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0,
forceNBitsPerElem = 0,
hashType = htMurmur128): BloomFilter =
forceNBitsPerElem = 0): BloomFilter =
## Initializes a Bloom filter with specified parameters.
##
## Parameters:
@ -87,10 +57,6 @@ proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0,
## See http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html for
## useful tables on k and m/n (n bits per element) combinations.
## - forceNBitsPerElem: Optional override for bits per element
## - hashType: Choose hash function:
## * htMurmur128: MurmurHash3_x64_128 (default) - recommended
## * htMurmur32: MurmurHash3_x86_32
## * htNimHash: Nim's Hash
var
kHashes: int
nBitsPerElem: int
@ -115,49 +81,23 @@ proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0,
errorRate: errorRate,
kHashes: kHashes,
mBits: mBits,
intArray: newSeq[int](mInts),
hashType: hashType
intArray: newSeq[int](mInts)
)
proc `$`*(bf: BloomFilter): string =
## Prints the configuration of the Bloom filter.
let hashType = case bf.hashType
of htMurmur128: "MurmurHash3_x64_128"
of htMurmur32: "MurmurHash3_x86_32"
of htNimHash: "NimHashHash"
"Bloom filter with $1 capacity, $2 error rate, $3 hash functions, and requiring $4 bits of memory. Using $5." %
"Bloom filter with $1 capacity, $2 error rate, $3 hash functions, and requiring $4 bits of memory." %
[$bf.capacity,
formatFloat(bf.errorRate, format = ffScientific, precision = 1),
$bf.kHashes,
$(bf.mBits div bf.capacity),
hashType]
{.push overflowChecks: off.} # Turn off overflow checks for hash computations
$(bf.mBits div bf.capacity)]
proc computeHashes(bf: BloomFilter, item: string): seq[int] =
var hashes = newSeq[int](bf.kHashes)
case bf.hashType
of htMurmur128:
let murmurHashes = murmurHash128(item, 0'u32)
for i in 0..<bf.kHashes:
hashes[i] = abs((murmurHashes[0].int64 + i.int64 * murmurHashes[1].int64).int) mod bf.mBits
of htMurmur32:
let baseHash = murmurHash32(item, 0'u32)
# let rotated = ((baseHash shl 13) or (baseHash shr (32 - 13)))
let secondHash = murmurHash32(item & " b", 0'u32)
for i in 0..<bf.kHashes:
hashes[i] = abs((baseHash.int64 + i.int64 * secondHash.int64).int) mod bf.mBits
of htNimHash:
for i in 0..<bf.kHashes:
hashes[i] = hashN(item, i, bf.mBits)
for i in 0..<bf.kHashes:
hashes[i] = hashN(item, i, bf.mBits)
hashes
{.pop.} # Restore overflow checks
proc insert*(bf: var BloomFilter, item: string) =
## Insert an item (string) into the Bloom filter.
let hashSet = bf.computeHashes(item)

10
nim-bloom/.gitignore vendored
View File

@ -1,10 +0,0 @@
nimcache
nimcache/*
tests/test
benches/bench
benches/bench_arch_end
bloom
*.html
*.css
.DS_Store
src/.DS_Store

View File

@ -1,20 +0,0 @@
The MIT License (MIT)
Copyright (c) 2013 Nick Greenfield
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,121 +0,0 @@
# nim-bloom
A high-performance Bloom filter implementation in Nim offering standard and custom hash function options with different performance characteristics and false positive rates.
## Features
- Fast string element insertion and lookup
- Configurable error rates
- Choice between standard Nim hash and custom MurmurHash3 (128-bit or 32-bit)
- Optimized for supporting different use cases of speed and accuracy
- Comprehensive test suite and benchmarks
## Usage
Basic usage (defaults to MurmurHash3_128):
```nim
import bloom2
# Initialize with default hash (MurmurHash3_128)
var bf = initializeBloomFilter(capacity = 10000, errorRate = 0.01)
# Or explicitly specify hash type
var bf32 = initializeBloomFilter(
capacity = 10000,
errorRate = 0.01,
hashType = htMurmur32 # Use 32-bit implementation
)
# Basic operations
bf.insert("test")
assert bf.lookup("test")
```
## Hash Function Selection
1. Use MurmurHash3_128 (default) when:
- You need the best balance of performance and accuracy
- Memory isn't severely constrained
- Working with large datasets
- False positive rates are important
2. Use MurmurHash3_32 when:
- Running on 32-bit systems
- Memory is constrained
- Working with smaller datasets
- String concatenation overhead for second hash, causing higher insertion and lookup times, is acceptable.
3. Use NimHash when:
- Consistency with Nim's hashing is important
- Working with smaller datasets where performance is less critical
- Future availability of better hash functions or performant implementations
Nim's Hash Implementation:
- Default (no flags): Uses FarmHash implementation
- With `-d:nimStringHash2`: Uses Nim's MurmurHash3_32 implementation
- Our implementation allows an explicit choice regardless of compilation flags, and our MurmurHash3_32 performs better because it directly uses a native C implementation
## Performance Characteristics
### For 1M items - Random Strings
```
Insertion Speed:
MurmurHash3_128: ~6.8M ops/sec
MurmurHash3_32: ~5.9M ops/sec
FarmHash: ~2.1M ops/sec
False Positive Rates:
MurmurHash3_128: ~0.84%
MurmurHash3_32: ~0.83%
FarmHash: ~0.82%
```
These measurements show MurmurHash3_128's balanced performance profile, offering best speed and competitive false positive rates.
Performance will vary based on:
- Choice of hash function
- Hardware specifications
- Data size and memory access patterns (inside vs outside cache)
- Compiler optimizations
For detailed benchmarks across different data patterns and sizes, see [benches](benches/).
## Implementation Details
### Double Hashing Technique
This implementation uses the Kirsch-Mitzenmacher method to generate k hash values from two initial hashes. The implementation varies by hash type:
1. MurmurHash3_128:
```nim
h(i) = abs(hash1 + i * hash2) mod m
```
- Uses both 64-bit hashes from 128-bit output
- Natural double-hash implementation
2. MurmurHash3_32:
```nim
let baseHash = murmurHash32(item, 0'u32)
let secondHash = murmurHash32(item & " b", 0'u32)
```
- Uses string concatenation by default for the second hash
- Bit Rotation for second hash provides sufficient randomness in some use cases while being much faster than string concatenation (but results in higher FP rate)
- Choose between bit rotation or string concatenation as per your use-case.
3. Nim's Hash:
```nim
let
hashA = abs(hash(item)) mod maxValue
hashB = abs(hash(item & " b")) mod maxValue
h(n) = abs(hashA + n * hashB) mod maxValue
```
- Farm Hash or Nim's Murmur Hash based (if the compilation flag is passed)
- Uses string concatenation by default.
- Lower FP rate than bit rotation but comes at the cost of higher insertion and lookup times.
*Tip:* Bit rotation values can be made configurable as well. Use prime numbers for better mixing: 7, 11, 13, 17 for 32-bit; 21, 23, 27, 33 for 64-bit. Smaller rotations provide less mixing but are faster than larger rotations.
## Testing
Run the test suite:
```bash
nimble test
```

View File

@ -1,123 +0,0 @@
import times, random, strutils
include bloom
type
  # Shapes of string data that generateBenchData can produce.
  DataPattern = enum
    dpRandom, # Random strings
    dpSequential, # Sequential numbers
    dpFixed, # Fixed length strings
    dpLong, # Long strings
    dpSpecial # Strings with special characters
type
  # Measurements collected by one benchmarkHashType run.
  BenchmarkResult = tuple[
    insertTime: float, # cpuTime() difference around the insert loop
    lookupTime: float, # cpuTime() difference around the lookup loop
    falsePositives: int # number of lookups that returned true
  ]
proc generateBenchData(pattern: DataPattern, size: int, isLookupData: bool = false): seq[string] =
  ## Builds `size` benchmark strings shaped according to `pattern`.
  ##
  ## When `isLookupData` is true, the numeric part of every counter-based
  ## pattern is shifted by `size * 2` so that it does not coincide with the
  ## values generated for insertion. `dpRandom` does not use the shift.
  let shift =
    if isLookupData: size * 2
    else: 0
  result = newSeq[string](size)
  for idx in 0..<size:
    case pattern
    of dpRandom:
      # Lowercase word; the inclusive range below yields 6 to 16 letters.
      var word = ""
      for _ in 0..rand(5..15):
        word.add(chr(rand(ord('a')..ord('z'))))
      result[idx] = word
    of dpSequential:
      result[idx] = $(idx + shift)
    of dpFixed:
      # Counter left-padded with zeros to a width of 10.
      result[idx] = "fixed" & align($(idx + shift), 10, '0')
    of dpLong:
      result[idx] = repeat("x", 100) & $(idx + shift)
    of dpSpecial:
      result[idx] = "test@" & $(idx + shift) & "#$%^&*" & $rand(1000)
proc benchmarkHashType(hashType: HashType, size: int, errorRate: float,
                       data: seq[string], lookupData: seq[string]): BenchmarkResult =
  ## Creates a Bloom filter for `hashType`, inserts every item of `data`,
  ## then looks up every item of `lookupData`.
  ##
  ## Returns the CPU time spent in each phase and the number of lookups that
  ## reported a hit (every hit is counted as a false positive).
  var filter = initializeBloomFilter(size, errorRate, hashType = hashType)

  # Insert phase.
  let insertStart = cpuTime()
  for entry in data:
    filter.insert(entry)
  result.insertTime = cpuTime() - insertStart

  # Lookup phase; `result.falsePositives` starts at zero.
  let lookupStart = cpuTime()
  for probe in lookupData:
    if filter.lookup(probe):
      inc result.falsePositives
  result.lookupTime = cpuTime() - lookupStart
proc printResults(hashName: string, result: BenchmarkResult,
                  dataSize: int, lookupDataSize: int) =
  ## Prints the timings, throughput and false-positive percentage of one
  ## benchmark run under the heading `hashName`.
  let
    insertRate = dataSize.float / result.insertTime
    lookupRate = lookupDataSize.float / result.lookupTime
    fpPercent = result.falsePositives.float / lookupDataSize.float * 100
  echo "\n", hashName, " Results:"
  echo " Insert time: ", result.insertTime, "s (", insertRate, " ops/sec)"
  echo " Lookup time: ", result.lookupTime, "s (", lookupRate, " ops/sec)"
  echo " False positives: ", result.falsePositives, " (", fpPercent, "%)"
proc runBenchmark(size: int, errorRate: float, pattern: DataPattern, name: string) =
  ## Benchmarks all three hash types on the same generated data set and
  ## prints the individual results followed by pairwise comparisons.
  ##
  ## `size` items are inserted and `size div 2` items are looked up.

  proc compare(label: string, numerator, denominator: float, suffix: string) =
    # Prints one comparison line as the ratio numerator / denominator.
    echo label, numerator / denominator, suffix

  echo "\n=== Benchmark: ", name, " ==="
  echo "Size: ", size, " items"
  echo "Pattern: ", pattern

  # Generate test data
  let
    insertItems = generateBenchData(pattern, size, false)
    probeItems = generateBenchData(pattern, size div 2, true)
    probeCount = probeItems.len

  # Run benchmarks for each hash type
  let
    nimRes = benchmarkHashType(htNimHash, size, errorRate, insertItems, probeItems)
    m128Res = benchmarkHashType(htMurmur128, size, errorRate, insertItems, probeItems)
    m32Res = benchmarkHashType(htMurmur32, size, errorRate, insertItems, probeItems)

  # Print individual results
  printResults("Nim's Hash (Farm Hash)", nimRes, size, probeCount)
  printResults("MurmurHash3_128", m128Res, size, probeCount)
  printResults("MurmurHash3_32", m32Res, size, probeCount)

  # Print comparisons
  echo "\nComparison (higher means better/faster):"
  echo " Insert Speed:"
  compare(" Murmur128 vs NimHash: ", nimRes.insertTime, m128Res.insertTime, "x faster")
  compare(" Murmur32 vs NimHash: ", nimRes.insertTime, m32Res.insertTime, "x faster")
  compare(" Murmur128 vs Murmur32: ", m32Res.insertTime, m128Res.insertTime, "x faster")
  echo " Lookup Speed:"
  compare(" Murmur128 vs NimHash: ", nimRes.lookupTime, m128Res.lookupTime, "x faster")
  compare(" Murmur32 vs NimHash: ", nimRes.lookupTime, m32Res.lookupTime, "x faster")
  compare(" Murmur128 vs Murmur32: ", m32Res.lookupTime, m128Res.lookupTime, "x faster")
  echo " False Positive Rates:"
  let
    fpNim = nimRes.falsePositives.float / probeCount.float
    fp128 = m128Res.falsePositives.float / probeCount.float
    fp32 = m32Res.falsePositives.float / probeCount.float
  compare(" Murmur128 vs NimHash: ", fpNim, fp128, "x better")
  compare(" Murmur32 vs NimHash: ", fpNim, fp32, "x better")
  compare(" Murmur128 vs Murmur32: ", fp32, fp128, "x better")
when isMainModule:
  const errorRate = 0.01
  # Data set sizes, run in this order for every pattern.
  const datasets = [
    (label: "Small ", count: 10_000),
    (label: "Medium ", count: 100_000),
    (label: "Large ", count: 1_000_000)
  ]
  # Iterating the enum type visits every pattern in declaration order.
  for pattern in DataPattern:
    for ds in datasets:
      runBenchmark(ds.count, errorRate, pattern, ds.label & $pattern)

View File

@ -1,495 +0,0 @@
=== Benchmark: Small dpRandom ===
Size: 10000 items
Pattern: dpRandom
Nim's Hash (Farm Hash) Results:
Insert time: 0.006714000000000001s (1489425.081918379 ops/sec)
Lookup time: 0.003387999999999995s (1475796.930342387 ops/sec)
False positives: 51 (1.02%)
MurmurHash3_128 Results:
Insert time: 0.001955999999999999s (5112474.437627814 ops/sec)
Lookup time: 0.0009989999999999999s (5005005.005005006 ops/sec)
False positives: 25 (0.5%)
MurmurHash3_32 Results:
Insert time: 0.002221999999999995s (4500450.045004511 ops/sec)
Lookup time: 0.001138s (4393673.110720562 ops/sec)
False positives: 43 (0.86%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.432515337423315x faster
Murmur32 vs NimHash: 3.021602160216029x faster
Murmur128 vs Murmur32: 1.135991820040897x faster
Lookup Speed:
Murmur128 vs NimHash: 3.391391391391387x faster
Murmur32 vs NimHash: 2.977152899824249x faster
Murmur128 vs Murmur32: 1.139139139139139x faster
False Positive Rates:
Murmur128 vs NimHash: 2.04x better
Murmur32 vs NimHash: 1.186046511627907x better
Murmur128 vs Murmur32: 1.72x better
=== Benchmark: Medium dpRandom ===
Size: 100000 items
Pattern: dpRandom
Nim's Hash (Farm Hash) Results:
Insert time: 0.04622000000000001s (2163565.556036348 ops/sec)
Lookup time: 0.02333199999999999s (2142979.59883422 ops/sec)
False positives: 402 (0.804%)
MurmurHash3_128 Results:
Insert time: 0.013294s (7522190.461862494 ops/sec)
Lookup time: 0.006861999999999979s (7286505.392014012 ops/sec)
False positives: 405 (0.8099999999999999%)
MurmurHash3_32 Results:
Insert time: 0.01558700000000002s (6415602.745877968 ops/sec)
Lookup time: 0.008157999999999999s (6128953.174797745 ops/sec)
False positives: 406 (0.8120000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.476756431472846x faster
Murmur32 vs NimHash: 2.965291589144798x faster
Murmur128 vs Murmur32: 1.172483827290508x faster
Lookup Speed:
Murmur128 vs NimHash: 3.400174876129417x faster
Murmur32 vs NimHash: 2.860014709487619x faster
Murmur128 vs Murmur32: 1.188866219761006x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9925925925925927x better
Murmur32 vs NimHash: 0.9901477832512315x better
Murmur128 vs Murmur32: 1.002469135802469x better
=== Benchmark: Large dpRandom ===
Size: 1000000 items
Pattern: dpRandom
Nim's Hash (Farm Hash) Results:
Insert time: 0.4711130000000001s (2122632.998877127 ops/sec)
Lookup time: 0.2430289999999999s (2057367.639252929 ops/sec)
False positives: 4104 (0.8208%)
MurmurHash3_128 Results:
Insert time: 0.1467989999999999s (6812035.504329053 ops/sec)
Lookup time: 0.07689400000000002s (6502457.929097197 ops/sec)
False positives: 4187 (0.8373999999999999%)
MurmurHash3_32 Results:
Insert time: 0.1697340000000001s (5891571.517786653 ops/sec)
Lookup time: 0.08597399999999999s (5815711.726801126 ops/sec)
False positives: 4130 (0.826%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.209238482550974x faster
Murmur32 vs NimHash: 2.775595932459024x faster
Murmur128 vs Murmur32: 1.156234034291788x faster
Lookup Speed:
Murmur128 vs NimHash: 3.160571696101125x faster
Murmur32 vs NimHash: 2.826773210505501x faster
Murmur128 vs Murmur32: 1.118084635992405x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9801767375208981x better
Murmur32 vs NimHash: 0.9937046004842615x better
Murmur128 vs Murmur32: 0.9863864342010987x better
=== Benchmark: Small dpSequential ===
Size: 10000 items
Pattern: dpSequential
Nim's Hash (Farm Hash) Results:
Insert time: 0.004508000000000179s (2218278.615794056 ops/sec)
Lookup time: 0.002232000000000012s (2240143.369175615 ops/sec)
False positives: 42 (0.84%)
MurmurHash3_128 Results:
Insert time: 0.001201000000000008s (8326394.671107357 ops/sec)
Lookup time: 0.0006349999999999412s (7874015.748032225 ops/sec)
False positives: 47 (0.9400000000000001%)
MurmurHash3_32 Results:
Insert time: 0.00151200000000018s (6613756.613755827 ops/sec)
Lookup time: 0.0007530000000000037s (6640106.241699835 ops/sec)
False positives: 51 (1.02%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.753538717735346x faster
Murmur32 vs NimHash: 2.981481481481245x faster
Murmur128 vs Murmur32: 1.258950874271582x faster
Lookup Speed:
Murmur128 vs NimHash: 3.514960629921604x faster
Murmur32 vs NimHash: 2.964143426294822x faster
Murmur128 vs Murmur32: 1.185826771653659x faster
False Positive Rates:
Murmur128 vs NimHash: 0.8936170212765957x better
Murmur32 vs NimHash: 0.8235294117647057x better
Murmur128 vs Murmur32: 1.085106382978724x better
=== Benchmark: Medium dpSequential ===
Size: 100000 items
Pattern: dpSequential
Nim's Hash (Farm Hash) Results:
Insert time: 0.04492400000000019s (2225981.65791113 ops/sec)
Lookup time: 0.02218900000000001s (2253368.78633557 ops/sec)
False positives: 436 (0.872%)
MurmurHash3_128 Results:
Insert time: 0.012737s (7851142.341210647 ops/sec)
Lookup time: 0.006547999999999998s (7635919.364691511 ops/sec)
False positives: 422 (0.844%)
MurmurHash3_32 Results:
Insert time: 0.01476000000000011s (6775067.750677458 ops/sec)
Lookup time: 0.008092999999999684s (6178178.672927462 ops/sec)
False positives: 446 (0.8920000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.527047185365486x faster
Murmur32 vs NimHash: 3.043631436314354x faster
Murmur128 vs Murmur32: 1.1588286095627x faster
Lookup Speed:
Murmur128 vs NimHash: 3.388668295662801x faster
Murmur32 vs NimHash: 2.741752131471751x faster
Murmur128 vs Murmur32: 1.23594990836892x faster
False Positive Rates:
Murmur128 vs NimHash: 1.033175355450237x better
Murmur32 vs NimHash: 0.9775784753363228x better
Murmur128 vs Murmur32: 1.056872037914692x better
=== Benchmark: Large dpSequential ===
Size: 1000000 items
Pattern: dpSequential
Nim's Hash (Farm Hash) Results:
Insert time: 0.4540329999999999s (2202483.079423743 ops/sec)
Lookup time: 0.2223580000000003s (2248626.089459338 ops/sec)
False positives: 4037 (0.8074%)
MurmurHash3_128 Results:
Insert time: 0.1384119999999998s (7224807.097650506 ops/sec)
Lookup time: 0.07243599999999972s (6902645.093599894 ops/sec)
False positives: 4205 (0.8410000000000001%)
MurmurHash3_32 Results:
Insert time: 0.1643889999999999s (6083132.083046923 ops/sec)
Lookup time: 0.08048799999999989s (6212106.152469941 ops/sec)
False positives: 4024 (0.8048%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.280300840967551x faster
Murmur32 vs NimHash: 2.761942709062043x faster
Murmur128 vs Murmur32: 1.187678813975668x faster
Lookup Speed:
Murmur128 vs NimHash: 3.069716715445374x faster
Murmur32 vs NimHash: 2.762622999701826x faster
Murmur128 vs Murmur32: 1.111160196587335x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9600475624256836x better
Murmur32 vs NimHash: 1.003230616302187x better
Murmur128 vs Murmur32: 0.9569560047562424x better
=== Benchmark: Small dpFixed ===
Size: 10000 items
Pattern: dpFixed
Nim's Hash (Farm Hash) Results:
Insert time: 0.004486999999999686s (2228660.574994585 ops/sec)
Lookup time: 0.002098999999999851s (2382086.707956339 ops/sec)
False positives: 43 (0.86%)
MurmurHash3_128 Results:
Insert time: 0.001263000000000236s (7917656.373711901 ops/sec)
Lookup time: 0.0006620000000001625s (7552870.090632588 ops/sec)
False positives: 47 (0.9400000000000001%)
MurmurHash3_32 Results:
Insert time: 0.001565000000000261s (6389776.357826412 ops/sec)
Lookup time: 0.0007539999999996994s (6631299.734750655 ops/sec)
False positives: 35 (0.7000000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.552652414884281x faster
Murmur32 vs NimHash: 2.86709265175651x faster
Murmur128 vs Murmur32: 1.239113222486119x faster
Lookup Speed:
Murmur128 vs NimHash: 3.170694864047335x faster
Murmur32 vs NimHash: 2.783819628648127x faster
Murmur128 vs Murmur32: 1.13897280966694x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9148936170212766x better
Murmur32 vs NimHash: 1.228571428571429x better
Murmur128 vs Murmur32: 0.7446808510638298x better
=== Benchmark: Medium dpFixed ===
Size: 100000 items
Pattern: dpFixed
Nim's Hash (Farm Hash) Results:
Insert time: 0.04369800000000001s (2288434.253283903 ops/sec)
Lookup time: 0.02203400000000011s (2269220.295906316 ops/sec)
False positives: 411 (0.822%)
MurmurHash3_128 Results:
Insert time: 0.013401s (7462129.691814045 ops/sec)
Lookup time: 0.00719499999999984s (6949270.32661586 ops/sec)
False positives: 440 (0.88%)
MurmurHash3_32 Results:
Insert time: 0.01487600000000011s (6722237.160526973 ops/sec)
Lookup time: 0.007677999999999852s (6512112.529304632 ops/sec)
False positives: 433 (0.8659999999999999%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.260801432728903x faster
Murmur32 vs NimHash: 2.937483194407078x faster
Murmur128 vs Murmur32: 1.110066412954266x faster
Lookup Speed:
Murmur128 vs NimHash: 3.062404447533092x faster
Murmur32 vs NimHash: 2.86975774941398x faster
Murmur128 vs Murmur32: 1.067129951355111x faster
False Positive Rates:
Murmur128 vs NimHash: 0.934090909090909x better
Murmur32 vs NimHash: 0.9491916859122402x better
Murmur128 vs Murmur32: 0.9840909090909089x better
=== Benchmark: Large dpFixed ===
Size: 1000000 items
Pattern: dpFixed
Nim's Hash (Farm Hash) Results:
Insert time: 0.4461910000000002s (2241192.673092912 ops/sec)
Lookup time: 0.2275340000000003s (2197473.784137752 ops/sec)
False positives: 4084 (0.8168%)
MurmurHash3_128 Results:
Insert time: 0.1576560000000002s (6342923.834170589 ops/sec)
Lookup time: 0.07750000000000057s (6451612.903225759 ops/sec)
False positives: 4109 (0.8218%)
MurmurHash3_32 Results:
Insert time: 0.1656709999999997s (6036059.418968932 ops/sec)
Lookup time: 0.08378900000000034s (5967370.418551337 ops/sec)
False positives: 4085 (0.8170000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 2.830155528492411x faster
Murmur32 vs NimHash: 2.693235388209168x faster
Murmur128 vs Murmur32: 1.050838534530874x faster
Lookup Speed:
Murmur128 vs NimHash: 2.935922580645144x faster
Murmur32 vs NimHash: 2.715559321629324x faster
Murmur128 vs Murmur32: 1.081148387096771x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9939157945972257x better
Murmur32 vs NimHash: 0.9997552019583843x better
Murmur128 vs Murmur32: 0.9941591628133366x better
=== Benchmark: Small dpLong ===
Size: 10000 items
Pattern: dpLong
Nim's Hash (Farm Hash) Results:
Insert time: 0.009589999999999321s (1042752.86757046 ops/sec)
Lookup time: 0.005217000000000027s (958405.2137243577 ops/sec)
False positives: 39 (0.7799999999999999%)
MurmurHash3_128 Results:
Insert time: 0.001508000000000287s (6631299.734746749 ops/sec)
Lookup time: 0.0007299999999998974s (6849315.068494113 ops/sec)
False positives: 56 (1.12%)
MurmurHash3_32 Results:
Insert time: 0.001934000000000324s (5170630.816958802 ops/sec)
Lookup time: 0.0009790000000000632s (5107252.298263205 ops/sec)
False positives: 40 (0.8%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 6.359416445621682x faster
Murmur32 vs NimHash: 4.95863495346314x faster
Murmur128 vs Murmur32: 1.282493368700236x faster
Lookup Speed:
Murmur128 vs NimHash: 7.146575342466795x faster
Murmur32 vs NimHash: 5.328907048007856x faster
Murmur128 vs Murmur32: 1.341095890411234x faster
False Positive Rates:
Murmur128 vs NimHash: 0.6964285714285714x better
Murmur32 vs NimHash: 0.975x better
Murmur128 vs Murmur32: 0.7142857142857143x better
=== Benchmark: Medium dpLong ===
Size: 100000 items
Pattern: dpLong
Nim's Hash (Farm Hash) Results:
Insert time: 0.09930899999999987s (1006958.080335117 ops/sec)
Lookup time: 0.04954899999999984s (1009102.100950577 ops/sec)
False positives: 393 (0.786%)
MurmurHash3_128 Results:
Insert time: 0.0148350000000006s (6740815.63869201 ops/sec)
Lookup time: 0.007865000000000677s (6357279.084551265 ops/sec)
False positives: 422 (0.844%)
MurmurHash3_32 Results:
Insert time: 0.0197699999999994s (5058168.942842845 ops/sec)
Lookup time: 0.01105200000000028s (4524068.041983236 ops/sec)
False positives: 404 (0.8080000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 6.69423660262864x faster
Murmur32 vs NimHash: 5.023216995447794x faster
Murmur128 vs Murmur32: 1.33265925176937x faster
Lookup Speed:
Murmur128 vs NimHash: 6.299936427208592x faster
Murmur32 vs NimHash: 4.483260948244532x faster
Murmur128 vs Murmur32: 1.405212968849248x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9312796208530807x better
Murmur32 vs NimHash: 0.9727722772277229x better
Murmur128 vs Murmur32: 0.9573459715639812x better
=== Benchmark: Large dpLong ===
Size: 1000000 items
Pattern: dpLong
Nim's Hash (Farm Hash) Results:
Insert time: 1.032455000000001s (968565.2159174002 ops/sec)
Lookup time: 0.5242619999999993s (953721.6124762058 ops/sec)
False positives: 4143 (0.8286%)
MurmurHash3_128 Results:
Insert time: 0.1612590000000003s (6201204.273869975 ops/sec)
Lookup time: 0.08462199999999953s (5908628.961735751 ops/sec)
False positives: 4044 (0.8088%)
MurmurHash3_32 Results:
Insert time: 0.2145720000000004s (4660440.318401273 ops/sec)
Lookup time: 0.1070960000000003s (4668708.448494795 ops/sec)
False positives: 4039 (0.8078%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 6.402464358578429x faster
Murmur32 vs NimHash: 4.81169490893499x faster
Murmur128 vs Murmur32: 1.330604803452831x faster
Lookup Speed:
Murmur128 vs NimHash: 6.195339273475009x faster
Murmur32 vs NimHash: 4.895252857249551x faster
Murmur128 vs Murmur32: 1.265581054572108x faster
False Positive Rates:
Murmur128 vs NimHash: 1.024480712166172x better
Murmur32 vs NimHash: 1.025748947759346x better
Murmur128 vs Murmur32: 0.9987636003956479x better
=== Benchmark: Small dpSpecial ===
Size: 10000 items
Pattern: dpSpecial
Nim's Hash (Farm Hash) Results:
Insert time: 0.005548999999998472s (1802126.509281448 ops/sec)
Lookup time: 0.002686999999999884s (1860811.313732868 ops/sec)
False positives: 39 (0.7799999999999999%)
MurmurHash3_128 Results:
Insert time: 0.001593999999998985s (6273525.721459452 ops/sec)
Lookup time: 0.0008440000000007331s (5924170.616108599 ops/sec)
False positives: 33 (0.66%)
MurmurHash3_32 Results:
Insert time: 0.001865999999999701s (5359056.806003002 ops/sec)
Lookup time: 0.0009710000000016095s (5149330.587015152 ops/sec)
False positives: 51 (1.02%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.481179422836891x faster
Murmur32 vs NimHash: 2.973740621650247x faster
Murmur128 vs Murmur32: 1.170639899624146x faster
Lookup Speed:
Murmur128 vs NimHash: 3.183649289096623x faster
Murmur32 vs NimHash: 2.767250257461823x faster
Murmur128 vs Murmur32: 1.150473933650197x faster
False Positive Rates:
Murmur128 vs NimHash: 1.181818181818182x better
Murmur32 vs NimHash: 0.7647058823529411x better
Murmur128 vs Murmur32: 1.545454545454546x better
=== Benchmark: Medium dpSpecial ===
Size: 100000 items
Pattern: dpSpecial
Nim's Hash (Farm Hash) Results:
Insert time: 0.04318999999999917s (2315350.775642554 ops/sec)
Lookup time: 0.02338899999999988s (2137757.065287111 ops/sec)
False positives: 435 (0.8699999999999999%)
MurmurHash3_128 Results:
Insert time: 0.01633100000000098s (6123323.740125772 ops/sec)
Lookup time: 0.007922000000000651s (6311537.490532176 ops/sec)
False positives: 405 (0.8099999999999999%)
MurmurHash3_32 Results:
Insert time: 0.01658399999999993s (6029908.345393174 ops/sec)
Lookup time: 0.00944400000000023s (5294366.793731341 ops/sec)
False positives: 368 (0.736%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 2.64466352336027x faster
Murmur32 vs NimHash: 2.604317414375262x faster
Murmur128 vs Murmur32: 1.015492009062454x faster
Lookup Speed:
Murmur128 vs NimHash: 2.952411007321126x faster
Murmur32 vs NimHash: 2.476598898771634x faster
Murmur128 vs Murmur32: 1.192123201211746x faster
False Positive Rates:
Murmur128 vs NimHash: 1.074074074074074x better
Murmur32 vs NimHash: 1.182065217391304x better
Murmur128 vs Murmur32: 0.9086419753086421x better
=== Benchmark: Large dpSpecial ===
Size: 1000000 items
Pattern: dpSpecial
Nim's Hash (Farm Hash) Results:
Insert time: 0.4443290000000015s (2250584.589347075 ops/sec)
Lookup time: 0.2233900000000002s (2238238.058999953 ops/sec)
False positives: 4096 (0.8191999999999999%)
MurmurHash3_128 Results:
Insert time: 0.1440049999999999s (6944203.326273397 ops/sec)
Lookup time: 0.07467199999999963s (6695950.289265086 ops/sec)
False positives: 4137 (0.8274%)
MurmurHash3_32 Results:
Insert time: 0.1650030000000005s (6060495.869772048 ops/sec)
Lookup time: 0.08332200000000078s (6000816.110991038 ops/sec)
False positives: 4222 (0.8444%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.085510919759742x faster
Murmur32 vs NimHash: 2.692854069319953x faster
Murmur128 vs Murmur32: 1.145814381445092x faster
Lookup Speed:
Murmur128 vs NimHash: 2.991616670237858x faster
Murmur32 vs NimHash: 2.681044622068578x faster
Murmur128 vs Murmur32: 1.115839940004302x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9900894367899443x better
Murmur32 vs NimHash: 0.9701563240170534x better
Murmur128 vs Murmur32: 1.020546289581822x better

View File

@ -1,495 +0,0 @@
=== Benchmark: Small dpRandom ===
Size: 10000 items
Pattern: dpRandom
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.004986000000000001s (2005615.724027276 ops/sec)
Lookup time: 0.00238s (2100840.336134454 ops/sec)
False positives: 43 (0.86%)
MurmurHash3_128 Results:
Insert time: 0.001388s (7204610.951008644 ops/sec)
Lookup time: 0.000954s (5241090.146750525 ops/sec)
False positives: 25 (0.5%)
MurmurHash3_32 Results:
Insert time: 0.001744000000000002s (5733944.954128432 ops/sec)
Lookup time: 0.000780999999999997s (6402048.655569807 ops/sec)
False positives: 43 (0.86%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.592219020172911x faster
Murmur32 vs NimHash: 2.858944954128437x faster
Murmur128 vs Murmur32: 1.256484149855909x faster
Lookup Speed:
Murmur128 vs NimHash: 2.49475890985325x faster
Murmur32 vs NimHash: 3.047375160051228x faster
Murmur128 vs Murmur32: 0.8186582809224288x faster
False Positive Rates:
Murmur128 vs NimHash: 1.72x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.72x better
=== Benchmark: Medium dpRandom ===
Size: 100000 items
Pattern: dpRandom
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.050316s (1987439.383098816 ops/sec)
Lookup time: 0.02490799999999999s (2007387.184840213 ops/sec)
False positives: 406 (0.8120000000000001%)
MurmurHash3_128 Results:
Insert time: 0.01539100000000002s (6497303.618998109 ops/sec)
Lookup time: 0.008371999999999991s (5972288.580984239 ops/sec)
False positives: 405 (0.8099999999999999%)
MurmurHash3_32 Results:
Insert time: 0.01614399999999999s (6194251.734390489 ops/sec)
Lookup time: 0.008244000000000001s (6065016.982047549 ops/sec)
False positives: 406 (0.8120000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.269183288935089x faster
Murmur32 vs NimHash: 3.116699702675918x faster
Murmur128 vs Murmur32: 1.048924696251054x faster
Lookup Speed:
Murmur128 vs NimHash: 2.975155279503107x faster
Murmur32 vs NimHash: 3.021348859776805x faster
Murmur128 vs Murmur32: 0.9847109412326817x faster
False Positive Rates:
Murmur128 vs NimHash: 1.002469135802469x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.002469135802469x better
=== Benchmark: Large dpRandom ===
Size: 1000000 items
Pattern: dpRandom
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.522163s (1915110.798735261 ops/sec)
Lookup time: 0.257452s (1942109.597128785 ops/sec)
False positives: 4130 (0.826%)
MurmurHash3_128 Results:
Insert time: 0.1587529999999999s (6299093.560436657 ops/sec)
Lookup time: 0.07841999999999993s (6375924.509053818 ops/sec)
False positives: 4187 (0.8373999999999999%)
MurmurHash3_32 Results:
Insert time: 0.170582s (5862283.242077123 ops/sec)
Lookup time: 0.08690500000000001s (5753408.894770151 ops/sec)
False positives: 4130 (0.826%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.289153590798286x faster
Murmur32 vs NimHash: 3.061067404532718x faster
Murmur128 vs Murmur32: 1.074511977726406x faster
Lookup Speed:
Murmur128 vs NimHash: 3.282989033409847x faster
Murmur32 vs NimHash: 2.96245325355273x faster
Murmur128 vs Murmur32: 1.108199438918644x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9863864342010987x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9863864342010987x better
=== Benchmark: Small dpSequential ===
Size: 10000 items
Pattern: dpSequential
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.004666000000000059s (2143163.309044122 ops/sec)
Lookup time: 0.002341000000000149s (2135839.384878122 ops/sec)
False positives: 51 (1.02%)
MurmurHash3_128 Results:
Insert time: 0.001309999999999922s (7633587.786259995 ops/sec)
Lookup time: 0.0007189999999999142s (6954102.920724057 ops/sec)
False positives: 47 (0.9400000000000001%)
MurmurHash3_32 Results:
Insert time: 0.001513000000000098s (6609385.327164147 ops/sec)
Lookup time: 0.0007410000000001027s (6747638.32658476 ops/sec)
False positives: 51 (1.02%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.561832061068959x faster
Murmur32 vs NimHash: 3.08393919365483x faster
Murmur128 vs Murmur32: 1.154961832061212x faster
Lookup Speed:
Murmur128 vs NimHash: 3.25591098748321x faster
Murmur32 vs NimHash: 3.159244264507185x faster
Murmur128 vs Murmur32: 1.030598052851448x faster
False Positive Rates:
Murmur128 vs NimHash: 1.085106382978724x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.085106382978724x better
=== Benchmark: Medium dpSequential ===
Size: 100000 items
Pattern: dpSequential
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.0480590000000003s (2080775.713185863 ops/sec)
Lookup time: 0.02284599999999992s (2188566.926376616 ops/sec)
False positives: 446 (0.8920000000000001%)
MurmurHash3_128 Results:
Insert time: 0.01357099999999978s (7368653.746960551 ops/sec)
Lookup time: 0.008311999999999653s (6015399.422521907 ops/sec)
False positives: 422 (0.844%)
MurmurHash3_32 Results:
Insert time: 0.01522699999999988s (6567281.802062178 ops/sec)
Lookup time: 0.007773999999999948s (6431695.394906141 ops/sec)
False positives: 446 (0.8920000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.541301304251793x faster
Murmur32 vs NimHash: 3.156169961253082x faster
Murmur128 vs Murmur32: 1.122024906049674x faster
Lookup Speed:
Murmur128 vs NimHash: 2.7485563041387x faster
Murmur32 vs NimHash: 2.938770259840504x faster
Murmur128 vs Murmur32: 0.9352743022136998x faster
False Positive Rates:
Murmur128 vs NimHash: 1.056872037914692x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.056872037914692x better
=== Benchmark: Large dpSequential ===
Size: 1000000 items
Pattern: dpSequential
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.4705080000000001s (2125362.374284815 ops/sec)
Lookup time: 0.233136s (2144670.921693775 ops/sec)
False positives: 4024 (0.8048%)
MurmurHash3_128 Results:
Insert time: 0.1423380000000001s (7025530.778850338 ops/sec)
Lookup time: 0.07601600000000008s (6577562.61839612 ops/sec)
False positives: 4205 (0.8410000000000001%)
MurmurHash3_32 Results:
Insert time: 0.1583019999999999s (6317039.582570027 ops/sec)
Lookup time: 0.08089100000000027s (6181157.359904048 ops/sec)
False positives: 4024 (0.8048%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.305568435695316x faster
Murmur32 vs NimHash: 2.972217659915859x faster
Murmur128 vs Murmur32: 1.112155573353566x faster
Lookup Speed:
Murmur128 vs NimHash: 3.066933277204796x faster
Murmur32 vs NimHash: 2.88210060451718x faster
Murmur128 vs Murmur32: 1.064131235529365x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9569560047562424x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9569560047562424x better
=== Benchmark: Small dpFixed ===
Size: 10000 items
Pattern: dpFixed
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.004584000000000366s (2181500.872600175 ops/sec)
Lookup time: 0.002313999999999705s (2160760.587727155 ops/sec)
False positives: 35 (0.7000000000000001%)
MurmurHash3_128 Results:
Insert time: 0.001360000000000028s (7352941.176470438 ops/sec)
Lookup time: 0.0006840000000001289s (7309941.520466458 ops/sec)
False positives: 47 (0.9400000000000001%)
MurmurHash3_32 Results:
Insert time: 0.001682999999999879s (5941770.647653428 ops/sec)
Lookup time: 0.0007459999999999134s (6702412.868633485 ops/sec)
False positives: 35 (0.7000000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.370588235294318x faster
Murmur32 vs NimHash: 2.723707664884548x faster
Murmur128 vs Murmur32: 1.237499999999886x faster
Lookup Speed:
Murmur128 vs NimHash: 3.383040935671446x faster
Murmur32 vs NimHash: 3.101876675603182x faster
Murmur128 vs Murmur32: 1.090643274853469x faster
False Positive Rates:
Murmur128 vs NimHash: 0.7446808510638298x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.7446808510638298x better
=== Benchmark: Medium dpFixed ===
Size: 100000 items
Pattern: dpFixed
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.04620999999999986s (2164033.758926646 ops/sec)
Lookup time: 0.02306600000000003s (2167692.707881728 ops/sec)
False positives: 433 (0.8659999999999999%)
MurmurHash3_128 Results:
Insert time: 0.01405199999999995s (7116424.708226611 ops/sec)
Lookup time: 0.007185999999999915s (6957973.838018452 ops/sec)
False positives: 440 (0.88%)
MurmurHash3_32 Results:
Insert time: 0.01542299999999974s (6483822.86195952 ops/sec)
Lookup time: 0.007922999999999902s (6310740.880979504 ops/sec)
False positives: 433 (0.8659999999999999%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.288499857671507x faster
Murmur32 vs NimHash: 2.996174544511485x faster
Murmur128 vs Murmur32: 1.097566182749772x faster
Lookup Speed:
Murmur128 vs NimHash: 3.209852490954677x faster
Murmur32 vs NimHash: 2.911270983213469x faster
Murmur128 vs Murmur32: 1.10256053437239x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9840909090909089x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9840909090909089x better
=== Benchmark: Large dpFixed ===
Size: 1000000 items
Pattern: dpFixed
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.4785590000000002s (2089606.506198817 ops/sec)
Lookup time: 0.2445870000000001s (2044262.368809462 ops/sec)
False positives: 4085 (0.8170000000000001%)
MurmurHash3_128 Results:
Insert time: 0.1526930000000002s (6549088.694308178 ops/sec)
Lookup time: 0.07818300000000011s (6395252.164792849 ops/sec)
False positives: 4109 (0.8218%)
MurmurHash3_32 Results:
Insert time: 0.1651470000000002s (6055211.417706643 ops/sec)
Lookup time: 0.08524499999999957s (5865446.653762713 ops/sec)
False positives: 4085 (0.8170000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.134125336459429x faster
Murmur32 vs NimHash: 2.897775920846275x faster
Murmur128 vs Murmur32: 1.081562350598914x faster
Lookup Speed:
Murmur128 vs NimHash: 3.128391082460378x faster
Murmur32 vs NimHash: 2.869224001407723x faster
Murmur128 vs Murmur32: 1.090326541575527x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9941591628133366x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9941591628133366x better
=== Benchmark: Small dpLong ===
Size: 10000 items
Pattern: dpLong
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.008112999999999815s (1232589.670898586 ops/sec)
Lookup time: 0.003777000000000363s (1323801.959226772 ops/sec)
False positives: 40 (0.8%)
MurmurHash3_128 Results:
Insert time: 0.001597000000000293s (6261740.763931226 ops/sec)
Lookup time: 0.0007489999999998886s (6675567.423231967 ops/sec)
False positives: 56 (1.12%)
MurmurHash3_32 Results:
Insert time: 0.001947999999999617s (5133470.2258737 ops/sec)
Lookup time: 0.001180999999999877s (4233700.254022458 ops/sec)
False positives: 40 (0.8%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 5.080150281777287x faster
Murmur32 vs NimHash: 4.164784394251238x faster
Murmur128 vs Murmur32: 1.219787100813563x faster
Lookup Speed:
Murmur128 vs NimHash: 5.042723631509913x faster
Murmur32 vs NimHash: 3.198137171888872x faster
Murmur128 vs Murmur32: 1.576769025367226x faster
False Positive Rates:
Murmur128 vs NimHash: 0.7142857142857143x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.7142857142857143x better
=== Benchmark: Medium dpLong ===
Size: 100000 items
Pattern: dpLong
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.07904900000000037s (1265038.140899942 ops/sec)
Lookup time: 0.04046299999999992s (1235696.809430841 ops/sec)
False positives: 404 (0.8080000000000001%)
MurmurHash3_128 Results:
Insert time: 0.015733s (6356066.86582343 ops/sec)
Lookup time: 0.007823000000000135s (6391409.945033764 ops/sec)
False positives: 422 (0.844%)
MurmurHash3_32 Results:
Insert time: 0.02339799999999936s (4273869.561501099 ops/sec)
Lookup time: 0.01039399999999979s (4810467.577448624 ops/sec)
False positives: 404 (0.8080000000000001%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 5.024407296764786x faster
Murmur32 vs NimHash: 3.37845114967102x faster
Murmur128 vs Murmur32: 1.487192525265326x faster
Lookup Speed:
Murmur128 vs NimHash: 5.172312412118013x faster
Murmur32 vs NimHash: 3.892918991726066x faster
Murmur128 vs Murmur32: 1.328646299373592x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9573459715639812x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9573459715639812x better
=== Benchmark: Large dpLong ===
Size: 1000000 items
Pattern: dpLong
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.814165s (1228252.25844884 ops/sec)
Lookup time: 0.4165090000000005s (1200454.251888913 ops/sec)
False positives: 4039 (0.8078%)
MurmurHash3_128 Results:
Insert time: 0.1629300000000002s (6137605.10648744 ops/sec)
Lookup time: 0.08406800000000025s (5947566.255888073 ops/sec)
False positives: 4044 (0.8088%)
MurmurHash3_32 Results:
Insert time: 0.2405530000000002s (4157088.042967658 ops/sec)
Lookup time: 0.1411719999999992s (3541778.822996082 ops/sec)
False positives: 4039 (0.8078%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 4.997023261523347x faster
Murmur32 vs NimHash: 3.384555586502763x faster
Murmur128 vs Murmur32: 1.476419321180874x faster
Lookup Speed:
Murmur128 vs NimHash: 4.954429747347376x faster
Murmur32 vs NimHash: 2.950365511574554x faster
Murmur128 vs Murmur32: 1.679259646952452x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9987636003956479x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9987636003956479x better
=== Benchmark: Small dpSpecial ===
Size: 10000 items
Pattern: dpSpecial
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.007111000000000089s (1406271.97299956 ops/sec)
Lookup time: 0.003309000000000673s (1511030.522816254 ops/sec)
False positives: 51 (1.02%)
MurmurHash3_128 Results:
Insert time: 0.001907000000000103s (5243838.489774232 ops/sec)
Lookup time: 0.0009350000000001302s (5347593.582886956 ops/sec)
False positives: 33 (0.66%)
MurmurHash3_32 Results:
Insert time: 0.001981000000000677s (5047955.577989189 ops/sec)
Lookup time: 0.0009660000000000224s (5175983.436852882 ops/sec)
False positives: 51 (1.02%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.728893550078503x faster
Murmur32 vs NimHash: 3.589601211508158x faster
Murmur128 vs Murmur32: 1.03880440482463x faster
Lookup Speed:
Murmur128 vs NimHash: 3.539037433155307x faster
Murmur32 vs NimHash: 3.425465838509933x faster
Murmur128 vs Murmur32: 1.033155080213784x faster
False Positive Rates:
Murmur128 vs NimHash: 1.545454545454546x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.545454545454546x better
=== Benchmark: Medium dpSpecial ===
Size: 100000 items
Pattern: dpSpecial
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.05804300000000051s (1722860.637802993 ops/sec)
Lookup time: 0.03600399999999837s (1388734.585046169 ops/sec)
False positives: 368 (0.736%)
MurmurHash3_128 Results:
Insert time: 0.01871899999999904s (5342165.713980721 ops/sec)
Lookup time: 0.008850999999999942s (5649079.200090422 ops/sec)
False positives: 405 (0.8099999999999999%)
MurmurHash3_32 Results:
Insert time: 0.01748200000000111s (5720169.317011422 ops/sec)
Lookup time: 0.008943999999999619s (5590339.892665712 ops/sec)
False positives: 368 (0.736%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 3.100753245365857x faster
Murmur32 vs NimHash: 3.320157876672968x faster
Murmur128 vs Murmur32: 0.9339174101181689x faster
Lookup Speed:
Murmur128 vs NimHash: 4.067788950400927x faster
Murmur32 vs NimHash: 4.025491949910544x faster
Murmur128 vs Murmur32: 1.010507287312132x faster
False Positive Rates:
Murmur128 vs NimHash: 0.9086419753086421x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 0.9086419753086421x better
=== Benchmark: Large dpSpecial ===
Size: 1000000 items
Pattern: dpSpecial
Nim's Hash (MurmurHash3_32) Results:
Insert time: 0.5143779999999989s (1944095.58729184 ops/sec)
Lookup time: 0.2718509999999998s (1839242.820515651 ops/sec)
False positives: 4222 (0.8444%)
MurmurHash3_128 Results:
Insert time: 0.1766860000000001s (5659757.988748398 ops/sec)
Lookup time: 0.08288000000000117s (6032818.532818447 ops/sec)
False positives: 4137 (0.8274%)
MurmurHash3_32 Results:
Insert time: 0.1758810000000004s (5685662.464962092 ops/sec)
Lookup time: 0.09260200000000118s (5399451.415736092 ops/sec)
False positives: 4222 (0.8444%)
Comparison (higher means better/faster):
Insert Speed:
Murmur128 vs NimHash: 2.911254994736417x faster
Murmur32 vs NimHash: 2.924579687402265x faster
Murmur128 vs Murmur32: 0.9954438948190591x faster
Lookup Speed:
Murmur128 vs NimHash: 3.280055501930454x faster
Murmur32 vs NimHash: 2.935692533638543x faster
Murmur128 vs Murmur32: 1.117302123552122x faster
False Positive Rates:
Murmur128 vs NimHash: 1.020546289581822x better
Murmur32 vs NimHash: 1.0x better
Murmur128 vs Murmur32: 1.020546289581822x better

View File

@ -1 +0,0 @@
# Add the `src` directory that sits one level above the project directory to
# the compiler's module search path.
switch("path", "$projectDir/../src")

View File

@ -1,9 +0,0 @@
# Package
# Nimble metadata describing this package.
version = "0.1.0"
author = "Waku Team"
description = "Efficient Bloom filter implementation for Nim"
license = "MIT"
# Package sources are located under `src/`.
srcDir = "src"
# Dependencies
# Only the Nim compiler/stdlib itself is required (version 1.0.0 or newer).
requires "nim >= 1.0.0"

Binary file not shown.

View File

@ -1,314 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmur3.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// FORCE_INLINE: when the compiler defines __GNUC__ this expands to the
// always_inline attribute followed by `inline`; with any other compiler it
// expands to nothing, leaving only whatever qualifiers the declaration
// itself already carries.
#ifdef __GNUC__
#define FORCE_INLINE __attribute__((always_inline)) inline
#else
#define FORCE_INLINE
#endif
// Rotate the 32-bit value x left by r bit positions.
//
// Both shift counts are reduced modulo 32. For r in 1..31 the result is
// exactly the classic (x << r) | (x >> (32 - r)); for r == 0 (or any multiple
// of 32) x is returned unchanged instead of evaluating `x >> 32`, which is
// undefined behaviour in C. A negative r is treated as its value modulo 32.
static inline FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned int left = (unsigned int)r & 31u;
  const unsigned int right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
// Rotate the 64-bit value x left by r bit positions.
//
// Both shift counts are reduced modulo 64. For r in 1..63 the result is
// exactly the classic (x << r) | (x >> (64 - r)); for r == 0 (or any multiple
// of 64) x is returned unchanged instead of evaluating `x >> 64`, which is
// undefined behaviour in C. A negative r is treated as its value modulo 64.
static inline FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned int left = (unsigned int)r & 63u;
  const unsigned int right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
// Upper-case macro spellings of the rotate helpers; each one forwards its two
// arguments unchanged to the inline function of the same (lower-case) name.
#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)
// Pastes the LLU suffix onto the token x, turning a bare hexadecimal literal
// into an unsigned long long constant.
#define BIG_CONSTANT(x) (x##LLU)
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here.
// As written this is a plain array subscript: element i of p is read through
// p's own pointer type, with no byte swapping and no alignment handling.
// Both parameters are parenthesized so that expression arguments such as
// getblock(base + 1, i) index the pointer the caller intended.
#define getblock(p, i) ((p)[(i)])
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
// Final mixing step for one 32-bit hash word: xor-shift by 16, multiply,
// xor-shift by 13, multiply, xor-shift by 16. Returns the mixed word.
static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  const uint32_t first_multiplier = 0x85ebca6b;
  const uint32_t second_multiplier = 0xc2b2ae35;

  h = (h ^ (h >> 16)) * first_multiplier;
  h = (h ^ (h >> 13)) * second_multiplier;
  return h ^ (h >> 16);
}
//----------
// Final mixing step for one 64-bit hash word: three xor-shifts by 33 with a
// 64-bit multiplication between each pair. Returns the mixed word.
static inline FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  const uint64_t first_multiplier = BIG_CONSTANT(0xff51afd7ed558ccd);
  const uint64_t second_multiplier = BIG_CONSTANT(0xc4ceb9fe1a85ec53);

  k = (k ^ (k >> 33)) * first_multiplier;
  k = (k ^ (k >> 33)) * second_multiplier;
  return k ^ (k >> 33);
}
//-----------------------------------------------------------------------------
// 32-bit MurmurHash3 (x86 variant).
//
//   key  - start of the input bytes
//   len  - number of input bytes
//   seed - initial value of the hash state
//   out  - receives the result; one uint32_t is written through it
void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * bytes = (const uint8_t*)key;
  const int block_count = len / 4;
  const uint32_t mul_a = 0xcc9e2d51;
  const uint32_t mul_b = 0x1b873593;
  uint32_t hash = seed;

  //----------
  // body: `block_end` points just past the last whole 4-byte block and is
  // indexed with negative offsets that count up to zero.
  const uint32_t * block_end = (const uint32_t *)(bytes + block_count*4);
  int offset = -block_count;
  while(offset != 0)
  {
    uint32_t word = getblock(block_end,offset);
    word *= mul_a;
    word = ROTL32(word,15);
    word *= mul_b;
    hash ^= word;
    hash = ROTL32(hash,13);
    hash = hash*5+0xe6546b64;
    offset++;
  }

  //----------
  // tail: fold in the 1-3 bytes that follow the last whole block; byte j
  // lands in bits 8*j .. 8*j+7 of the word.
  const uint8_t * rest = bytes + block_count*4;
  const int rest_len = len & 3;
  if(rest_len != 0)
  {
    uint32_t word = 0;
    int j;
    for(j = rest_len - 1; j >= 0; j--)
      word ^= (uint32_t)rest[j] << (8*j);
    word *= mul_a;
    word = ROTL32(word,15);
    word *= mul_b;
    hash ^= word;
  }

  //----------
  // finalization
  hash ^= len;
  hash = fmix32(hash);
  *(uint32_t*)out = hash;
}
//-----------------------------------------------------------------------------
// 128-bit MurmurHash3 (x86 variant, four 32-bit lanes).
//
//   key  - start of the input bytes
//   len  - number of input bytes
//   seed - initial value of all four lanes
//   out  - receives the result; four uint32_t values are written through it
//
// Tail bytes are widened to uint32_t before being shifted. Without the cast a
// byte >= 0x80 shifted left by 24 overflows the promoted `int`, which is
// undefined behaviour; the unsigned shift yields the intended bit pattern.
void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
  int i;
  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;
  uint32_t c1 = 0x239b961b;
  uint32_t c2 = 0xab0e9789;
  uint32_t c3 = 0x38b34ae5;
  uint32_t c4 = 0xa1e38b93;

  //----------
  // body: `blocks` points just past the last whole 16-byte block and is
  // indexed with negative offsets that count up to zero.
  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
  for(i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i*4+0);
    uint32_t k2 = getblock(blocks,i*4+1);
    uint32_t k3 = getblock(blocks,i*4+2);
    uint32_t k4 = getblock(blocks,i*4+3);
    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  }

  //----------
  // tail: every case deliberately falls through to the one below it, so a
  // tail of n bytes folds in bytes n-1 down to 0.
  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;
  switch(len & 15)
  {
  case 15: k4 ^= (uint32_t)tail[14] << 16; /* fall through */
  case 14: k4 ^= (uint32_t)tail[13] << 8;  /* fall through */
  case 13: k4 ^= (uint32_t)tail[12] << 0;
           k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
           /* fall through */
  case 12: k3 ^= (uint32_t)tail[11] << 24; /* fall through */
  case 11: k3 ^= (uint32_t)tail[10] << 16; /* fall through */
  case 10: k3 ^= (uint32_t)tail[ 9] << 8;  /* fall through */
  case  9: k3 ^= (uint32_t)tail[ 8] << 0;
           k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
           /* fall through */
  case  8: k2 ^= (uint32_t)tail[ 7] << 24; /* fall through */
  case  7: k2 ^= (uint32_t)tail[ 6] << 16; /* fall through */
  case  6: k2 ^= (uint32_t)tail[ 5] << 8;  /* fall through */
  case  5: k2 ^= (uint32_t)tail[ 4] << 0;
           k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
           /* fall through */
  case  4: k1 ^= (uint32_t)tail[ 3] << 24; /* fall through */
  case  3: k1 ^= (uint32_t)tail[ 2] << 16; /* fall through */
  case  2: k1 ^= (uint32_t)tail[ 1] << 8;  /* fall through */
  case  1: k1 ^= (uint32_t)tail[ 0] << 0;
           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization
  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;
  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);
  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;
  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}
//-----------------------------------------------------------------------------
// 128-bit MurmurHash3 (x64 variant, two 64-bit lanes).
//
//   key  - start of the input bytes
//   len  - number of input bytes
//   seed - initial value of both lanes
//   out  - receives the result; two uint64_t values are written through it
void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * bytes = (const uint8_t*)key;
  const int block_count = len / 16;
  const uint64_t mul_a = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t mul_b = BIG_CONSTANT(0x4cf5ad432745937f);
  uint64_t lo = seed;
  uint64_t hi = seed;
  int b = 0;
  int j;

  //----------
  // body: consume the input 16 bytes (two 64-bit words) at a time
  const uint64_t * words = (const uint64_t *)(bytes);
  while(b < block_count)
  {
    uint64_t first = getblock(words,b*2+0);
    uint64_t second = getblock(words,b*2+1);

    first *= mul_a;
    first = ROTL64(first,31);
    first *= mul_b;
    lo ^= first;
    lo = ROTL64(lo,27);
    lo += hi;
    lo = lo*5+0x52dce729;

    second *= mul_b;
    second = ROTL64(second,33);
    second *= mul_a;
    hi ^= second;
    hi = ROTL64(hi,31);
    hi += lo;
    hi = hi*5+0x38495ab5;

    b++;
  }

  //----------
  // tail: bytes 8..14 of the remainder feed the second lane, bytes 0..7 the
  // first; byte j lands in bits 8*(j mod 8) .. 8*(j mod 8)+7 of its word.
  const uint8_t * rest = bytes + block_count*16;
  const int rest_len = len & 15;
  if(rest_len > 8)
  {
    uint64_t second = 0;
    for(j = rest_len - 1; j >= 8; j--)
      second ^= (uint64_t)(rest[j]) << (8*(j - 8));
    second *= mul_b;
    second = ROTL64(second,33);
    second *= mul_a;
    hi ^= second;
  }
  if(rest_len > 0)
  {
    uint64_t first = 0;
    for(j = (rest_len > 8 ? 8 : rest_len) - 1; j >= 0; j--)
      first ^= (uint64_t)(rest[j]) << (8*j);
    first *= mul_a;
    first = ROTL64(first,31);
    first *= mul_b;
    lo ^= first;
  }

  //----------
  // finalization
  lo ^= len;
  hi ^= len;
  lo += hi;
  hi += lo;
  lo = fmix64(lo);
  hi = fmix64(hi);
  lo += hi;
  hi += lo;
  ((uint64_t*)out)[0] = lo;
  ((uint64_t*)out)[1] = hi;
}
//-----------------------------------------------------------------------------

View File

@ -1,21 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the
// public domain. The author hereby disclaims copyright to this source
// code.
// Include guard: the declarations below are seen at most once per
// translation unit.
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_
#include <stdint.h>
//-----------------------------------------------------------------------------
// All three functions share one signature:
//   key  - pointer to the bytes to hash
//   len  - number of bytes at key
//   seed - 32-bit seed value
//   out  - caller-provided buffer that receives the hash
void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out);
void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out);
void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out);
//-----------------------------------------------------------------------------
#endif // _MURMURHASH3_H_

View File

@ -1 +0,0 @@
# Add the `src` directory that sits one level above the project directory to
# the compiler's module search path.
switch("path", "$projectDir/../src")

View File

@ -1,84 +1,7 @@
import unittest
import strutils
include bloom
include ../bloom_filter/bloom
from random import rand, randomize
suite "murmur":
  # Test murmurhash3 implementations
  setup:
    # Digest of "hello" with seed 0, recomputed before every test and used as
    # the reference value by the 128-bit tests.
    var hashOutputs: MurmurHashes
    hashOutputs = [0, 0]
    rawMurmurHash128("hello", 5, 0'u32, hashOutputs)

  test "murmur128 raw":
    # Known-answer check for the raw 128-bit binding.
    check int(hashOutputs[0]) == -3758069500696749310
    check int(hashOutputs[1]) == 6565844092913065241

  test "murmur128 wrapped":
    # The string wrapper must agree with the raw binding.
    let hashOutputs2 = murmurHash128("hello", 0'u32)
    check hashOutputs2[0] == hashOutputs[0]
    check hashOutputs2[1] == hashOutputs[1]

  test "murmur32":
    let hash1 = murmurHash32("hello", 0'u32)
    let hash2 = murmurHash32("hello", 0'u32)
    check hash1 == hash2 # Same input should give same output
    # Known-answer check: determinism alone would also pass for a broken
    # implementation, so pin the published MurmurHash3_x86_32 digest of
    # "hello" with seed 0.
    check hash1 == 0x248bfa47'u32
    let hash3 = murmurHash32("hello", 10'u32)
    check hash1 != hash3 # Different seeds should give different outputs
suite "hash quality":
  test "hash type selection":
    # A filter must report the hash type it was constructed with.
    for requested in [htMurmur128, htMurmur32, htNimHash]:
      let filter = initializeBloomFilter(100, 0.01, hashType = requested)
      check filter.hashType == requested

  test "quality across hash types":
    const testSize = 10_000
    let samples = @[
      "shortstr",
      repeat("a", 1000),      # Very long string
      "special@#$%^&*()",     # Special characters
      "unicode→★∑≈",          # Unicode characters
      repeat("pattern", 10)   # Repeating pattern
    ]

    for kind in [htMurmur128, htMurmur32, htNimHash]:
      var filter = initializeBloomFilter(testSize, 0.01, hashType = kind)
      var stored = newSeq[string](testSize)

      # Each hand-picked sample must be found right after it is inserted.
      for sample in samples:
        filter.insert(sample)
        check filter.lookup(sample)

      # Fill the filter up to its configured capacity.
      for i in 0 ..< testSize:
        let entry = $i & "test" & $rand(1000)
        stored[i] = entry
        filter.insert(entry)

      # Every inserted entry must still be reported as present.
      var missing = 0
      for entry in stored:
        if not filter.lookup(entry):
          inc missing
      check missing == 0

      # Probe with strings that were never inserted and measure how often the
      # filter claims to contain them.
      let probeCount = testSize div 2
      var hits = 0
      for i in 0 ..< probeCount:
        let probe = "notpresent" & $i & $rand(1000)
        if filter.lookup(probe):
          inc hits
      let observedRate = hits.float / probeCount.float
      # Allow some margin but should be close to target
      check observedRate < filter.errorRate * 1.5
suite "bloom filter":
setup:
let nElementsToTest = 10000
@ -148,4 +71,46 @@ suite "bloom filter":
let str = $bf3
check str.contains("1000") # Capacity
check str.contains("4 hash") # Hash functions
check str.contains("1.0e-02") # Error rate in scientific notation
check str.contains("1.0e-02") # Error rate in scientific notation
suite "bloom filter special cases":
  test "different patterns of strings":
    const testSize = 10_000
    let samples = @[
      "shortstr",
      repeat("a", 1000),      # Very long string
      "special@#$%^&*()",     # Special characters
      "unicode→★∑≈",          # Unicode characters
      repeat("pattern", 10)   # Repeating pattern
    ]
    var filter = initializeBloomFilter(testSize, 0.01)
    var stored = newSeq[string](testSize)

    # Each hand-picked sample must be found right after it is inserted.
    for sample in samples:
      filter.insert(sample)
      check filter.lookup(sample)

    # Fill the filter up to its configured capacity.
    for i in 0 ..< testSize:
      let entry = $i & "test" & $rand(1000)
      stored[i] = entry
      filter.insert(entry)

    # Every inserted entry must still be reported as present.
    var missing = 0
    for entry in stored:
      if not filter.lookup(entry):
        inc missing
    check missing == 0

    # Probe with strings that were never inserted and measure how often the
    # filter claims to contain them.
    let probeCount = testSize div 2
    var hits = 0
    for i in 0 ..< probeCount:
      let probe = "notpresent" & $i & $rand(1000)
      if filter.lookup(probe):
        inc hits
    let observedRate = hits.float / probeCount.float
    # Allow some margin but should be close to target
    check observedRate < filter.errorRate * 1.5