mirror of
https://github.com/logos-messaging/nim-sds.git
synced 2026-01-05 15:43:09 +00:00
chore: minor wordings
This commit is contained in:
parent
cd7095aa9c
commit
326cb1ea68
@ -46,7 +46,7 @@ assert bf.lookup("test")
|
||||
- String concatenation overhead for second hash, causing higher insertion and lookup times, is acceptable.
|
||||
|
||||
3. Use NimHash when:
|
||||
- Consistency with Nim's default hashing is important
|
||||
- Consistency with Nim's hashing is important
|
||||
- Working with smaller datasets where performance is less critical
|
||||
- Future availability of better hash functions or performant implementations
|
||||
|
||||
@ -100,7 +100,7 @@ let secondHash = murmurHash32(item & " b", 0'u32)
|
||||
- Bit rotation for the second hash provides sufficient randomness in some use cases while being much faster than string concatenation (but results in a higher false-positive rate)
|
||||
- Choose between bit rotation and string concatenation depending on your use case.
|
||||
|
||||
3. Nim's Default Hash:
|
||||
3. Nim's Hash:
|
||||
```nim
|
||||
let
|
||||
hashA = abs(hash(item)) mod maxValue
|
||||
|
||||
@ -83,7 +83,7 @@ proc runBenchmark(size: int, errorRate: float, pattern: DataPattern, name: strin
|
||||
let murmur32Result = benchmarkHashType(htMurmur32, size, errorRate, data, lookupData)
|
||||
|
||||
# Print individual results
|
||||
printResults("Nim's Default (Farm Hash)", nimHashResult, size, lookupData.len)
|
||||
printResults("Nim's Hash (Farm Hash)", nimHashResult, size, lookupData.len)
|
||||
printResults("MurmurHash3_128", murmur128Result, size, lookupData.len)
|
||||
printResults("MurmurHash3_32", murmur32Result, size, lookupData.len)
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
Size: 10000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.006714000000000001s (1489425.081918379 ops/sec)
|
||||
Lookup time: 0.003387999999999995s (1475796.930342387 ops/sec)
|
||||
False positives: 51 (1.02%)
|
||||
@ -36,7 +36,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.04622000000000001s (2163565.556036348 ops/sec)
|
||||
Lookup time: 0.02333199999999999s (2142979.59883422 ops/sec)
|
||||
False positives: 402 (0.804%)
|
||||
@ -69,7 +69,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.4711130000000001s (2122632.998877127 ops/sec)
|
||||
Lookup time: 0.2430289999999999s (2057367.639252929 ops/sec)
|
||||
False positives: 4104 (0.8208%)
|
||||
@ -102,7 +102,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.004508000000000179s (2218278.615794056 ops/sec)
|
||||
Lookup time: 0.002232000000000012s (2240143.369175615 ops/sec)
|
||||
False positives: 42 (0.84%)
|
||||
@ -135,7 +135,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.04492400000000019s (2225981.65791113 ops/sec)
|
||||
Lookup time: 0.02218900000000001s (2253368.78633557 ops/sec)
|
||||
False positives: 436 (0.872%)
|
||||
@ -168,7 +168,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.4540329999999999s (2202483.079423743 ops/sec)
|
||||
Lookup time: 0.2223580000000003s (2248626.089459338 ops/sec)
|
||||
False positives: 4037 (0.8074%)
|
||||
@ -201,7 +201,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.004486999999999686s (2228660.574994585 ops/sec)
|
||||
Lookup time: 0.002098999999999851s (2382086.707956339 ops/sec)
|
||||
False positives: 43 (0.86%)
|
||||
@ -234,7 +234,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.04369800000000001s (2288434.253283903 ops/sec)
|
||||
Lookup time: 0.02203400000000011s (2269220.295906316 ops/sec)
|
||||
False positives: 411 (0.822%)
|
||||
@ -267,7 +267,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.4461910000000002s (2241192.673092912 ops/sec)
|
||||
Lookup time: 0.2275340000000003s (2197473.784137752 ops/sec)
|
||||
False positives: 4084 (0.8168%)
|
||||
@ -300,7 +300,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.009589999999999321s (1042752.86757046 ops/sec)
|
||||
Lookup time: 0.005217000000000027s (958405.2137243577 ops/sec)
|
||||
False positives: 39 (0.7799999999999999%)
|
||||
@ -333,7 +333,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.09930899999999987s (1006958.080335117 ops/sec)
|
||||
Lookup time: 0.04954899999999984s (1009102.100950577 ops/sec)
|
||||
False positives: 393 (0.786%)
|
||||
@ -366,7 +366,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 1.032455000000001s (968565.2159174002 ops/sec)
|
||||
Lookup time: 0.5242619999999993s (953721.6124762058 ops/sec)
|
||||
False positives: 4143 (0.8286%)
|
||||
@ -399,7 +399,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.005548999999998472s (1802126.509281448 ops/sec)
|
||||
Lookup time: 0.002686999999999884s (1860811.313732868 ops/sec)
|
||||
False positives: 39 (0.7799999999999999%)
|
||||
@ -432,7 +432,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.04318999999999917s (2315350.775642554 ops/sec)
|
||||
Lookup time: 0.02338899999999988s (2137757.065287111 ops/sec)
|
||||
False positives: 435 (0.8699999999999999%)
|
||||
@ -465,7 +465,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (Farm Hash) Results:
|
||||
Nim's Hash (Farm Hash) Results:
|
||||
Insert time: 0.4443290000000015s (2250584.589347075 ops/sec)
|
||||
Lookup time: 0.2233900000000002s (2238238.058999953 ops/sec)
|
||||
False positives: 4096 (0.8191999999999999%)
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
Size: 10000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.004986000000000001s (2005615.724027276 ops/sec)
|
||||
Lookup time: 0.00238s (2100840.336134454 ops/sec)
|
||||
False positives: 43 (0.86%)
|
||||
@ -36,7 +36,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.050316s (1987439.383098816 ops/sec)
|
||||
Lookup time: 0.02490799999999999s (2007387.184840213 ops/sec)
|
||||
False positives: 406 (0.8120000000000001%)
|
||||
@ -69,7 +69,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpRandom
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.522163s (1915110.798735261 ops/sec)
|
||||
Lookup time: 0.257452s (1942109.597128785 ops/sec)
|
||||
False positives: 4130 (0.826%)
|
||||
@ -102,7 +102,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.004666000000000059s (2143163.309044122 ops/sec)
|
||||
Lookup time: 0.002341000000000149s (2135839.384878122 ops/sec)
|
||||
False positives: 51 (1.02%)
|
||||
@ -135,7 +135,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.0480590000000003s (2080775.713185863 ops/sec)
|
||||
Lookup time: 0.02284599999999992s (2188566.926376616 ops/sec)
|
||||
False positives: 446 (0.8920000000000001%)
|
||||
@ -168,7 +168,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpSequential
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.4705080000000001s (2125362.374284815 ops/sec)
|
||||
Lookup time: 0.233136s (2144670.921693775 ops/sec)
|
||||
False positives: 4024 (0.8048%)
|
||||
@ -201,7 +201,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.004584000000000366s (2181500.872600175 ops/sec)
|
||||
Lookup time: 0.002313999999999705s (2160760.587727155 ops/sec)
|
||||
False positives: 35 (0.7000000000000001%)
|
||||
@ -234,7 +234,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.04620999999999986s (2164033.758926646 ops/sec)
|
||||
Lookup time: 0.02306600000000003s (2167692.707881728 ops/sec)
|
||||
False positives: 433 (0.8659999999999999%)
|
||||
@ -267,7 +267,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpFixed
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.4785590000000002s (2089606.506198817 ops/sec)
|
||||
Lookup time: 0.2445870000000001s (2044262.368809462 ops/sec)
|
||||
False positives: 4085 (0.8170000000000001%)
|
||||
@ -300,7 +300,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.008112999999999815s (1232589.670898586 ops/sec)
|
||||
Lookup time: 0.003777000000000363s (1323801.959226772 ops/sec)
|
||||
False positives: 40 (0.8%)
|
||||
@ -333,7 +333,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.07904900000000037s (1265038.140899942 ops/sec)
|
||||
Lookup time: 0.04046299999999992s (1235696.809430841 ops/sec)
|
||||
False positives: 404 (0.8080000000000001%)
|
||||
@ -366,7 +366,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpLong
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.814165s (1228252.25844884 ops/sec)
|
||||
Lookup time: 0.4165090000000005s (1200454.251888913 ops/sec)
|
||||
False positives: 4039 (0.8078%)
|
||||
@ -399,7 +399,7 @@ Comparison (higher means better/faster):
|
||||
Size: 10000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.007111000000000089s (1406271.97299956 ops/sec)
|
||||
Lookup time: 0.003309000000000673s (1511030.522816254 ops/sec)
|
||||
False positives: 51 (1.02%)
|
||||
@ -432,7 +432,7 @@ Comparison (higher means better/faster):
|
||||
Size: 100000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.05804300000000051s (1722860.637802993 ops/sec)
|
||||
Lookup time: 0.03600399999999837s (1388734.585046169 ops/sec)
|
||||
False positives: 368 (0.736%)
|
||||
@ -465,7 +465,7 @@ Comparison (higher means better/faster):
|
||||
Size: 1000000 items
|
||||
Pattern: dpSpecial
|
||||
|
||||
Nim's Default (MurmurHash3_32) Results:
|
||||
Nim's Hash (MurmurHash3_32) Results:
|
||||
Insert time: 0.5143779999999989s (1944095.58729184 ops/sec)
|
||||
Lookup time: 0.2718509999999998s (1839242.820515651 ops/sec)
|
||||
False positives: 4222 (0.8444%)
|
||||
|
||||
@ -10,7 +10,7 @@ type
|
||||
HashType* = enum
|
||||
htMurmur128, # Default: MurmurHash3_x64_128
|
||||
htMurmur32, # MurmurHash3_x86_32
|
||||
htNimHash # Nim's Default Hash (currently Farm Hash)
|
||||
htNimHash # Nim's Hash (currently Farm Hash)
|
||||
|
||||
BloomFilterError* = object of CatchableError
|
||||
|
||||
@ -35,7 +35,9 @@ proc rawMurmurHash32(key: cstring, len: int, seed: uint32,
|
||||
importc: "MurmurHash3_x86_32".}
|
||||
|
||||
proc murmurHash128(key: string, seed = 0'u32): MurmurHashes =
|
||||
rawMurmurHash128(key, key.len, seed, result)
|
||||
var hashResult: MurmurHashes
|
||||
rawMurmurHash128(key, key.len, seed, hashResult)
|
||||
hashResult
|
||||
|
||||
proc murmurHash32(key: string, seed = 0'u32): uint32 =
|
||||
var result: uint32
|
||||
@ -88,7 +90,7 @@ proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0,
|
||||
## - hashType: Choose hash function:
|
||||
## * htMurmur128: MurmurHash3_x64_128 (default) - recommended
|
||||
## * htMurmur32: MurmurHash3_x86_32
|
||||
## * htNimHash: Nim's Default Hash
|
||||
## * htNimHash: Nim's Hash
|
||||
var
|
||||
kHashes: int
|
||||
nBitsPerElem: int
|
||||
|
||||
@ -1,9 +1,7 @@
|
||||
import unittest
|
||||
import sets
|
||||
import strutils
|
||||
include bloom
|
||||
from random import rand, randomize
|
||||
import times
|
||||
|
||||
suite "murmur":
|
||||
# Test murmurhash3 implementations
|
||||
@ -124,7 +122,7 @@ suite "bloom filter":
|
||||
falsePositives.inc
|
||||
|
||||
let actualErrorRate = falsePositives.float / testSize.float
|
||||
check actualErrorRate < bf.errorRate * 2.0 # Allow some margin
|
||||
check actualErrorRate < bf.errorRate * 1.5 # Allow some margin
|
||||
|
||||
test "perfect recall":
|
||||
var lookupErrors = 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user