diff --git a/nim-bloom/README.md b/nim-bloom/README.md index c255397..b042ea8 100644 --- a/nim-bloom/README.md +++ b/nim-bloom/README.md @@ -46,7 +46,7 @@ assert bf.lookup("test") - String concatenation overhead for second hash, causing higher insertion and lookup times, is acceptable. 3. Use NimHash when: - - Consistency with Nim's default hashing is important + - Consistency with Nim's hashing is important - Working with smaller datasets where performance is less critical - Future availability of better hash functions or performant implementations @@ -100,7 +100,7 @@ let secondHash = murmurHash32(item & " b", 0'u32) - Bit Rotation for second hash provides sufficient randomness in some use cases while being much faster than string concatenation (but results in higher FP rate) - Choose between bit rotation or string concatenation as per your use-case. -3. Nim's Default Hash: +3. Nim's Hash: ```nim let hashA = abs(hash(item)) mod maxValue diff --git a/nim-bloom/benches/bench.nim b/nim-bloom/benches/bench.nim index 8fd9425..82d7ca9 100644 --- a/nim-bloom/benches/bench.nim +++ b/nim-bloom/benches/bench.nim @@ -83,7 +83,7 @@ proc runBenchmark(size: int, errorRate: float, pattern: DataPattern, name: strin let murmur32Result = benchmarkHashType(htMurmur32, size, errorRate, data, lookupData) # Print individual results - printResults("Nim's Default (Farm Hash)", nimHashResult, size, lookupData.len) + printResults("Nim's Hash (Farm Hash)", nimHashResult, size, lookupData.len) printResults("MurmurHash3_128", murmur128Result, size, lookupData.len) printResults("MurmurHash3_32", murmur32Result, size, lookupData.len) diff --git a/nim-bloom/benches/bench_results.txt b/nim-bloom/benches/bench_results.txt index cbaa53e..57da231 100644 --- a/nim-bloom/benches/bench_results.txt +++ b/nim-bloom/benches/bench_results.txt @@ -3,7 +3,7 @@ Size: 10000 items Pattern: dpRandom -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.006714000000000001s (1489425.081918379 ops/sec) Lookup time: 0.003387999999999995s (1475796.930342387 ops/sec) False positives: 51 (1.02%) @@ -36,7 +36,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpRandom -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.04622000000000001s (2163565.556036348 ops/sec) Lookup time: 0.02333199999999999s (2142979.59883422 ops/sec) False positives: 402 (0.804%) @@ -69,7 +69,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpRandom -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.4711130000000001s (2122632.998877127 ops/sec) Lookup time: 0.2430289999999999s (2057367.639252929 ops/sec) False positives: 4104 (0.8208%) @@ -102,7 +102,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpSequential -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.004508000000000179s (2218278.615794056 ops/sec) Lookup time: 0.002232000000000012s (2240143.369175615 ops/sec) False positives: 42 (0.84%) @@ -135,7 +135,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpSequential -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.04492400000000019s (2225981.65791113 ops/sec) Lookup time: 0.02218900000000001s (2253368.78633557 ops/sec) False positives: 436 (0.872%) @@ -168,7 +168,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpSequential -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.4540329999999999s (2202483.079423743 ops/sec) Lookup time: 0.2223580000000003s (2248626.089459338 ops/sec) False positives: 4037 (0.8074%) @@ -201,7 +201,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpFixed -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.004486999999999686s (2228660.574994585 ops/sec) Lookup time: 0.002098999999999851s (2382086.707956339 ops/sec) False positives: 43 (0.86%) @@ -234,7 +234,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpFixed -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.04369800000000001s (2288434.253283903 ops/sec) Lookup time: 0.02203400000000011s (2269220.295906316 ops/sec) False positives: 411 (0.822%) @@ -267,7 +267,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpFixed -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.4461910000000002s (2241192.673092912 ops/sec) Lookup time: 0.2275340000000003s (2197473.784137752 ops/sec) False positives: 4084 (0.8168%) @@ -300,7 +300,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpLong -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.009589999999999321s (1042752.86757046 ops/sec) Lookup time: 0.005217000000000027s (958405.2137243577 ops/sec) False positives: 39 (0.7799999999999999%) @@ -333,7 +333,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpLong -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.09930899999999987s (1006958.080335117 ops/sec) Lookup time: 0.04954899999999984s (1009102.100950577 ops/sec) False positives: 393 (0.786%) @@ -366,7 +366,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpLong -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 1.032455000000001s (968565.2159174002 ops/sec) Lookup time: 0.5242619999999993s (953721.6124762058 ops/sec) False positives: 4143 (0.8286%) @@ -399,7 +399,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpSpecial -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.005548999999998472s (1802126.509281448 ops/sec) Lookup time: 0.002686999999999884s (1860811.313732868 ops/sec) False positives: 39 (0.7799999999999999%) @@ -432,7 +432,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpSpecial -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.04318999999999917s (2315350.775642554 ops/sec) Lookup time: 0.02338899999999988s (2137757.065287111 ops/sec) False positives: 435 (0.8699999999999999%) @@ -465,7 +465,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpSpecial -Nim's Default (Farm Hash) Results: +Nim's Hash (Farm Hash) Results: Insert time: 0.4443290000000015s (2250584.589347075 ops/sec) Lookup time: 0.2233900000000002s (2238238.058999953 ops/sec) False positives: 4096 (0.8191999999999999%) diff --git a/nim-bloom/benches/bench_results_nimStringHash2.txt b/nim-bloom/benches/bench_results_nimStringHash2.txt index 03540c9..e7857a9 100644 --- a/nim-bloom/benches/bench_results_nimStringHash2.txt +++ b/nim-bloom/benches/bench_results_nimStringHash2.txt @@ -3,7 +3,7 @@ Size: 10000 items Pattern: dpRandom -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.004986000000000001s (2005615.724027276 ops/sec) Lookup time: 0.00238s (2100840.336134454 ops/sec) False positives: 43 (0.86%) @@ -36,7 +36,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpRandom -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.050316s (1987439.383098816 ops/sec) Lookup time: 0.02490799999999999s (2007387.184840213 ops/sec) False positives: 406 (0.8120000000000001%) @@ -69,7 +69,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpRandom -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.522163s (1915110.798735261 ops/sec) Lookup time: 0.257452s (1942109.597128785 ops/sec) False positives: 4130 (0.826%) @@ -102,7 +102,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpSequential -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.004666000000000059s (2143163.309044122 ops/sec) Lookup time: 0.002341000000000149s (2135839.384878122 ops/sec) False positives: 51 (1.02%) @@ -135,7 +135,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpSequential -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.0480590000000003s (2080775.713185863 ops/sec) Lookup time: 0.02284599999999992s (2188566.926376616 ops/sec) False positives: 446 (0.8920000000000001%) @@ -168,7 +168,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpSequential -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.4705080000000001s (2125362.374284815 ops/sec) Lookup time: 0.233136s (2144670.921693775 ops/sec) False positives: 4024 (0.8048%) @@ -201,7 +201,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpFixed -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.004584000000000366s (2181500.872600175 ops/sec) Lookup time: 0.002313999999999705s (2160760.587727155 ops/sec) False positives: 35 (0.7000000000000001%) @@ -234,7 +234,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpFixed -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.04620999999999986s (2164033.758926646 ops/sec) Lookup time: 0.02306600000000003s (2167692.707881728 ops/sec) False positives: 433 (0.8659999999999999%) @@ -267,7 +267,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpFixed -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.4785590000000002s (2089606.506198817 ops/sec) Lookup time: 0.2445870000000001s (2044262.368809462 ops/sec) False positives: 4085 (0.8170000000000001%) @@ -300,7 +300,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpLong -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.008112999999999815s (1232589.670898586 ops/sec) Lookup time: 0.003777000000000363s (1323801.959226772 ops/sec) False positives: 40 (0.8%) @@ -333,7 +333,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpLong -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.07904900000000037s (1265038.140899942 ops/sec) Lookup time: 0.04046299999999992s (1235696.809430841 ops/sec) False positives: 404 (0.8080000000000001%) @@ -366,7 +366,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpLong -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.814165s (1228252.25844884 ops/sec) Lookup time: 0.4165090000000005s (1200454.251888913 ops/sec) False positives: 4039 (0.8078%) @@ -399,7 +399,7 @@ Comparison (higher means better/faster): Size: 10000 items Pattern: dpSpecial -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.007111000000000089s (1406271.97299956 ops/sec) Lookup time: 0.003309000000000673s (1511030.522816254 ops/sec) False positives: 51 (1.02%) @@ -432,7 +432,7 @@ Comparison (higher means better/faster): Size: 100000 items Pattern: dpSpecial -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.05804300000000051s (1722860.637802993 ops/sec) Lookup time: 0.03600399999999837s (1388734.585046169 ops/sec) False positives: 368 (0.736%) @@ -465,7 +465,7 @@ Comparison (higher means better/faster): Size: 1000000 items Pattern: dpSpecial -Nim's Default (MurmurHash3_32) Results: +Nim's Hash (MurmurHash3_32) Results: Insert time: 0.5143779999999989s (1944095.58729184 ops/sec) Lookup time: 0.2718509999999998s (1839242.820515651 ops/sec) False positives: 4222 (0.8444%) diff --git a/nim-bloom/src/bloom.nim b/nim-bloom/src/bloom.nim index 896162e..ba3bdc8 100644 --- a/nim-bloom/src/bloom.nim +++ b/nim-bloom/src/bloom.nim @@ -10,7 +10,7 @@ type HashType* = enum htMurmur128, # Default: MurmurHash3_x64_128 htMurmur32, # MurmurHash3_x86_32 - htNimHash # Nim's Default Hash (currently Farm Hash) + htNimHash # Nim's Hash (currently Farm Hash) BloomFilterError* = object of CatchableError @@ -35,7 +35,9 @@ proc rawMurmurHash32(key: cstring, len: int, seed: uint32, importc: "MurmurHash3_x86_32".} proc murmurHash128(key: string, seed = 0'u32): MurmurHashes = - rawMurmurHash128(key, key.len, seed, result) + var hashResult: MurmurHashes + rawMurmurHash128(key, key.len, seed, hashResult) + hashResult proc murmurHash32(key: string, seed = 0'u32): uint32 = var result: uint32 @@ -88,7 +90,7 @@ proc initializeBloomFilter*(capacity: int, errorRate: float, k = 0, ## - hashType: Choose hash function: ## * htMurmur128: MurmurHash3_x64_128 (default) - recommended ## * htMurmur32: MurmurHash3_x86_32 - ## * htNimHash: Nim's Default Hash + ## * htNimHash: Nim's Hash var kHashes: int nBitsPerElem: int diff --git a/nim-bloom/tests/test.nim b/nim-bloom/tests/test.nim index 2e05ae7..88d70fe 100644 --- a/nim-bloom/tests/test.nim +++ b/nim-bloom/tests/test.nim @@ -1,9 +1,7 @@ import unittest -import sets import strutils include bloom from random import rand, randomize -import times suite "murmur": # Test murmurhash3 implementations @@ -124,7 +122,7 @@ suite "bloom filter": falsePositives.inc let actualErrorRate = falsePositives.float / testSize.float - check actualErrorRate < bf.errorRate * 2.0 # Allow some margin + check actualErrorRate < bf.errorRate * 1.5 # Allow some margin test "perfect recall": var lookupErrors = 0