nim-sds/tests/test_bloom.nim

146 lines
4.5 KiB
Nim

import unittest, results, strutils
import ../src/bloom
from random import rand, randomize
suite "bloom filter":
setup:
let nElementsToTest = 10000
let bfResult = initializeBloomFilter(capacity = nElementsToTest, errorRate = 0.001)
check bfResult.isOk
var bf = bfResult.get
randomize(2882) # Seed the RNG
var
sampleChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
testElements = newSeq[string](nElementsToTest)
for i in 0 ..< nElementsToTest:
var newString = ""
for j in 0 .. 7:
newString.add(sampleChars[rand(51)])
testElements[i] = newString
for item in testElements:
bf.insert(item)
test "initialization parameters":
check bf.capacity == nElementsToTest
check bf.errorRate == 0.001
check bf.kHashes == 10
check bf.mBits div bf.capacity == 15 # bits per element
test "basic operations":
check bf.lookup("nonexistent") == false # Test empty lookup
let bf2Result = initializeBloomFilter(100, 0.01)
check bf2Result.isOk
var bf2 = bf2Result.get
bf2.insert("test string")
check bf2.lookup("test string") == true
check bf2.lookup("different string") == false
test "error rate":
var falsePositives = 0
let testSize = nElementsToTest div 2
for i in 0 ..< testSize:
var testString = ""
for j in 0 .. 8: # Different length than setup
testString.add(sampleChars[rand(51)])
if bf.lookup(testString):
falsePositives.inc()
let actualErrorRate = falsePositives.float / testSize.float
check actualErrorRate < bf.errorRate * 1.5 # Allow some margin
test "perfect recall":
var lookupErrors = 0
for item in testElements:
if not bf.lookup(item):
lookupErrors.inc()
check lookupErrors == 0
test "k/m bits specification":
# Test error case for k > 12
let errorCase = getMOverNBitsForK(k = 13, targetError = 0.01)
check errorCase.isErr
check errorCase.error ==
"K must be <= 12 if forceNBitsPerElem is not also specified."
# Test error case for unachievable error rate
let errorCase2 = getMOverNBitsForK(k = 2, targetError = 0.00001)
check errorCase2.isErr
check errorCase2.error ==
"Specified value of k and error rate not achievable using less than 4 bytes / element."
# Test success cases
let case1 = getMOverNBitsForK(k = 2, targetError = 0.1)
check case1.isOk
check case1.value == 6
let case2 = getMOverNBitsForK(k = 7, targetError = 0.01)
check case2.isOk
check case2.value == 10
let case3 = getMOverNBitsForK(k = 7, targetError = 0.001)
check case3.isOk
check case3.value == 16
let bf2Result = initializeBloomFilter(10000, 0.001, k = 4, forceNBitsPerElem = 20)
check bf2Result.isOk
let bf2 = bf2Result.get
check bf2.kHashes == 4
check bf2.mBits == 200000
test "string representation":
let bf3Result = initializeBloomFilter(1000, 0.01, k = 4)
check bf3Result.isOk
let bf3 = bf3Result.get
let str = $bf3
check str.contains("1000") # Capacity
check str.contains("4 hash") # Hash functions
check str.contains("1.0e-02") # Error rate in scientific notation
suite "bloom filter special cases":
test "different patterns of strings":
const testSize = 10_000
let patterns =
@[
"shortstr",
repeat("a", 1000), # Very long string
"special@#$%^&*()", # Special characters
"unicode→★∑≈", # Unicode characters
repeat("pattern", 10), # Repeating pattern
]
let bfResult = initializeBloomFilter(testSize, 0.01)
check bfResult.isOk
var bf = bfResult.get
var inserted = newSeq[string](testSize)
# Test pattern handling
for pattern in patterns:
bf.insert(pattern)
assert bf.lookup(pattern), "failed lookup pattern: " & pattern
# Test general insertion and lookup
for i in 0 ..< testSize:
inserted[i] = $i & "test" & $rand(1000)
bf.insert(inserted[i])
# Verify all insertions
var lookupErrors = 0
for item in inserted:
if not bf.lookup(item):
lookupErrors.inc()
check lookupErrors == 0
# Check false positive rate
var falsePositives = 0
let fpTestSize = testSize div 2
for i in 0 ..< fpTestSize:
let testItem = "notpresent" & $i & $rand(1000)
if bf.lookup(testItem):
falsePositives.inc()
let fpRate = falsePositives.float / fpTestSize.float
check fpRate < bf.errorRate * 1.5 # Allow some margin but should be close to target