147 lines
4.6 KiB
TypeScript
Raw Normal View History

import { hashN } from "./nim_hashn/nim_hashn.mjs";
import { getMOverNBitsForK } from "./probabilities.js";
export interface BloomFilterOptions {
// The expected maximum number of elements for which this BloomFilter is sized.
capacity: number;
// The desired false-positive rate (between 0 and 1).
errorRate: number;
// (Optional) The exact number of hash functions, if the user wants to override the automatic calculation.
kHashes?: number;
// (Optional) Force a specific number of bits per element instead of using a table or optimal formula.
forceNBitsPerElem?: number;
}
const sizeOfInt = 8;
/**
* A probabilistic data structure that tracks memberships in a set.
* Supports time and space efficient lookups, but may return false-positives.
* Can never return false-negatives.
* A bloom filter can tell us if an element is:
* - Definitely not in the set
* - Potentially in the set (with a probability depending on the false-positive rate)
*/
export class BloomFilter {
public totalBits: number;
public data: Array<bigint> = [];
public kHashes: number;
public errorRate: number;
public options: BloomFilterOptions;
private hashN: (item: string, n: number, maxValue: number) => number;
public constructor(
options: BloomFilterOptions,
hashN: (item: string, n: number, maxValue: number) => number
) {
this.options = options;
let nBitsPerElem: number;
let k = options.kHashes ?? 0;
const forceNBitsPerElem = options.forceNBitsPerElem ?? 0;
if (k < 1) {
// Calculate optimal k based on target error rate
const bitsPerElem = Math.ceil(
-1.0 * (Math.log(options.errorRate) / Math.pow(Math.log(2), 2))
);
k = Math.round(Math.log(2) * bitsPerElem);
nBitsPerElem = Math.round(bitsPerElem);
} else {
// Use specified k if possible
if (forceNBitsPerElem < 1) {
// Use lookup table
nBitsPerElem = getMOverNBitsForK(k, options.errorRate);
} else {
nBitsPerElem = forceNBitsPerElem;
}
}
const mBits = options.capacity * nBitsPerElem;
const mInts = 1 + Math.floor(mBits / (sizeOfInt * 8));
this.totalBits = mBits;
this.data = new Array<bigint>(mInts);
this.data.fill(BigInt(0));
this.kHashes = k;
this.hashN = hashN;
this.errorRate = options.errorRate;
}
public computeHashes(item: string): number[] {
const hashes = new Array<number>(this.kHashes);
for (let i = 0; i < this.kHashes; i++) {
hashes[i] = this.hashN(item, i, this.totalBits);
}
return hashes;
}
// Adds an item to the bloom filter by computing its hash values
// and setting corresponding bits in "data".
public insert(item: string): void {
const hashSet = this.computeHashes(item);
for (const h of hashSet) {
const intAddress = Math.floor(h / (sizeOfInt * 8));
const bitOffset = h % (sizeOfInt * 8);
this.data[intAddress] =
this.data[intAddress] | (BigInt(1) << BigInt(bitOffset));
}
}
// Checks if the item is potentially in the bloom filter.
// The method is guaranteed to return "true" for items that were inserted,
// but might also return "true" for items that were never inserted
// (purpose of false-positive probability).
public lookup(item: string): boolean {
const hashSet = this.computeHashes(item);
for (const h of hashSet) {
const intAddress = Math.floor(h / (sizeOfInt * 8));
const bitOffset = h % (sizeOfInt * 8);
const currentInt = this.data[intAddress];
if (currentInt != (currentInt | (BigInt(1) << BigInt(bitOffset)))) {
return false;
}
}
return true;
}
public toBytes(): Uint8Array {
const buffer = new ArrayBuffer(this.data.length * 8);
const view = new DataView(buffer);
for (let i = 0; i < this.data.length; i++) {
view.setBigInt64(i * 8, this.data[i]);
}
return new Uint8Array(buffer);
}
public static fromBytes(
bytes: Uint8Array,
options: BloomFilterOptions,
hashN: (item: string, n: number, maxValue: number) => number
): BloomFilter {
const bloomFilter = new BloomFilter(options, hashN);
const view = new DataView(bytes.buffer);
for (let i = 0; i < bloomFilter.data.length; i++) {
bloomFilter.data[i] = view.getBigUint64(i * 8, false);
}
return bloomFilter;
}
}
export class DefaultBloomFilter extends BloomFilter {
public constructor(options: BloomFilterOptions) {
super(options, hashN);
}
public static fromBytes(
bytes: Uint8Array,
options: BloomFilterOptions
): DefaultBloomFilter {
return BloomFilter.fromBytes(bytes, options, hashN);
}
}