js-waku/packages/sds/src/probabilities.ts
Arseniy Klempner be93e4b71f
feat(sds): migrate bloomfilter to bigint and import hashn function from nim
Uses an array of bigint to store sufficient bits in bloom filter.
Updates all arithmetic to explicitly cast to bigint where necessary.
Makes the hashn function for bloomfilter a parameter.
Adds an implementation of hashn generated using nim compiler.
Adds tests.
2025-02-05 17:27:41 -08:00

169 lines
8.5 KiB
TypeScript

// This file contains the probability tables used to determine the optimal number of
// hash functions (k) and bits per element (m/n) for a Bloom filter.
//
// These are used to determine how to construct a Bloom filter that can perform
// lookups with a false-positive rate low enough to be satisfactory.
/**
 * False-positive rates for one fixed number of hash functions (k).
 * The element at index i is the rate when i bits are allotted per element (m/n = i).
 */
type TErrorForK = Float32Array;
/**
 * The complete lookup table: the outer index is k (the number of hash functions),
 * and each entry is that k's vector of false-positive rates indexed by bits per element.
 *
 * Example:
 * ```ts
 * // False-positive probability on lookup with 1 hash function (k=1)
 * // and 15 bits per element (mOverN=15):
 * const falsePositiveRate = kErrors[1][15];
 * ```
 */
type TAllErrorRates = TErrorForK[];
/**
 * Table of false-positive rates for k (number of hash functions) from 0 to 12.
 * Each row is indexed by mOverN (bits per element), so kErrors[k][mOverN] is the
 * estimated false-positive probability for that combination.
 *
 * Layout:
 * - Row 0 holds a single entry of 1.0.
 * - Rows 1 through 12 each hold 33 entries, covering mOverN = 0..32.
 * - Leading 1.0 entries appear to be placeholders for (k, mOverN) pairs that the
 *   reference tables do not list; being 1.0, they are never strictly below a
 *   target error rate of 1 or less.
 * - Every row is non-increasing in mOverN.
 *
 * Values are stored as 32-bit floats, so a stored entry can differ slightly from
 * the decimal literal it was written as.
 *
 * These values mirror commonly used reference data found in Bloom filter literature,
 * such as:
 * https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
 * https://dl.acm.org/doi/pdf/10.1145/362686.362692
 */
// prettier-ignore
export const kErrors: TAllErrorRates = [
  // k = 0
  new Float32Array([1.0]),
  // k = 1
  new Float32Array([1.0, 1.0, 0.3930000000, 0.2830000000, 0.2210000000, 0.1810000000,
    0.1540000000, 0.1330000000, 0.1180000000, 0.1050000000, 0.0952000000,
    0.0869000000, 0.0800000000, 0.0740000000, 0.0689000000, 0.0645000000,
    0.0606000000, 0.0571000000, 0.0540000000, 0.0513000000, 0.0488000000,
    0.0465000000, 0.0444000000, 0.0425000000, 0.0408000000, 0.0392000000,
    0.0377000000, 0.0364000000, 0.0351000000, 0.0339000000, 0.0328000000,
    0.0317000000, 0.0308000000]),
  // k = 2
  new Float32Array([1.0, 1.0, 0.4000000000, 0.2370000000, 0.1550000000, 0.1090000000,
    0.0804000000, 0.0618000000, 0.0489000000, 0.0397000000, 0.0329000000,
    0.0276000000, 0.0236000000, 0.0203000000, 0.0177000000, 0.0156000000,
    0.0138000000, 0.0123000000, 0.0111000000, 0.0099800000, 0.0090600000,
    0.0082500000, 0.0075500000, 0.0069400000, 0.0063900000, 0.0059100000,
    0.0054800000, 0.0051000000, 0.0047500000, 0.0044400000, 0.0041600000,
    0.0039000000, 0.0036700000]),
  // k = 3 (the entries for mOverN = 12..15 were previously listed twice, which
  // shifted every later entry four places to the right)
  new Float32Array([1.0, 1.0, 1.0, 0.2530000000, 0.1470000000, 0.0920000000,
    0.0609000000, 0.0423000000, 0.0306000000, 0.0228000000, 0.0174000000,
    0.0136000000, 0.0108000000, 0.0087500000, 0.0071800000, 0.0059600000,
    0.0050000000, 0.0042300000, 0.0036200000, 0.0031200000, 0.0027000000,
    0.0023600000, 0.0020700000, 0.0018300000, 0.0016200000, 0.0014500000,
    0.0012900000, 0.0011600000, 0.0010500000, 0.0009490000, 0.0008620000,
    0.0007850000, 0.0007170000]),
  // k = 4
  new Float32Array([1.0, 1.0, 1.0, 1.0, 0.1600000000, 0.0920000000, 0.0561000000, 0.0359000000,
    0.0240000000, 0.0166000000, 0.0118000000, 0.0086400000, 0.0064600000,
    0.0049200000, 0.0038100000, 0.0030000000, 0.0023900000, 0.0019300000,
    0.0015800000, 0.0013000000, 0.0010800000, 0.0009050000, 0.0007640000,
    0.0006490000, 0.0005550000, 0.0004780000, 0.0004130000, 0.0003590000,
    0.0003140000, 0.0002760000, 0.0002430000, 0.0002150000, 0.0001910000]),
  // k = 5
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 0.1010000000, 0.0578000000, 0.0347000000,
    0.0217000000, 0.0141000000, 0.0094300000, 0.0065000000, 0.0045900000,
    0.0033200000, 0.0024400000, 0.0018300000, 0.0013900000, 0.0010700000,
    0.0008390000, 0.0006630000, 0.0005300000, 0.0004270000, 0.0003470000,
    0.0002850000, 0.0002350000, 0.0001960000, 0.0001640000, 0.0001380000,
    0.0001170000, 0.0000996000, 0.0000853000, 0.0000733000, 0.0000633000]),
  // k = 6
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0638000000, 0.0364000000, 0.0216000000,
    0.0133000000, 0.0084400000, 0.0055200000, 0.0037100000, 0.0025500000,
    0.0017900000, 0.0012800000, 0.0009350000, 0.0006920000, 0.0005190000,
    0.0003940000, 0.0003030000, 0.0002360000, 0.0001850000, 0.0001470000,
    0.0001170000, 0.0000944000, 0.0000766000, 0.0000626000, 0.0000515000,
    0.0000426000, 0.0000355000, 0.0000297000, 0.0000250000]),
  // k = 7
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0229000000, 0.0135000000, 0.0081900000,
    0.0051300000, 0.0032900000, 0.0021700000, 0.0014600000, 0.0010000000,
    0.0007020000, 0.0004990000, 0.0003600000, 0.0002640000, 0.0001960000,
    0.0001470000, 0.0001120000, 0.0000856000, 0.0000663000, 0.0000518000,
    0.0000408000, 0.0000324000, 0.0000259000, 0.0000209000, 0.0000169000,
    0.0000138000, 0.0000113000]),
  // k = 8
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    1.0, 0.0145000000, 0.0084600000, 0.0050900000, 0.0031400000, 0.0019900000,
    0.0012900000, 0.0008520000, 0.0005740000, 0.0003940000, 0.0002750000,
    0.0001940000, 0.0001400000, 0.0001010000, 0.0000746000, 0.0000555000,
    0.0000417000, 0.0000316000, 0.0000242000, 0.0000187000, 0.0000146000,
    0.0000114000, 0.0000090100, 0.0000071600, 0.0000057300]),
  // k = 9
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0053100000, 0.0031700000,
    0.0019400000, 0.0012100000, 0.0007750000, 0.0005050000, 0.0003350000,
    0.0002260000, 0.0001550000, 0.0001080000, 0.0000759000, 0.0000542000,
    0.0000392000, 0.0000286000, 0.0000211000, 0.0000157000, 0.0000118000,
    0.0000089600, 0.0000068500, 0.0000052800, 0.0000041000, 0.0000032000]),
  // k = 10
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0033400000,
    0.0019800000, 0.0012000000, 0.0007440000, 0.0004700000, 0.0003020000,
    0.0001980000, 0.0001320000, 0.0000889000, 0.0000609000, 0.0000423000,
    0.0000297000, 0.0000211000, 0.0000152000, 0.0000110000, 0.0000080700,
    0.0000059700, 0.0000044500, 0.0000033500, 0.0000025400, 0.0000019400]),
  // k = 11
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    0.0021000000, 0.0012400000, 0.0007470000, 0.0004590000, 0.0002870000,
    0.0001830000, 0.0001180000, 0.0000777000, 0.0000518000, 0.0000350000,
    0.0000240000, 0.0000166000, 0.0000116000, 0.0000082300, 0.0000058900,
    0.0000042500, 0.0000031000, 0.0000022800, 0.0000016900, 0.0000012600]),
  // k = 12
  new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    0.0007780000, 0.0004660000, 0.0002840000, 0.0001760000, 0.0001110000,
    0.0000712000, 0.0000463000, 0.0000305000, 0.0000204000, 0.0000138000,
    0.0000094200, 0.0000065200, 0.0000045600, 0.0000032200, 0.0000022900,
    0.0000016500, 0.0000012000, 0.0000008740]),
];
/** Message of the error thrown when k is outside the range covered by the table (0 to 12). */
export const KTooLargeError = "K must be <= 12";
/** Message of the error thrown when no tabulated ratio meets the requested error rate for k. */
export const NoSuitableRatioError =
  "Specified value of k and error rate not achievable using less than 4 bytes / element.";
/**
 * Given a number of hash functions (k) and a target false-positive rate (targetError),
 * determines the minimum (m/n) bits-per-element that satisfies the error threshold.
 *
 * In the context of a Bloom filter:
 * - m is the total number of bits in the filter.
 * - n is the number of elements you expect to insert.
 * Thus, (m/n) describes how many bits are assigned per inserted element.
 *
 * Example:
 * ```ts
 * // We want to use 3 hash functions (k=3) and a false-positive rate of 1% (targetError=0.01).
 * const mOverN = getMOverNBitsForK(3, 0.01);
 * // The function scans kErrors[3] from m/n = 2 upwards and returns the first index whose
 * // rate is strictly below the target. kErrors[3][12] is 0.0108 (still too high), while
 * // kErrors[3][13] is 0.00875, so 13 bits per element are required.
 * console.log(mOverN); // 13
 * ```
 *
 * @param k - The number of hash functions; must be an integer from 0 to 12.
 * @param targetError - The desired maximum false-positive rate.
 * @param probabilityTable - An optional table of false-positive probabilities indexed by k.
 * @returns The smallest (m/n) bit ratio for which the false-positive rate is below targetError.
 * @throws Error with message KTooLargeError if k is below 0, above 12, or has no row
 * in probabilityTable.
 * @throws RangeError if k is not an integer (including NaN).
 * @throws Error with message NoSuitableRatioError if no entry in the row for k is
 * below targetError.
 */
export function getMOverNBitsForK(
  k: number,
  targetError: number,
  probabilityTable = kErrors
): number {
  if (k < 0 || k > 12) {
    throw new Error(KTooLargeError);
  }
  // A fractional or NaN k cannot index a row; reject it explicitly instead of
  // failing later on an undefined row.
  if (!Number.isInteger(k)) {
    throw new RangeError("k must be an integer");
  }
  // A caller-supplied table may have fewer rows than the default one.
  const errorRates = probabilityTable[k];
  if (errorRates === undefined) {
    throw new Error(KTooLargeError);
  }
  // The scan starts at m/n = 2; in the default table indices 0 and 1 are 1.0 in every row.
  for (let mOverN = 2; mOverN < errorRates.length; mOverN++) {
    if (errorRates[mOverN] < targetError) {
      return mOverN;
    }
  }
  throw new Error(NoSuitableRatioError);
}