feat(sds): migrate bloomfilter to bigint and import hashn function from nim

Uses an array of bigint to store sufficient bits in bloom filter.
Updates all arithmetic to explicitly cast to bigint where necessary.
Makes the hashn function for bloomfilter a parameter.
Adds an implementation of hashn generated using nim compiler.
Adds tests.
This commit is contained in:
Arseniy Klempner 2025-01-30 15:33:17 -08:00
parent 3136f3a704
commit be93e4b71f
No known key found for this signature in database
GPG Key ID: 51653F18863BD24B
10 changed files with 1684 additions and 21 deletions

View File

@ -0,0 +1,157 @@
import { expect } from "chai";
import { BloomFilter } from "./bloom.js";
import { hashN } from "./nim_hashn/nim_hashn.mjs";
// Capacity passed to the filters under test; also the number of elements the
// suites insert into them.
const n = 10000;
// Alphabet for randomly generated test strings (62 characters).
// NOTE(review): the suites index it with Math.floor(Math.random() * 51), so
// only the first 51 characters ("A".."y") are ever drawn — confirm whether the
// remaining letter and the digits were meant to be included.
const sampleChars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
// Hand-picked inputs (short, very long, punctuation, non-ASCII, repetitive)
// that are inserted and looked up by the "special patterns" suite.
const specialPatterns = [
"shortstr",
"a".repeat(1000), // Very long string
"special@#$%^&*()", // Special characters
"unicode→★∑≈", // Unicode characters
"pattern".repeat(10) // Repeating pattern
];
// Exercises a BloomFilter that has been filled to capacity with random
// 7-character strings before every test.
describe("BloomFilter", () => {
  let bloomFilter: BloomFilter;
  let testElements: string[];

  // Builds a random string of `length` characters drawn from the first 51
  // entries of sampleChars.
  const randomString = (length: number): string => {
    let text = "";
    for (let position = 0; position < length; position++) {
      text += sampleChars[Math.floor(Math.random() * 51)];
    }
    return text;
  };

  beforeEach(() => {
    bloomFilter = new BloomFilter({ capacity: n, errorRate: 0.001 }, hashN);

    // Generate every element first, then insert them all.
    testElements = new Array<string>(n);
    for (let index = 0; index < n; index++) {
      testElements[index] = randomString(7);
    }
    testElements.forEach((element) => {
      bloomFilter.insert(element);
    });

    // Sanity checks on the freshly populated filter.
    expect(bloomFilter.lookup("nonexistent")).to.equal(
      false,
      "look up for an element yet to be added should return false"
    );
    expect(bloomFilter.lookup(testElements[0])).to.equal(
      true,
      "look up for an element that was added should return true"
    );
  });

  it("should initialize bloom filter with correct parameters", () => {
    // Values derived by the constructor from capacity and errorRate.
    expect(bloomFilter.kHashes).to.equal(10);
    expect(bloomFilter.totalBits / n).to.equal(15);

    // Explicit kHashes / forceNBitsPerElem must be taken as given.
    const forcedOptions = {
      capacity: 10000,
      errorRate: 0.001,
      kHashes: 4,
      forceNBitsPerElem: 20
    };
    const bloomFilter2 = new BloomFilter(forcedOptions, hashN);
    expect(bloomFilter2.kHashes).to.equal(4);
    expect(bloomFilter2.totalBits).to.equal(200000);
  });

  it("should insert elements correctly", () => {
    const fresh = "test string";
    expect(bloomFilter.lookup(fresh)).to.equal(
      false,
      "look up for an element yet to be added should return false"
    );
    bloomFilter.insert(fresh);
    expect(bloomFilter.lookup(fresh)).to.equal(
      true,
      "look up for an element that was added should return true"
    );
    expect(bloomFilter.lookup("different string")).to.equal(
      false,
      "look up for an element that was not added should return false"
    );
  });

  it("should maintain desired error rate", () => {
    const testSize = n / 2;
    let falsePositives = 0;
    let remaining = testSize;
    while (remaining > 0) {
      // 8 characters: a different length than the inserted elements, so a
      // hit can only be a false positive.
      if (bloomFilter.lookup(randomString(8))) {
        falsePositives += 1;
      }
      remaining -= 1;
    }
    const actualErrorRate = falsePositives / testSize;
    expect(actualErrorRate).to.be.lessThan(bloomFilter.errorRate * 1.5);
  });

  it("should never report false negatives", () => {
    testElements.forEach((element) => {
      expect(bloomFilter.lookup(element)).to.equal(true);
    });
  });
});
// Covers unusual inputs plus bulk insert/lookup behaviour. Every test starts
// from an EMPTY filter created in beforeEach, so each test inserts whatever
// data it needs itself.
describe("BloomFilter with special patterns", () => {
  let bloomFilter: BloomFilter;

  // Random item of the form "<i>test<base36 noise>".
  const makeItem = (i: number): string =>
    `${i}test${Math.random().toString(36).substring(2, 15)}`;

  beforeEach(() => {
    bloomFilter = new BloomFilter(
      {
        capacity: n,
        errorRate: 0.001
      },
      hashN
    );
  });

  it("should handle special patterns correctly", () => {
    for (const pattern of specialPatterns) {
      bloomFilter.insert(pattern);
      expect(bloomFilter.lookup(pattern)).to.equal(true);
    }
  });

  it("should handle general insertion and lookup correctly", () => {
    // Kept local so no state leaks between tests.
    const inserted: string[] = [];
    for (let i = 0; i < n; i++) {
      const item = makeItem(i);
      inserted.push(item);
      bloomFilter.insert(item);
    }
    for (const item of inserted) {
      expect(bloomFilter.lookup(item)).to.equal(true);
    }
  });

  it("should check false positive rate", () => {
    // Fill the filter to capacity first. Without this the lookups below run
    // against the empty filter from beforeEach, the measured rate is always 0
    // and the assertion can never fail.
    for (let i = 0; i < n; i++) {
      bloomFilter.insert(makeItem(i));
    }

    // Probe strings start with "notpresent", inserted ones start with a
    // digit, so any hit is a genuine false positive. This is a statistical
    // check with the same margin as the main BloomFilter suite.
    const testSize = n / 2;
    let falsePositives = 0;
    for (let i = 0; i < testSize; i++) {
      const testItem = `notpresent${i}${Math.random().toString(36).substring(2, 15)}`;
      if (bloomFilter.lookup(testItem)) {
        falsePositives++;
      }
    }
    const fpRate = falsePositives / testSize;
    expect(fpRate).to.be.lessThan(bloomFilter.errorRate * 1.5);
  });
});

View File

@ -14,6 +14,8 @@ export interface BloomFilterOptions {
forceNBitsPerElem?: number;
}
// Width in bytes of one storage word; the filter addresses bits in groups of
// sizeOfInt * 8 per entry of `data`.
const sizeOfInt = 8;
/**
* A probabilistic data structure that tracks memberships in a set.
* Supports time and space efficient lookups, but may return false-positives.
@ -22,11 +24,17 @@ export interface BloomFilterOptions {
* - Definitely not in the set
* - Potentially in the set (with a probability depending on the false-positive rate)
*/
export abstract class BloomFilter {
export class BloomFilter {
public totalBits: number;
public data: Uint8Array = new Uint8Array(0);
public data: Array<bigint> = [];
public kHashes: number;
public errorRate: number;
public constructor(options: BloomFilterOptions) {
private hashN: (item: string, n: number, maxValue: number) => number;
public constructor(
options: BloomFilterOptions,
hashN: (item: string, n: number, maxValue: number) => number
) {
let nBitsPerElem: number;
let k = options.kHashes ?? 0;
const forceNBitsPerElem = options.forceNBitsPerElem ?? 0;
@ -49,19 +57,50 @@ export abstract class BloomFilter {
}
const mBits = options.capacity * nBitsPerElem;
const mInts = 1 + mBits / (this.data.BYTES_PER_ELEMENT * 8);
const mInts = 1 + Math.floor(mBits / (sizeOfInt * 8));
this.totalBits = mBits;
this.data = new Uint8Array(mInts);
this.data = new Array<bigint>(mInts);
this.data.fill(BigInt(0));
this.kHashes = k;
this.hashN = hashN;
this.errorRate = options.errorRate;
}
// Returns the kHashes bit positions for `item`, obtained by calling the
// injected hashN with indices 0..kHashes-1 and totalBits as the bound.
public computeHashes(item: string): number[] {
  const positions = new Array<number>(this.kHashes);
  let index = 0;
  while (index < this.kHashes) {
    positions[index] = this.hashN(item, index, this.totalBits);
    index += 1;
  }
  return positions;
}
// Adds an item to the bloom filter by computing its hash values
// and setting corresponding bits in "data".
public abstract insert(item: string | Uint8Array): void;
// Sets, for every hash position of `item`, the matching bit in `data`.
public insert(item: string): void {
  const bitsPerWord = sizeOfInt * 8;
  for (const position of this.computeHashes(item)) {
    // Split the absolute bit position into a word index and a bit inside it.
    const wordIndex = Math.floor(position / bitsPerWord);
    const mask = BigInt(1) << BigInt(position % bitsPerWord);
    this.data[wordIndex] |= mask;
  }
}
// Checks if the item is potentially in the bloom filter.
// The method is guaranteed to return "true" for items that were inserted,
// but might also return "true" for items that were never inserted
// (purpose of false-positive probability).
public abstract lookup(item: string | Uint8Array): boolean;
// Reports whether every hash position of `item` has its bit set in `data`;
// stops at the first unset bit.
public lookup(item: string): boolean {
  const bitsPerWord = sizeOfInt * 8;
  return this.computeHashes(item).every((position) => {
    const word = this.data[Math.floor(position / bitsPerWord)];
    const mask = BigInt(1) << BigInt(position % bitsPerWord);
    // A set bit survives the AND; an unset one yields zero.
    return (word & mask) !== BigInt(0);
  });
}
}

View File

@ -1,9 +0,0 @@
import { expect } from "chai";
import { BloomFilter } from "./bloom.js";
// Smoke test: only verifies that the BloomFilter export is truthy.
describe("BloomFilter", () => {
it("should be defined", () => {
expect(BloomFilter).to.be.ok;
});
});

View File

@ -0,0 +1,17 @@
import { expect } from "chai";
import testVectors from "./nim_hash_test_vectors.json" assert { type: "json" };
import { hashN } from "./nim_hashn.mjs";
// Checks hashN against the vectors in nim_hash_test_vectors.json. Only the
// final combined value (`expected.hashC`) is asserted.
describe("hashN", () => {
  // True when the input contains a code unit outside the ASCII range.
  const hasNonAscii = (input: string): boolean => {
    for (let i = 0; i < input.length; i++) {
      if (input.charCodeAt(i) > 0x7f) {
        return true;
      }
    }
    return false;
  };

  testVectors.vectors.forEach((vector) => {
    const title = `should hash "${vector.input}" with n=${vector.n} and maxValue=${vector.maxValue} correctly`;
    const check = (): void => {
      const result = hashN(vector.input, vector.n, vector.maxValue);
      expect(result).to.equal(vector.expected.hashC);
    };

    // TODO: The result of the hash function compiled from nim to js does not match outputs when run in nim itself when using unicode characters.
    // Register these vectors as pending instead of dropping them silently, so
    // the known gap stays visible in the test report.
    if (hasNonAscii(vector.input)) {
      it.skip(title, check);
      return;
    }
    it(title, check);
  });
});

View File

@ -0,0 +1,437 @@
{
"description": "Test vectors for BloomFilter hashN function",
"vectors": [
{
"input": "hello",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 51 }
},
{
"input": "hello",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 25 }
},
{
"input": "hello",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 99 }
},
{
"input": "hello",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 21 }
},
{
"input": "hello",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 351 }
},
{
"input": "hello",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 225 }
},
{
"input": "hello",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 99 }
},
{
"input": "hello",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 721 }
},
{
"input": "hello",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 3351 }
},
{
"input": "hello",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 4225 }
},
{
"input": "hello",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 5099 }
},
{
"input": "hello",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 7721 }
},
{
"input": "",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 8 }
},
{
"input": "",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 16 }
},
{
"input": "",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 40 }
},
{
"input": "",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 8 }
},
{
"input": "",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 16 }
},
{
"input": "",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 40 }
},
{
"input": "",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 6008 }
},
{
"input": "",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 2016 }
},
{
"input": "",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 40 }
},
{
"input": "test123",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 27 }
},
{
"input": "test123",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 31 }
},
{
"input": "test123",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 35 }
},
{
"input": "test123",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 47 }
},
{
"input": "test123",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 227 }
},
{
"input": "test123",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 631 }
},
{
"input": "test123",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 35 }
},
{
"input": "test123",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 247 }
},
{
"input": "test123",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 6227 }
},
{
"input": "test123",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 631 }
},
{
"input": "test123",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 5035 }
},
{
"input": "test123",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 8247 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 11 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 83 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 55 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 71 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 311 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 483 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 655 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 171 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1311 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1483 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1655 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 2171 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 70 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 76 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 85 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 970 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 176 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 485 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 9970 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 176 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 485 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 66 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 79 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 92 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 31 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 366 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 979 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 592 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 431 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 5366 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 1979 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 8592 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 8431 }
}
]
}

View File

@ -0,0 +1,11 @@
/**
* Get the nth hash using the double hashing technique from:
* http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
*
* Two base hashes are derived from `item` (one of the item itself, one of the
* item with " b" appended), each reduced modulo `maxValue`; the result is
* `abs(hashA + n * hashB) mod maxValue`.
*
* Based on https://github.com/waku-org/nim-sds/blob/5df71ad3eaf68172cef39a2e1838ddd871b03b5d/src/bloom.nim#L17
*
* @param item - The string to hash.
* @param n - The index of the hash to derive, i.e. the multiplier applied to
* the second base hash (n = 0 yields the first base hash). It is not a
* repetition count.
* @param maxValue - The modulus applied to the result; for a positive value
* the returned hash is in the range [0, maxValue).
* @returns The nth hash of `item` reduced modulo `maxValue`.
*/
export function hashN(item: string, n: number, maxValue: number): number;

View File

@ -0,0 +1,974 @@
/* Generated by the Nim Compiler v2.2.0 */
// Module-level state of the generated Nim runtime.
// NOTE(review): framePtr is not referenced by the code visible here.
var framePtr = null;
// Nesting depth of active try blocks; raiseException treats 0 as "nobody can
// catch this" and reports the exception as unhandled.
var excHandler = 0;
// Exception currently being handled inside a catch block (saved and restored
// around the handler).
var lastJSError = null;
var NTI134217745 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217749 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217751 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217743 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555167 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555175 = {
size: 0,
kind: 22,
base: null,
node: null,
finalizer: null
};
var NTI33554449 = {
size: 0,
kind: 28,
base: null,
node: null,
finalizer: null
};
var NTI33554450 = {
size: 0,
kind: 29,
base: null,
node: null,
finalizer: null
};
var NTI33555174 = {
size: 0,
kind: 22,
base: null,
node: null,
finalizer: null
};
var NTI33555171 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555172 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217741 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217742 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NNI134217742 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217742.node = NNI134217742;
var NNI134217741 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217741.node = NNI134217741;
var NNI33555172 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI33555172.node = NNI33555172;
NTI33555174.base = NTI33555171;
NTI33555175.base = NTI33555171;
var NNI33555171 = {
kind: 2,
len: 5,
offset: 0,
typ: null,
name: null,
sons: [
{
kind: 1,
offset: "parent",
len: 0,
typ: NTI33555174,
name: "parent",
sons: null
},
{
kind: 1,
offset: "name",
len: 0,
typ: NTI33554450,
name: "name",
sons: null
},
{
kind: 1,
offset: "message",
len: 0,
typ: NTI33554449,
name: "msg",
sons: null
},
{
kind: 1,
offset: "trace",
len: 0,
typ: NTI33554449,
name: "trace",
sons: null
},
{ kind: 1, offset: "up", len: 0, typ: NTI33555175, name: "up", sons: null }
]
};
NTI33555171.node = NNI33555171;
var NNI33555167 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI33555167.node = NNI33555167;
NTI33555171.base = NTI33555167;
NTI33555172.base = NTI33555171;
NTI134217741.base = NTI33555172;
NTI134217742.base = NTI134217741;
var NNI134217743 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217743.node = NNI134217743;
NTI134217743.base = NTI134217741;
var NNI134217751 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217751.node = NNI134217751;
NTI134217751.base = NTI33555172;
var NNI134217749 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217749.node = NNI134217749;
NTI134217749.base = NTI33555172;
var NNI134217745 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217745.node = NNI134217745;
NTI134217745.base = NTI33555172;
/**
 * Converts an array of byte values into a JavaScript string.
 *
 * Bytes below 128 are mapped one-to-one with String.fromCharCode. Each run of
 * consecutive bytes >= 128 is percent-encoded and decoded in one piece with
 * decodeURIComponent; if decoding throws, the percent-encoded text itself is
 * kept for that run.
 */
function toJSStr(s_p0) {
var result_33556911 = null;
var res_33556965 = newSeq__system_u2508(s_p0.length);
// i walks the input bytes, j counts the output pieces written so far.
var i_33556966 = 0;
var j_33556967 = 0;
{
Label2: while (true) {
if (!(i_33556966 < s_p0.length)) break Label2;
var c_33556968 = s_p0[i_33556966];
if (c_33556968 < 128) {
res_33556965[j_33556967] = String.fromCharCode(c_33556968);
i_33556966 += 1;
} else {
// Collect the whole run of bytes >= 128 as "%XX" pairs.
var helper_33556994 = newSeq__system_u2508(0);
Label3: {
Label4: while (true) {
if (!true) break Label4;
var code_33556995 = c_33556968.toString(16);
// Pad single-digit hex values to two digits.
if ((code_33556995 == null ? 0 : code_33556995.length) == 1) {
helper_33556994.push("%0");
} else {
helper_33556994.push("%");
}
helper_33556994.push(code_33556995);
i_33556966 += 1;
// The run ends at the end of input or at the next byte below 128.
if (s_p0.length <= i_33556966 || s_p0[i_33556966] < 128) {
break Label3;
}
c_33556968 = s_p0[i_33556966];
}
}
// excHandler is raised while the try block is active.
++excHandler;
try {
res_33556965[j_33556967] = decodeURIComponent(
helper_33556994.join("")
);
--excHandler;
} catch (EXCEPTION) {
// Decoding failed: keep the raw percent-encoded text for this run.
var prevJSError = lastJSError;
lastJSError = EXCEPTION;
--excHandler;
res_33556965[j_33556967] = helper_33556994.join("");
lastJSError = prevJSError;
} finally {
}
}
j_33556967 += 1;
}
}
// Resize the piece list to exactly j entries before joining.
if (res_33556965.length < j_33556967) {
for (var i = res_33556965.length; i < j_33556967; ++i)
res_33556965.push(null);
} else {
res_33556965.length = j_33556967;
}
result_33556911 = res_33556965.join("");
return result_33556911;
}
/**
 * Stamps `ename_p1` onto the exception object and throws it. When no try
 * block is active (excHandler is 0) the exception is first handed to
 * unhandledException.
 */
function raiseException(e_p0, ename_p1) {
  e_p0.name = ename_p1;
  var insideTry = excHandler != 0;
  if (!insideTry) {
    unhandledException(e_p0);
  }
  throw e_p0;
}
/**
 * Checked remainder: raises on a zero divisor and on the
 * (2147483647, -1) operand pair, otherwise returns the truncated `a % b`.
 */
function modInt(a_p0, b_p1) {
  if (b_p1 == 0) {
    raiseDivByZero();
  }
  if (b_p1 == -1 && a_p0 == 2147483647) {
    raiseOverflow();
  }
  var remainder = a_p0 % b_p1;
  return Math.trunc(remainder);
}
/**
 * Returns the absolute value of `a_p0` (negative inputs are multiplied by -1).
 */
function absInt(a_p0) {
  return a_p0 < 0 ? a_p0 * -1 : a_p0;
}
/**
 * Checked integer division: raises on a zero divisor and on the
 * (2147483647, -1) operand pair, otherwise returns the quotient truncated
 * toward zero.
 */
function divInt(a_p0, b_p1) {
  if (b_p1 == 0) {
    raiseDivByZero();
  }
  if (b_p1 == -1 && a_p0 == 2147483647) {
    raiseOverflow();
  }
  var quotient = a_p0 / b_p1;
  return Math.trunc(quotient);
}
/**
 * Multiplies two integers and raises an overflow defect when the product
 * leaves the signed 32-bit range.
 */
function mulInt(a_p0, b_p1) {
  var product = a_p0 * b_p1;
  checkOverflowInt(product);
  return product;
}
/**
 * Subtracts `b_p1` from `a_p0` and raises an overflow defect when the
 * difference leaves the signed 32-bit range.
 */
function subInt(a_p0, b_p1) {
  var difference = a_p0 - b_p1;
  checkOverflowInt(difference);
  return difference;
}
/**
 * Adds two integers and raises an overflow defect when the sum leaves the
 * signed 32-bit range.
 */
function addInt(a_p0, b_p1) {
  var sum = a_p0 + b_p1;
  checkOverflowInt(sum);
  return sum;
}
/**
 * Creates an array of `len_p0` entries, all initialised to 0.
 */
function mnewString(len_p0) {
  return new Array(len_p0).fill(0);
}
/**
 * Returns `i_p0` when it lies in the inclusive range [a_p1, b_p2]; otherwise
 * raises a range defect.
 */
function chckRange(i_p0, a_p1, b_p2) {
  var inRange = a_p1 <= i_p0 && i_p0 <= b_p2;
  if (!inRange) {
    raiseRangeError();
  }
  return inRange ? i_p0 : 0;
}
/**
 * Builds a set, represented as an object whose keys map to `true`, from the
 * arguments. A plain argument adds itself as a member; an object argument is
 * read as an inclusive [low, high] pair and adds every value in that span.
 */
function setConstr() {
  var members = {};
  var specs = Array.prototype.slice.call(arguments);
  specs.forEach(function (spec) {
    if (typeof spec == "object") {
      var value = spec[0];
      while (value <= spec[1]) {
        members[value] = true;
        ++value;
      }
    } else {
      members[spec] = true;
    }
  });
  return members;
}
// Set of type kinds consulted by isFatPointer__system_u2866: a pointer whose
// base type kind is in this set is reported as NOT being a fat pointer.
var ConstSet1 = setConstr(17, 16, 4, 18, 27, 19, 23, 22, 21);
/**
 * Copies `src_p1` into `dest_p0` (reusing `dest_p0` where possible) according
 * to the runtime type descriptor `ti_p2`, and returns the copy. The strategy
 * is selected by `ti_p2.kind`.
 * NOTE(review): the meaning of the numeric kind codes comes from the Nim
 * runtime and is not visible in this file; the branch comments describe only
 * what each branch does.
 */
function nimCopy(dest_p0, src_p1, ti_p2) {
var result_33557318 = null;
switch (ti_p2.kind) {
// Pointer-like kinds: copied by reference, or as a fresh two-element pair
// when isFatPointer reports a fat pointer.
case 21:
case 22:
case 23:
case 5:
if (!isFatPointer__system_u2866(ti_p2)) {
result_33557318 = src_p1;
} else {
result_33557318 = [src_p1[0], src_p1[1]];
}
break;
// Key/value object: dest is emptied (or created) and all keys of src are
// copied over shallowly.
case 19:
if (dest_p0 === null || dest_p0 === undefined) {
dest_p0 = {};
} else {
for (var key in dest_p0) {
delete dest_p0[key];
}
}
for (var key in src_p1) {
dest_p0[key] = src_p1[key];
}
result_33557318 = dest_p0;
break;
// Structured kinds with fields: copy the base type's part first (if any),
// then this type's own fields via nimCopyAux.
case 18:
case 17:
if (!(ti_p2.base == null)) {
result_33557318 = nimCopy(dest_p0, src_p1, ti_p2.base);
} else {
if (ti_p2.kind == 17) {
// Kind 17 objects created here carry their type descriptor in m_type.
result_33557318 =
dest_p0 === null || dest_p0 === undefined
? { m_type: ti_p2 }
: dest_p0;
} else {
result_33557318 =
dest_p0 === null || dest_p0 === undefined ? {} : dest_p0;
}
}
nimCopyAux(result_33557318, src_p1, ti_p2.node);
break;
// Array kinds: typed arrays are cloned or overwritten in bulk; plain arrays
// are copied element by element using the element type ti_p2.base.
case 4:
case 16:
if (ArrayBuffer.isView(src_p1)) {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new src_p1.constructor(src_p1);
} else {
dest_p0.set(src_p1, 0);
}
result_33557318 = dest_p0;
} else {
if (src_p1 === null) {
result_33557318 = null;
} else {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new Array(src_p1.length);
}
result_33557318 = dest_p0;
for (var i = 0; i < src_p1.length; ++i) {
result_33557318[i] = nimCopy(
result_33557318[i],
src_p1[i],
ti_p2.base
);
}
}
}
break;
// Nullable array kinds: null stays null, otherwise an element-wise copy.
case 24:
case 27:
if (src_p1 === null) {
result_33557318 = null;
} else {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new Array(src_p1.length);
}
result_33557318 = dest_p0;
for (var i = 0; i < src_p1.length; ++i) {
result_33557318[i] = nimCopy(
result_33557318[i],
src_p1[i],
ti_p2.base
);
}
}
break;
// Kind 28: shallow clone via slice; a null source leaves the result null.
case 28:
if (src_p1 !== null) {
result_33557318 = src_p1.slice(0);
}
break;
// Everything else is returned as-is.
default:
result_33557318 = src_p1;
break;
}
return result_33557318;
}
/**
 * Returns `i_p0` when it lies in the inclusive index range [a_p1, b_p2];
 * otherwise raises an index defect describing the index and the bounds.
 */
function chckIndx(i_p0, a_p1, b_p2) {
  var inBounds = a_p1 <= i_p0 && i_p0 <= b_p2;
  if (!inBounds) {
    raiseIndexError(i_p0, a_p1, b_p2);
  }
  return inBounds ? i_p0 : 0;
}
// Single-element array holding a counter initialised to 0.
// NOTE(review): not referenced by the code visible here; presumably emitted
// by the Nim hashes module — confirm before removing.
var objectID_671088817 = [0];
/**
 * Appends the UTF-16 code units of the JS string `y_p1` to the array stored
 * at `x_p0[x_p0_Idx]`, creating that array first when the slot is null.
 */
function add__system_u1943(x_p0, x_p0_Idx, y_p1) {
  if (x_p0[x_p0_Idx] === null) {
    x_p0[x_p0_Idx] = [];
  }
  var target = x_p0[x_p0_Idx];
  var base = target.length;
  // Grow once, then fill the new tail.
  target.length += y_p1.length;
  for (var k = 0; k < y_p1.length; ++k) {
    target[base + k] = y_p1.charCodeAt(k);
  }
}
/**
 * Creates an array of `len_p0` entries, all initialised to null.
 */
function newSeq__system_u2508(len_p0) {
  return new Array(len_p0).fill(null);
}
/**
 * Reports an exception that nothing will catch: builds the text
 * "Error: unhandled exception: <message> [<name>]\n" (or without the
 * ": <message>" part when the message is empty) and throws it, wrapped in an
 * Error when the Error constructor is available.
 */
function unhandledException(e_p0) {
// Text is assembled as an array of character codes inside a one-slot holder.
var buf_33556659 = [[]];
if (!(e_p0.message.length == 0)) {
// "Error: unhandled exception: "
buf_33556659[0].push.apply(
buf_33556659[0],
[
69, 114, 114, 111, 114, 58, 32, 117, 110, 104, 97, 110, 100, 108, 101,
100, 32, 101, 120, 99, 101, 112, 116, 105, 111, 110, 58, 32
]
);
buf_33556659[0].push.apply(buf_33556659[0], e_p0.message);
} else {
// "Error: unhandled exception"
buf_33556659[0].push.apply(
buf_33556659[0],
[
69, 114, 114, 111, 114, 58, 32, 117, 110, 104, 97, 110, 100, 108, 101,
100, 32, 101, 120, 99, 101, 112, 116, 105, 111, 110
]
);
}
// " [" + exception name + "]\n"
buf_33556659[0].push.apply(buf_33556659[0], [32, 91]);
add__system_u1943(buf_33556659, 0, e_p0.name);
buf_33556659[0].push.apply(buf_33556659[0], [93, 10]);
var cbuf_33556660 = toJSStr(buf_33556659[0]);
if (typeof Error !== "undefined") {
throw new Error(cbuf_33556660);
} else {
throw cbuf_33556660;
}
}
/**
 * Raises a "DivByZeroDefect" whose message bytes spell "division by zero".
 */
function raiseDivByZero() {
  var defect = {
    message: [
      100, 105, 118, 105, 115, 105, 111, 110, 32, 98, 121, 32, 122, 101, 114,
      111
    ],
    parent: null,
    m_type: NTI134217742,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "DivByZeroDefect");
}
/**
 * Raises an "OverflowDefect" whose message bytes spell "over- or underflow".
 */
function raiseOverflow() {
  var defect = {
    message: [
      111, 118, 101, 114, 45, 32, 111, 114, 32, 117, 110, 100, 101, 114, 102,
      108, 111, 119
    ],
    parent: null,
    m_type: NTI134217743,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "OverflowDefect");
}
/**
 * Raises an overflow defect when `a_p0` is outside the signed 32-bit range
 * [-2147483648, 2147483647].
 */
function checkOverflowInt(a_p0) {
  var aboveMax = a_p0 > 2147483647;
  var belowMin = a_p0 < -2147483648;
  if (aboveMax || belowMin) {
    raiseOverflow();
  }
}
/**
 * Raises a "RangeDefect" whose message bytes spell "value out of range".
 */
function raiseRangeError() {
  var defect = {
    message: [
      118, 97, 108, 117, 101, 32, 111, 117, 116, 32, 111, 102, 32, 114, 97,
      110, 103, 101
    ],
    parent: null,
    m_type: NTI134217751,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "RangeDefect");
}
/**
 * Appends `n_p3` characters of the JS string `x_p1`, starting at index
 * `start_p2`, as character codes to the array at `result_p0[result_p0_Idx]`.
 * All index arithmetic goes through the checked helpers (addInt, chckRange,
 * chckIndx), so out-of-range values raise defects.
 */
function addChars__stdZprivateZdigitsutils_u202(
result_p0,
result_p0_Idx,
x_p1,
start_p2,
n_p3
) {
var Temporary1;
var old_301990096 = result_p0[result_p0_Idx].length;
// Resize the target to old length + n. The comma expression stores the
// checked new length in Temporary1 before it is compared and used below.
if (
result_p0[result_p0_Idx].length <
((Temporary1 = chckRange(addInt(old_301990096, n_p3), 0, 2147483647)),
Temporary1)
) {
for (var i = result_p0[result_p0_Idx].length; i < Temporary1; ++i)
result_p0[result_p0_Idx].push(0);
} else {
result_p0[result_p0_Idx].length = Temporary1;
}
{
var iHEX60gensym4_301990110 = 0;
var i_536870936 = 0;
{
// Copy the characters into the newly added tail, one code at a time.
Label4: while (true) {
if (!(i_536870936 < n_p3)) break Label4;
iHEX60gensym4_301990110 = i_536870936;
result_p0[result_p0_Idx][
chckIndx(
addInt(old_301990096, iHEX60gensym4_301990110),
0,
result_p0[result_p0_Idx].length - 1
)
] = x_p1.charCodeAt(
chckIndx(
addInt(start_p2, iHEX60gensym4_301990110),
0,
x_p1.length - 1
)
);
i_536870936 = addInt(i_536870936, 1);
}
}
}
}
/**
 * Appends the whole JS string `x_p1` (nothing when it is null/undefined) to
 * the character-code array at `result_p0[result_p0_Idx]`.
 */
function addChars__stdZprivateZdigitsutils_u198(
  result_p0,
  result_p0_Idx,
  x_p1
) {
  var count = x_p1 == null ? 0 : x_p1.length;
  addChars__stdZprivateZdigitsutils_u202(
    result_p0,
    result_p0_Idx,
    x_p1,
    0,
    count
  );
}
/**
 * Appends the decimal text of `x_p1` to the character-code array at
 * `result_p0[result_p0_Idx]`.
 */
function addInt__stdZprivateZdigitsutils_u223(result_p0, result_p0_Idx, x_p1) {
  var digits = x_p1 + "";
  addChars__stdZprivateZdigitsutils_u198(result_p0, result_p0_Idx, digits);
}
// Forwards unchanged to addInt__stdZprivateZdigitsutils_u223, which appends
// the text of x_p1 to the array at result_p0[result_p0_Idx].
function addInt__stdZprivateZdigitsutils_u241(result_p0, result_p0_Idx, x_p1) {
addInt__stdZprivateZdigitsutils_u223(result_p0, result_p0_Idx, x_p1);
}
/**
 * Returns the text of `x_p0` as an array of character codes.
 */
function HEX24__systemZdollars_u8(x_p0) {
  // One-slot holder so the append helper can fill the array in place.
  var holder = [[]];
  addInt__stdZprivateZdigitsutils_u241(holder, 0, x_p0);
  return holder[0];
}
/**
 * Returns true when the kind of the base type of `ti_p0` is NOT a member of
 * ConstSet1, false otherwise.
 */
function isFatPointer__system_u2866(ti_p0) {
  var baseKind = ti_p0.base.kind;
  return ConstSet1[baseKind] == undefined;
}
/**
 * Copies the fields described by the type-info node `n_p2` from `src_p1`
 * into `dest_p0`. Node kinds handled:
 *   0 - nothing to copy
 *   1 - a single field stored under `n_p2.offset`
 *   2 - a list of child nodes in `n_p2.sons`
 *   3 - a field under `n_p2.offset` plus child nodes stored as the second
 *       element of each entry in `n_p2.sons`
 */
function nimCopyAux(dest_p0, src_p1, n_p2) {
  var kind = n_p2.kind;
  var idx;

  // Kinds 1 and 3 both copy the field stored under this node's offset.
  if (kind === 1 || kind === 3) {
    dest_p0[n_p2.offset] = nimCopy(
      dest_p0[n_p2.offset],
      src_p1[n_p2.offset],
      n_p2.typ
    );
  }

  if (kind === 2) {
    for (idx = 0; idx < n_p2.sons.length; idx++) {
      nimCopyAux(dest_p0, src_p1, n_p2.sons[idx]);
    }
  } else if (kind === 3) {
    for (idx = 0; idx < n_p2.sons.length; ++idx) {
      nimCopyAux(dest_p0, src_p1, n_p2.sons[idx][1]);
    }
  }
}
/**
 * Raises an "IndexDefect" for index `i_p0` against the bounds
 * [a_p1, b_p2]. The message is built as an array of character codes.
 */
function raiseIndexError(i_p0, a_p1, b_p2) {
var Temporary1;
if (b_p2 < a_p1) {
// "index out of bounds, the container is empty"
Temporary1 = [
105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111,
117, 110, 100, 115, 44, 32, 116, 104, 101, 32, 99, 111, 110, 116, 97, 105,
110, 101, 114, 32, 105, 115, 32, 101, 109, 112, 116, 121
];
} else {
// "index <i> not in <a> .. <b>"
Temporary1 = [105, 110, 100, 101, 120, 32].concat(
HEX24__systemZdollars_u8(i_p0),
[32, 110, 111, 116, 32, 105, 110, 32],
HEX24__systemZdollars_u8(a_p1),
[32, 46, 46, 32],
HEX24__systemZdollars_u8(b_p2)
);
}
raiseException(
{
message: nimCopy(null, Temporary1, NTI33554449),
parent: null,
m_type: NTI134217749,
name: null,
trace: [],
up: null
},
"IndexDefect"
);
}
/**
 * Multiplies two 32-bit integers and returns the low 32 bits of the product
 * as an unsigned value.
 */
function imul__pureZhashes_u340(a_p0, b_p1) {
  // Math.imul yields the low 32 bits as a signed value; `>>> 0` reinterprets
  // them as unsigned.
  return Math.imul(a_p0, b_p1) >>> 0;
}
/**
 * Rotates the 32-bit value `x_p0` left by `r_p1` bits and returns the result
 * as an unsigned 32-bit number.
 */
function rotl32__pureZhashes_u361(x_p0, r_p1) {
  var shiftedLeft = (x_p0 << r_p1) >>> 0;
  // Bits pushed out on the left re-enter on the right.
  var wrapped = x_p0 >>> subInt(32, r_p1);
  return (shiftedLeft | wrapped) >>> 0;
}
/**
 * Hashes an array of byte values to a signed 32-bit integer.
 *
 * The input is consumed in 4-byte words followed by a 0-3 byte tail; every
 * word is mixed into the running state h1 (which starts at 0) with
 * multiply/rotate steps, then the input length is mixed in and a final
 * xor-shift/multiply avalanche is applied.
 * NOTE(review): the constants and structure look like the 32-bit MurmurHash3
 * with seed 0 — confirm against the Nim `hashes` module.
 */
function murmurHash__pureZhashes_u373(x_p0) {
var result_671089015 = 0;
BeforeRet: {
var size_671089024 = x_p0.length;
var stepSize_671089025 = 4;
// Number of complete 4-byte words.
var n_671089026 = divInt(size_671089024, stepSize_671089025);
var h1_671089027 = 0;
var i_671089028 = 0;
{
// Body: process each complete word.
Label2: while (true) {
if (!(i_671089028 < mulInt(n_671089026, stepSize_671089025)))
break Label2;
var k1_671089031 = 0;
var jHEX60gensym11_671089048 = stepSize_671089025;
{
// Assemble the word from bytes i+3 down to i, so the byte at the lowest
// index ends up in the low 8 bits.
Label4: while (true) {
if (!(0 < jHEX60gensym11_671089048)) break Label4;
jHEX60gensym11_671089048 = subInt(jHEX60gensym11_671089048, 1);
k1_671089031 =
(((k1_671089031 << 8) >>> 0) |
Number(
BigInt.asUintN(
32,
BigInt(
x_p0[
chckIndx(
addInt(i_671089028, jHEX60gensym11_671089048),
0,
x_p0.length - 1
)
]
)
)
)) >>>
0;
}
}
i_671089028 = addInt(i_671089028, stepSize_671089025);
// Mix the word, then fold it into the state.
k1_671089031 = imul__pureZhashes_u340(k1_671089031, 3432918353);
k1_671089031 = rotl32__pureZhashes_u361(k1_671089031, 15);
k1_671089031 = imul__pureZhashes_u340(k1_671089031, 461845907);
h1_671089027 = (h1_671089027 ^ k1_671089031) >>> 0;
h1_671089027 = rotl32__pureZhashes_u361(h1_671089027, 13);
h1_671089027 = (((h1_671089027 * 5) >>> 0) + 3864292196) >>> 0;
}
}
// Tail: the remaining size % 4 bytes, assembled the same way.
var k1_671089066 = 0;
var rem_671089067 = modInt(size_671089024, stepSize_671089025);
{
Label6: while (true) {
if (!(0 < rem_671089067)) break Label6;
rem_671089067 = subInt(rem_671089067, 1);
k1_671089066 =
(((k1_671089066 << 8) >>> 0) |
Number(
BigInt.asUintN(
32,
BigInt(
x_p0[
chckIndx(
addInt(i_671089028, rem_671089067),
0,
x_p0.length - 1
)
]
)
)
)) >>>
0;
}
}
// The tail is mixed in unconditionally; with no tail bytes k1 stays 0 and
// the xor below leaves h1 unchanged.
k1_671089066 = imul__pureZhashes_u340(k1_671089066, 3432918353);
k1_671089066 = rotl32__pureZhashes_u361(k1_671089066, 15);
k1_671089066 = imul__pureZhashes_u340(k1_671089066, 461845907);
h1_671089027 = (h1_671089027 ^ k1_671089066) >>> 0;
// Finalization: mix in the length, then avalanche the bits.
h1_671089027 =
(h1_671089027 ^ Number(BigInt.asUintN(32, BigInt(size_671089024)))) >>> 0;
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 16)) >>> 0;
h1_671089027 = imul__pureZhashes_u340(h1_671089027, 2246822507);
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 13)) >>> 0;
h1_671089027 = imul__pureZhashes_u340(h1_671089027, 3266489909);
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 16)) >>> 0;
// Reinterpret the unsigned state as a signed 32-bit result.
result_671089015 = Number(BigInt.asIntN(32, BigInt(h1_671089027)));
break BeforeRet;
}
return result_671089015;
}
function hash__pureZhashes_u782(x_p0) {
var result_671089424 = 0;
result_671089424 = murmurHash__pureZhashes_u373(
x_p0.slice(0, x_p0.length - 1 + 1)
);
return result_671089424;
}
function hashN__nim95hash_u2(item_p0, n_p1, maxValue_p2) {
var result_536870918 = 0;
var hashA_536870919 = modInt(
absInt(hash__pureZhashes_u782(item_p0)),
maxValue_p2
);
var hashB_536870920 = modInt(
absInt(hash__pureZhashes_u782(item_p0.concat([32, 98]))),
maxValue_p2
);
result_536870918 = modInt(
absInt(addInt(hashA_536870919, mulInt(n_p1, hashB_536870920))),
maxValue_p2
);
return result_536870918;
}
function sysFatal__stdZassertions_u45(message_p1) {
raiseException(
{
message: nimCopy(null, message_p1, NTI33554449),
m_type: NTI134217745,
parent: null,
name: null,
trace: [],
up: null
},
"AssertionDefect"
);
}
/**
 * Forwards an assertion message to `sysFatal__stdZassertions_u45`, which
 * raises it as an "AssertionDefect".
 *
 * @param msg_p0 Message payload, passed through unchanged.
 */
function raiseAssert__stdZassertions_u43(msg_p0) {
sysFatal__stdZassertions_u45(msg_p0);
}
/**
 * Entry point used when an assertion fails: hands the message to
 * `raiseAssert__stdZassertions_u43`.
 *
 * @param msg_p0 Message payload, passed through unchanged.
 */
function failedAssertImpl__stdZassertions_u85(msg_p0) {
raiseAssert__stdZassertions_u43(msg_p0);
}
// Module-load self-check emitted from the Nim `doAssert` (see the Nim source
// quoted below): hashing "dummy" ([100, 117, 109, 109, 121]) with n = 0 and
// maxValue = 1 must yield 0. It runs once when this module is imported.
if (!(hashN__nim95hash_u2([100, 117, 109, 109, 121], 0, 1) == 0)) {
// The byte array spells the failure message:
// "nim_hash.nim(26, 3) `hashN("dummy", 0, 1) == 0` "
failedAssertImpl__stdZassertions_u85([
110, 105, 109, 95, 104, 97, 115, 104, 46, 110, 105, 109, 40, 50, 54, 44, 32,
51, 41, 32, 96, 104, 97, 115, 104, 78, 40, 34, 100, 117, 109, 109, 121, 34,
44, 32, 48, 44, 32, 49, 41, 32, 61, 61, 32, 48, 96, 32
]);
}
// Nim source that was used to generate the above:
// ```nim
// import hashes
//
// proc hashN*(item: string, n: int, maxValue: int): int =
// ## Get the nth hash using Nim's built-in hash function using
// ## the double hashing technique from Kirsch and Mitzenmacher, 2008:
// ## http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
// let
// hashA = abs(hash(item)) mod maxValue # Use abs to handle negative hashes
// hashB = abs(hash(item & " b")) mod maxValue # string concatenation
// abs((hashA + n * hashB)) mod maxValue
//
// when defined(js):
// # A dummy usage to keep `hashN` from being stripped:
// doAssert hashN("dummy", 0, 1) == 0 # or just discard
// ```
// Below code was added manually
export function hashN(item, n, maxValue) {
// Convert string to array of character codes
const itemArray = Array.from(item).map((char) => char.charCodeAt(0));
return hashN__nim95hash_u2(itemArray, n, maxValue);
}

View File

@ -0,0 +1,29 @@
import { expect } from "chai";
import {
getMOverNBitsForK,
KTooLargeError,
NoSuitableRatioError
} from "./probabilities.js";
// Spec for getMOverNBitsForK: rejected inputs first, then known m/n values.
describe("Probabilities", () => {
  it("should not allow k > 12", () => {
    // k = 13 is one past the largest supported number of hash functions.
    const callWithOversizedK = (): number => getMOverNBitsForK(13, 0.01);
    expect(callWithOversizedK).to.throw(KTooLargeError);
  });

  it("should not allow unachievable error rate", () => {
    const callWithTinyErrorRate = (): number => getMOverNBitsForK(2, 0.00001);
    expect(callWithTinyErrorRate).to.throw(NoSuitableRatioError);
  });

  it("should return the correct m/n for k = 2, targetError = 0.1", () => {
    const bitsPerElement = getMOverNBitsForK(2, 0.1);
    expect(bitsPerElement).to.equal(6);
  });

  it("should return the correct m/n for k = 7, targetError = 0.01", () => {
    const bitsPerElement = getMOverNBitsForK(7, 0.01);
    expect(bitsPerElement).to.equal(10);
  });

  it("should return the correct m/n for k = 7, targetError = 0.001", () => {
    const bitsPerElement = getMOverNBitsForK(7, 0.001);
    expect(bitsPerElement).to.equal(16);
  });
});

View File

@ -120,6 +120,10 @@ export const kErrors: TAllErrorRates = [
0.0000016500, 0.0000012000, 0.0000008740]),
]
/**
 * Message of the `Error` thrown by `getMOverNBitsForK` when `k` is out of
 * range. This is a plain string, not an Error subclass. In the visible check
 * the same message is also used for negative `k`.
 */
export const KTooLargeError = "K must be <= 12";
/**
 * Message of the `Error` thrown at the end of `getMOverNBitsForK` when its
 * search over m/n values finds no ratio meeting the target error rate.
 * This is a plain string, not an Error subclass.
 */
export const NoSuitableRatioError =
"Specified value of k and error rate not achievable using less than 4 bytes / element.";
/**
* Given a number of hash functions (k) and a target false-positive rate (targetError),
* determines the minimum (m/n) bits-per-element that satisfies the error threshold.
@ -151,7 +155,7 @@ export function getMOverNBitsForK(
): number {
// Returns the optimal number of m/n bits for a given k.
if (k < 0 || k > 12) {
throw new Error("k must be <= 12.");
throw new Error(KTooLargeError);
}
for (let mOverN = 2; mOverN < probabilityTable[k].length; mOverN++) {
@ -160,7 +164,5 @@ export function getMOverNBitsForK(
}
}
throw new Error(
"Specified value of k and error rate not achievable using less than 4 bytes / element."
);
throw new Error(NoSuitableRatioError);
}

View File

@ -1,3 +1,9 @@
{
"extends": "../../tsconfig.dev"
"extends": "../../tsconfig.dev",
"compilerOptions": {
"allowJs": true,
"moduleResolution": "node",
"resolveJsonModule": true,
"esModuleInterop": true
}
}