Merge pull request #2253 from waku-org/feat/sds-bigint-nimhashn

feat(sds): migrate bloomfilter to bigint and import hashn function from nim
This commit is contained in:
Arseniy Klempner 2025-02-07 11:59:27 -08:00 committed by GitHub
commit 053e4901e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 1684 additions and 21 deletions

View File

@ -0,0 +1,157 @@
import { expect } from "chai";
import { BloomFilter } from "./bloom.js";
import { hashN } from "./nim_hashn/nim_hashn.mjs";
// Filter capacity used by every suite below, and the number of elements the
// tests insert into a filter.
const n = 10000;
// Alphabet the random test strings are drawn from.
const sampleChars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
// Hand-picked inputs that exercise unusual string shapes.
const specialPatterns = [
"shortstr",
"a".repeat(1000), // Very long string
"special@#$%^&*()", // Special characters
"unicode→★∑≈", // Unicode characters
"pattern".repeat(10) // Repeating pattern
];
describe("BloomFilter", () => {
  let bloomFilter: BloomFilter;
  let testElements: string[];

  /**
   * Builds a random string of `length` characters drawn uniformly from the
   * whole `sampleChars` alphabet. The alphabet size is taken from the string
   * itself so that every character (including the digits) can be produced.
   */
  const randomString = (length: number): string => {
    let result = "";
    for (let j = 0; j < length; j++) {
      result += sampleChars.charAt(
        Math.floor(Math.random() * sampleChars.length)
      );
    }
    return result;
  };

  // Every test starts from a filter filled to capacity with `n` random
  // 7-character strings.
  beforeEach(() => {
    bloomFilter = new BloomFilter(
      {
        capacity: n,
        errorRate: 0.001
      },
      hashN
    );

    testElements = new Array<string>(n);
    for (let i = 0; i < n; i++) {
      testElements[i] = randomString(7);
    }

    for (const item of testElements) {
      bloomFilter.insert(item);
    }

    expect(bloomFilter.lookup("nonexistent")).to.equal(
      false,
      "look up for an element yet to be added should return false"
    );
    expect(bloomFilter.lookup(testElements[0])).to.equal(
      true,
      "look up for an element that was added should return true"
    );
  });

  it("should initialize bloom filter with correct parameters", () => {
    expect(bloomFilter.kHashes).to.equal(10);
    expect(bloomFilter.totalBits / n).to.equal(15);

    // Explicit kHashes / forceNBitsPerElem must override the derived values.
    const bloomFilter2 = new BloomFilter(
      {
        capacity: 10000,
        errorRate: 0.001,
        kHashes: 4,
        forceNBitsPerElem: 20
      },
      hashN
    );
    expect(bloomFilter2.kHashes).to.equal(4);
    expect(bloomFilter2.totalBits).to.equal(200000);
  });

  it("should insert elements correctly", () => {
    expect(bloomFilter.lookup("test string")).to.equal(
      false,
      "look up for an element yet to be added should return false"
    );
    bloomFilter.insert("test string");
    expect(bloomFilter.lookup("test string")).to.equal(
      true,
      "look up for an element that was added should return true"
    );
    expect(bloomFilter.lookup("different string")).to.equal(
      false,
      "look up for an element that was not added should return false"
    );
  });

  it("should maintain desired error rate", () => {
    let falsePositives = 0;
    const testSize = n / 2;
    for (let i = 0; i < testSize; i++) {
      // 8 characters: a different length than the 7-character strings
      // inserted during setup, so a probe can never equal an inserted element.
      if (bloomFilter.lookup(randomString(8))) {
        falsePositives++;
      }
    }
    const actualErrorRate = falsePositives / testSize;
    expect(actualErrorRate).to.be.lessThan(bloomFilter.errorRate * 1.5);
  });

  it("should never report false negatives", () => {
    for (const item of testElements) {
      expect(bloomFilter.lookup(item)).to.equal(true);
    }
  });
});
describe("BloomFilter with special patterns", () => {
  let bloomFilter: BloomFilter;

  // Each test gets its own empty filter.
  beforeEach(() => {
    bloomFilter = new BloomFilter(
      {
        capacity: n,
        errorRate: 0.001
      },
      hashN
    );
  });

  /**
   * Inserts `count` distinct items (index prefix + random suffix) into the
   * current filter and returns them. Kept local to each test so that no state
   * leaks between tests.
   */
  const insertRandomItems = (count: number): string[] => {
    const items: string[] = [];
    for (let i = 0; i < count; i++) {
      const item = `${i}test${Math.random().toString(36).substring(2, 15)}`;
      bloomFilter.insert(item);
      items.push(item);
    }
    return items;
  };

  it("should handle special patterns correctly", () => {
    for (const pattern of specialPatterns) {
      bloomFilter.insert(pattern);
      expect(bloomFilter.lookup(pattern)).to.equal(true);
    }
  });

  it("should handle general insertion and lookup correctly", () => {
    const inserted = insertRandomItems(n);
    for (const item of inserted) {
      expect(bloomFilter.lookup(item)).to.equal(true);
    }
  });

  it("should check false positive rate", () => {
    // Fill the filter to capacity first: probing an empty filter can never
    // produce a false positive, which would make this assertion vacuous.
    insertRandomItems(n);

    const testSize = n / 2;
    let falsePositives = 0;
    for (let i = 0; i < testSize; i++) {
      // Probes start with "notpresent" while inserted items start with a
      // digit, so a probe is never an inserted element.
      const testItem = `notpresent${i}${Math.random().toString(36).substring(2, 15)}`;
      if (bloomFilter.lookup(testItem)) {
        falsePositives++;
      }
    }
    const fpRate = falsePositives / testSize;
    expect(fpRate).to.be.lessThan(bloomFilter.errorRate * 1.5);
  });
});

View File

@ -14,6 +14,8 @@ export interface BloomFilterOptions {
forceNBitsPerElem?: number;
}
// Width in bytes of one storage word of `data`: the bit arithmetic in this
// file treats every array element as holding `sizeOfInt * 8` bits.
const sizeOfInt = 8;
/**
* A probabilistic data structure that tracks memberships in a set.
* Supports time and space efficient lookups, but may return false-positives.
@ -22,11 +24,17 @@ export interface BloomFilterOptions {
* - Definitely not in the set
* - Potentially in the set (with a probability depending on the false-positive rate)
*/
export abstract class BloomFilter {
export class BloomFilter {
public totalBits: number;
public data: Uint8Array = new Uint8Array(0);
public data: Array<bigint> = [];
public kHashes: number;
public errorRate: number;
public constructor(options: BloomFilterOptions) {
private hashN: (item: string, n: number, maxValue: number) => number;
public constructor(
options: BloomFilterOptions,
hashN: (item: string, n: number, maxValue: number) => number
) {
let nBitsPerElem: number;
let k = options.kHashes ?? 0;
const forceNBitsPerElem = options.forceNBitsPerElem ?? 0;
@ -49,19 +57,50 @@ export abstract class BloomFilter {
}
const mBits = options.capacity * nBitsPerElem;
const mInts = 1 + mBits / (this.data.BYTES_PER_ELEMENT * 8);
const mInts = 1 + Math.floor(mBits / (sizeOfInt * 8));
this.totalBits = mBits;
this.data = new Uint8Array(mInts);
this.data = new Array<bigint>(mInts);
this.data.fill(BigInt(0));
this.kHashes = k;
this.hashN = hashN;
this.errorRate = options.errorRate;
}
/**
 * Computes the `kHashes` bit positions for `item` by calling the injected
 * `hashN` once per hash index, with `totalBits` as the upper bound.
 *
 * @param item - The string whose positions are computed.
 * @returns One position per hash index, in index order.
 */
public computeHashes(item: string): number[] {
  const { kHashes, totalBits } = this;
  const positions = new Array<number>(kHashes);
  let round = 0;
  while (round < kHashes) {
    positions[round] = this.hashN(item, round, totalBits);
    round += 1;
  }
  return positions;
}
// Adds an item to the bloom filter by computing its hash values
// and setting corresponding bits in "data".
public abstract insert(item: string | Uint8Array): void;
/**
 * Adds `item` to the filter by setting, for each of its hash positions, the
 * matching bit in the storage word that contains it.
 *
 * @param item - The string to add.
 */
public insert(item: string): void {
  const bitsPerWord = sizeOfInt * 8;
  this.computeHashes(item).forEach((position) => {
    const wordIndex = Math.floor(position / bitsPerWord);
    const mask = BigInt(1) << BigInt(position % bitsPerWord);
    this.data[wordIndex] |= mask;
  });
}
// Checks if the item is potentially in the bloom filter.
// The method is guaranteed to return "true" for items that were inserted,
// but might also return "true" for items that were never inserted
// (purpose of false-positive probability).
public abstract lookup(item: string | Uint8Array): boolean;
/**
 * Tests whether `item` may have been inserted.
 *
 * @param item - The string to test.
 * @returns `false` as soon as one of the item's bit positions is unset,
 *   `true` when every position is set.
 */
public lookup(item: string): boolean {
  const bitsPerWord = sizeOfInt * 8;
  return this.computeHashes(item).every((position) => {
    const word = this.data[Math.floor(position / bitsPerWord)];
    const mask = BigInt(1) << BigInt(position % bitsPerWord);
    // A set bit leaves a non-zero result after masking.
    return (word & mask) !== BigInt(0);
  });
}
}

View File

@ -1,9 +0,0 @@
import { expect } from "chai";
import { BloomFilter } from "./bloom.js";
// Smoke test: the module must export a truthy `BloomFilter` binding.
describe("BloomFilter", function () {
  it("should be defined", function () {
    expect(BloomFilter).to.be.ok;
  });
});

View File

@ -0,0 +1,17 @@
import { expect } from "chai";
import testVectors from "./nim_hash_test_vectors.json" assert { type: "json" };
import { hashN } from "./nim_hashn.mjs";
// Registers one test case per vector and compares `hashN` with the vector's
// expected `hashC` value.
describe("hashN", () => {
  for (const vector of testVectors.vectors) {
    // TODO: The result of the hash function compiled from nim to js does not
    // match outputs when run in nim itself when using unicode characters, so
    // the unicode vector is not registered as a test.
    if (vector.input === "αβγδε") {
      continue;
    }

    const { input, n, maxValue, expected } = vector;
    it(`should hash "${input}" with n=${n} and maxValue=${maxValue} correctly`, () => {
      expect(hashN(input, n, maxValue)).to.equal(expected.hashC);
    });
  }
});

View File

@ -0,0 +1,437 @@
{
"description": "Test vectors for BloomFilter hashN function",
"vectors": [
{
"input": "hello",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 51 }
},
{
"input": "hello",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 25 }
},
{
"input": "hello",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 99 }
},
{
"input": "hello",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 51, "hashB": 74, "hashC": 21 }
},
{
"input": "hello",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 351 }
},
{
"input": "hello",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 225 }
},
{
"input": "hello",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 99 }
},
{
"input": "hello",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 351, "hashB": 874, "hashC": 721 }
},
{
"input": "hello",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 3351 }
},
{
"input": "hello",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 4225 }
},
{
"input": "hello",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 5099 }
},
{
"input": "hello",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 3351, "hashB": 874, "hashC": 7721 }
},
{
"input": "",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 8 }
},
{
"input": "",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 16 }
},
{
"input": "",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 0, "hashB": 8, "hashC": 40 }
},
{
"input": "",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 8 }
},
{
"input": "",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 16 }
},
{
"input": "",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 0, "hashB": 8, "hashC": 40 }
},
{
"input": "",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 0 }
},
{
"input": "",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 6008 }
},
{
"input": "",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 2016 }
},
{
"input": "",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 0, "hashB": 6008, "hashC": 40 }
},
{
"input": "test123",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 27 }
},
{
"input": "test123",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 31 }
},
{
"input": "test123",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 35 }
},
{
"input": "test123",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 27, "hashB": 4, "hashC": 47 }
},
{
"input": "test123",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 227 }
},
{
"input": "test123",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 631 }
},
{
"input": "test123",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 35 }
},
{
"input": "test123",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 227, "hashB": 404, "hashC": 247 }
},
{
"input": "test123",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 6227 }
},
{
"input": "test123",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 631 }
},
{
"input": "test123",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 5035 }
},
{
"input": "test123",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 6227, "hashB": 4404, "hashC": 8247 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 11 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 83 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 55 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 11, "hashB": 72, "hashC": 71 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 311 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 483 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 655 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 311, "hashB": 172, "hashC": 171 }
},
{
"input": "!@#$%^&*()",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1311 }
},
{
"input": "!@#$%^&*()",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1483 }
},
{
"input": "!@#$%^&*()",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 1655 }
},
{
"input": "!@#$%^&*()",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 1311, "hashB": 172, "hashC": 2171 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 70 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 76 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 70, "hashB": 3, "hashC": 85 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 970 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 176 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 970, "hashB": 103, "hashC": 485 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 9970 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 73 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 176 }
},
{
"input": "Lorem ipsum dolor sit amet",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 9970, "hashB": 103, "hashC": 485 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 66 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 79 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 92 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 100,
"expected": { "hashA": 66, "hashB": 13, "hashC": 31 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 366 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 979 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 592 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 1000,
"expected": { "hashA": 366, "hashB": 613, "hashC": 431 }
},
{
"input": "αβγδε",
"n": 0,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 5366 }
},
{
"input": "αβγδε",
"n": 1,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 1979 }
},
{
"input": "αβγδε",
"n": 2,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 8592 }
},
{
"input": "αβγδε",
"n": 5,
"maxValue": 10000,
"expected": { "hashA": 5366, "hashB": 6613, "hashC": 8431 }
}
]
}

View File

@ -0,0 +1,11 @@
/**
 * Get the nth hash using the double hashing technique from:
 * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
 *
 * Based on https://github.com/waku-org/nim-sds/blob/5df71ad3eaf68172cef39a2e1838ddd871b03b5d/src/bloom.nim#L17
 *
 * Per the Nim source this module was compiled from, the result is
 * `abs(hashA + n * hashB) mod maxValue`, where `hashA` is the hash of `item`
 * and `hashB` the hash of `item` with `" b"` appended, each already reduced
 * `mod maxValue`.
 *
 * NOTE(review): the compiled arithmetic is overflow-checked against the signed
 * 32-bit range and raises on `maxValue === 0`; confirm callers stay within
 * those limits.
 *
 * @param item - The string to hash.
 * @param n - Index of the hash to derive. It is the multiplier applied to
 *   `hashB` (so `n = 0` yields `hashA`), not a repetition count.
 * @param maxValue - Modulus applied to the result; for a positive value the
 *   result is below it.
 * @returns The nth derived hash of `item`.
 */
export function hashN(item: string, n: number, maxValue: number): number;

View File

@ -0,0 +1,974 @@
/* Generated by the Nim Compiler v2.2.0 */
// Runtime bookkeeping emitted by the Nim compiler.
// Not read or written by any code visible in this section.
var framePtr = null;
// Number of currently active try blocks: incremented before a `try` and
// decremented when it ends. raiseException() reports the exception as
// unhandled when this is 0.
var excHandler = 0;
// Exception currently being handled; catch blocks save and restore it.
var lastJSError = null;
// Runtime type descriptors (one `NTI…` object per Nim type). `kind` is the tag
// that nimCopy() switches on; `base` and `node` start as null and are wired up
// by the assignments that follow the `NNI…` node tables further down.
// Descriptors referenced as `m_type` of raised exceptions are labelled with
// the exception name they are raised under.

// m_type of exceptions raised as "AssertionDefect".
var NTI134217745 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
// m_type of exceptions raised as "IndexDefect".
var NTI134217749 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
// m_type of exceptions raised as "RangeDefect".
var NTI134217751 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
// m_type of exceptions raised as "OverflowDefect".
var NTI134217743 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555167 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555175 = {
size: 0,
kind: 22,
base: null,
node: null,
finalizer: null
};
// Type passed to nimCopy() for exception messages (byte arrays); kind 28 is
// copied there with `slice(0)`.
var NTI33554449 = {
size: 0,
kind: 28,
base: null,
node: null,
finalizer: null
};
var NTI33554450 = {
size: 0,
kind: 29,
base: null,
node: null,
finalizer: null
};
var NTI33555174 = {
size: 0,
kind: 22,
base: null,
node: null,
finalizer: null
};
var NTI33555171 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI33555172 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
var NTI134217741 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
// m_type of exceptions raised as "DivByZeroDefect".
var NTI134217742 = {
size: 0,
kind: 17,
base: null,
node: null,
finalizer: null
};
// Field-layout nodes (`NNI…`) for the type descriptors above, followed by the
// assignments that attach each node to its descriptor (`.node`) and link each
// descriptor to its parent type (`.base`). nimCopyAux() walks these nodes:
// kind 2 is a list of `sons`, kind 1 is a single field stored under `offset`.
var NNI134217742 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217742.node = NNI134217742;
var NNI134217741 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217741.node = NNI134217741;
var NNI33555172 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI33555172.node = NNI33555172;
NTI33555174.base = NTI33555171;
NTI33555175.base = NTI33555171;
// The only node with fields: parent, name, message, trace and up, the same
// property names the exception object literals in this file are built with.
var NNI33555171 = {
kind: 2,
len: 5,
offset: 0,
typ: null,
name: null,
sons: [
{
kind: 1,
offset: "parent",
len: 0,
typ: NTI33555174,
name: "parent",
sons: null
},
{
kind: 1,
offset: "name",
len: 0,
typ: NTI33554450,
name: "name",
sons: null
},
{
kind: 1,
offset: "message",
len: 0,
typ: NTI33554449,
name: "msg",
sons: null
},
{
kind: 1,
offset: "trace",
len: 0,
typ: NTI33554449,
name: "trace",
sons: null
},
{ kind: 1, offset: "up", len: 0, typ: NTI33555175, name: "up", sons: null }
]
};
NTI33555171.node = NNI33555171;
var NNI33555167 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI33555167.node = NNI33555167;
// Base-type chain: 134217742 -> 134217741 -> 33555172 -> 33555171 -> 33555167.
NTI33555171.base = NTI33555167;
NTI33555172.base = NTI33555171;
NTI134217741.base = NTI33555172;
NTI134217742.base = NTI134217741;
var NNI134217743 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217743.node = NNI134217743;
NTI134217743.base = NTI134217741;
var NNI134217751 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217751.node = NNI134217751;
NTI134217751.base = NTI33555172;
var NNI134217749 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217749.node = NNI134217749;
NTI134217749.base = NTI33555172;
var NNI134217745 = {
kind: 2,
len: 0,
offset: 0,
typ: null,
name: null,
sons: []
};
NTI134217745.node = NNI134217745;
NTI134217745.base = NTI33555172;
// Converts an array of byte values (`s_p0`) into a JavaScript string.
// Values below 128 become one character each via String.fromCharCode. Every
// maximal run of values >= 128 is percent-encoded and decoded as a unit with
// decodeURIComponent; if decoding throws, the percent-encoded text itself is
// used for that run.
function toJSStr(s_p0) {
var result_33556911 = null;
// One output slot per decoded character or run; sized for the worst case.
var res_33556965 = newSeq__system_u2508(s_p0.length);
// i: read position in s_p0, j: write position in res.
var i_33556966 = 0;
var j_33556967 = 0;
{
Label2: while (true) {
if (!(i_33556966 < s_p0.length)) break Label2;
var c_33556968 = s_p0[i_33556966];
if (c_33556968 < 128) {
res_33556965[j_33556967] = String.fromCharCode(c_33556968);
i_33556966 += 1;
} else {
// Collect the whole run of values >= 128 as "%XX" pieces.
var helper_33556994 = newSeq__system_u2508(0);
Label3: {
Label4: while (true) {
if (!true) break Label4;
var code_33556995 = c_33556968.toString(16);
// Pad single-digit hex values to two digits.
if ((code_33556995 == null ? 0 : code_33556995.length) == 1) {
helper_33556994.push("%0");
} else {
helper_33556994.push("%");
}
helper_33556994.push(code_33556995);
i_33556966 += 1;
// The run ends at the end of input or at the next value below 128.
if (s_p0.length <= i_33556966 || s_p0[i_33556966] < 128) {
break Label3;
}
c_33556968 = s_p0[i_33556966];
}
}
// excHandler counts the active try block so a raise inside it is not
// reported as unhandled.
++excHandler;
try {
res_33556965[j_33556967] = decodeURIComponent(
helper_33556994.join("")
);
--excHandler;
} catch (EXCEPTION) {
var prevJSError = lastJSError;
lastJSError = EXCEPTION;
--excHandler;
// Decoding failed: keep the percent-encoded text instead.
res_33556965[j_33556967] = helper_33556994.join("");
lastJSError = prevJSError;
} finally {
}
}
j_33556967 += 1;
}
}
// Resize res to exactly j entries before joining.
if (res_33556965.length < j_33556967) {
for (var i = res_33556965.length; i < j_33556967; ++i)
res_33556965.push(null);
} else {
res_33556965.length = j_33556967;
}
result_33556911 = res_33556965.join("");
return result_33556911;
}
// Stores `ename_p1` as the exception's name and throws `e_p0`. When no try
// block is active (excHandler is 0) the exception is first handed to
// unhandledException().
function raiseException(e_p0, ename_p1) {
  e_p0.name = ename_p1;
  var insideTry = excHandler != 0;
  if (!insideTry) {
    unhandledException(e_p0);
  }
  throw e_p0;
}
// Checked integer remainder: raises on a zero divisor and on the
// (2147483647, -1) operand pair, otherwise returns the truncated `a % b`.
function modInt(a_p0, b_p1) {
  if (b_p1 == 0) {
    raiseDivByZero();
  }
  if (b_p1 == -1 && a_p0 == 2147483647) {
    raiseOverflow();
  }
  var remainder = a_p0 % b_p1;
  return Math.trunc(remainder);
}
// Absolute value: negative inputs are multiplied by -1, everything else is
// returned unchanged.
function absInt(a_p0) {
  return a_p0 < 0 ? a_p0 * -1 : a_p0;
}
// Checked integer division: raises on a zero divisor and on the
// (2147483647, -1) operand pair, otherwise returns the quotient truncated
// toward zero.
function divInt(a_p0, b_p1) {
  if (b_p1 == 0) {
    raiseDivByZero();
  }
  if (b_p1 == -1 && a_p0 == 2147483647) {
    raiseOverflow();
  }
  var quotient = a_p0 / b_p1;
  return Math.trunc(quotient);
}
// Checked multiplication: returns the product after checkOverflowInt() has
// accepted it.
function mulInt(a_p0, b_p1) {
  var product = a_p0 * b_p1;
  checkOverflowInt(product);
  return product;
}
// Checked subtraction: returns the difference after checkOverflowInt() has
// accepted it.
function subInt(a_p0, b_p1) {
  var difference = a_p0 - b_p1;
  checkOverflowInt(difference);
  return difference;
}
// Checked addition: returns the sum after checkOverflowInt() has accepted it.
function addInt(a_p0, b_p1) {
  var sum = a_p0 + b_p1;
  checkOverflowInt(sum);
  return sum;
}
// Creates an array of `len_p0` entries, each initialised to 0.
function mnewString(len_p0) {
  var buffer = new Array(len_p0);
  var pos = 0;
  while (pos < len_p0) {
    buffer[pos] = 0;
    pos++;
  }
  return buffer;
}
// Range check: returns `i_p0` when a_p1 <= i_p0 <= b_p2, otherwise calls
// raiseRangeError().
function chckRange(i_p0, a_p1, b_p2) {
  if (a_p1 <= i_p0 && i_p0 <= b_p2) {
    return i_p0;
  }
  raiseRangeError();
  // Fallback value, reached only if raiseRangeError() returns.
  return 0;
}
// Builds a set represented as an object whose keys are the members. Each
// argument is either a single member, or an object whose entries [0] and [1]
// give an inclusive range of members.
function setConstr() {
  var members = {};
  for (var k = 0; k < arguments.length; ++k) {
    var entry = arguments[k];
    if (typeof entry != "object") {
      members[entry] = true;
      continue;
    }
    var cursor = entry[0];
    while (cursor <= entry[1]) {
      members[cursor] = true;
      ++cursor;
    }
  }
  return members;
}
// Set of type-kind tags consulted by isFatPointer__system_u2866(): a type
// whose base kind is in this set is reported as NOT being a fat pointer.
var ConstSet1 = setConstr(17, 16, 4, 18, 27, 19, 23, 22, 21);
// Copies `src_p1` according to the runtime type descriptor `ti_p2`, reusing
// `dest_p0` as the target where the type kind allows it, and returns the copy.
// The copy strategy is selected by `ti_p2.kind`.
function nimCopy(dest_p0, src_p1, ti_p2) {
var result_33557318 = null;
switch (ti_p2.kind) {
// Kinds 21/22/23/5: shared as-is, unless isFatPointer() says the value is a
// two-element pair, which is then copied into a fresh array.
case 21:
case 22:
case 23:
case 5:
if (!isFatPointer__system_u2866(ti_p2)) {
result_33557318 = src_p1;
} else {
result_33557318 = [src_p1[0], src_p1[1]];
}
break;
// Kind 19: key/value object; dest is emptied (or created) and then receives
// every enumerable key of src.
case 19:
if (dest_p0 === null || dest_p0 === undefined) {
dest_p0 = {};
} else {
for (var key in dest_p0) {
delete dest_p0[key];
}
}
for (var key in src_p1) {
dest_p0[key] = src_p1[key];
}
result_33557318 = dest_p0;
break;
// Kinds 18/17: the base type is copied first (recursively), then this
// type's own fields via nimCopyAux(). A newly created kind-17 value carries
// its descriptor in `m_type`.
case 18:
case 17:
if (!(ti_p2.base == null)) {
result_33557318 = nimCopy(dest_p0, src_p1, ti_p2.base);
} else {
if (ti_p2.kind == 17) {
result_33557318 =
dest_p0 === null || dest_p0 === undefined
? { m_type: ti_p2 }
: dest_p0;
} else {
result_33557318 =
dest_p0 === null || dest_p0 === undefined ? {} : dest_p0;
}
}
nimCopyAux(result_33557318, src_p1, ti_p2.node);
break;
// Kinds 4/16: typed-array views are cloned or overwritten in place; plain
// arrays are copied element by element using the base type.
case 4:
case 16:
if (ArrayBuffer.isView(src_p1)) {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new src_p1.constructor(src_p1);
} else {
dest_p0.set(src_p1, 0);
}
result_33557318 = dest_p0;
} else {
if (src_p1 === null) {
result_33557318 = null;
} else {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new Array(src_p1.length);
}
result_33557318 = dest_p0;
for (var i = 0; i < src_p1.length; ++i) {
result_33557318[i] = nimCopy(
result_33557318[i],
src_p1[i],
ti_p2.base
);
}
}
}
break;
// Kinds 24/27: nullable arrays copied element by element; dest is reused
// only when its length already matches.
case 24:
case 27:
if (src_p1 === null) {
result_33557318 = null;
} else {
if (
dest_p0 === null ||
dest_p0 === undefined ||
dest_p0.length != src_p1.length
) {
dest_p0 = new Array(src_p1.length);
}
result_33557318 = dest_p0;
for (var i = 0; i < src_p1.length; ++i) {
result_33557318[i] = nimCopy(
result_33557318[i],
src_p1[i],
ti_p2.base
);
}
}
break;
// Kind 28: shallow array copy; a null source leaves the result null.
case 28:
if (src_p1 !== null) {
result_33557318 = src_p1.slice(0);
}
break;
// Every other kind is returned without copying.
default:
result_33557318 = src_p1;
break;
}
return result_33557318;
}
// Index check: returns `i_p0` when a_p1 <= i_p0 <= b_p2, otherwise calls
// raiseIndexError() with the index and both bounds.
function chckIndx(i_p0, a_p1, b_p2) {
  if (a_p1 <= i_p0 && i_p0 <= b_p2) {
    return i_p0;
  }
  raiseIndexError(i_p0, a_p1, b_p2);
  // Fallback value, reached only if raiseIndexError() returns.
  return 0;
}
// Single-element cell holding 0. It is not referenced by any code visible in
// this section; presumably emitted for Nim's hashes module. Verify before
// removing.
var objectID_671088817 = [0];
// Appends the UTF-16 code units of the JS string `y_p1` to the array stored at
// `x_p0[x_p0_Idx]`, creating that array first when the slot holds null.
function add__system_u1943(x_p0, x_p0_Idx, y_p1) {
  if (x_p0[x_p0_Idx] === null) {
    x_p0[x_p0_Idx] = [];
  }
  var target = x_p0[x_p0_Idx];
  var base = target.length;
  // Grow first, then fill the new tail.
  target.length += y_p1.length;
  for (var k = 0; k < y_p1.length; ++k) {
    target[base + k] = y_p1.charCodeAt(k);
  }
}
// Creates an array of `len_p0` entries, each initialised to null.
function newSeq__system_u2508(len_p0) {
  var seq = new Array(len_p0);
  var pos = 0;
  while (pos < len_p0) {
    seq[pos] = null;
    ++pos;
  }
  return seq;
}
// Builds the report "Error: unhandled exception[: <message>] [<name>]\n" as a
// byte array, converts it with toJSStr() and throws it: wrapped in an Error
// when `Error` exists, as a bare string otherwise.
function unhandledException(e_p0) {
  var buf = [[]];
  var out = buf[0];
  if (e_p0.message.length == 0) {
    // "Error: unhandled exception"
    out.push.apply(out, [
      69, 114, 114, 111, 114, 58, 32, 117, 110, 104, 97, 110, 100, 108, 101,
      100, 32, 101, 120, 99, 101, 112, 116, 105, 111, 110
    ]);
  } else {
    // "Error: unhandled exception: " followed by the message bytes
    out.push.apply(out, [
      69, 114, 114, 111, 114, 58, 32, 117, 110, 104, 97, 110, 100, 108, 101,
      100, 32, 101, 120, 99, 101, 112, 116, 105, 111, 110, 58, 32
    ]);
    out.push.apply(out, e_p0.message);
  }
  // " [" + name + "]\n"
  out.push.apply(out, [32, 91]);
  add__system_u1943(buf, 0, e_p0.name);
  buf[0].push.apply(buf[0], [93, 10]);

  var report = toJSStr(buf[0]);
  if (typeof Error !== "undefined") {
    throw new Error(report);
  }
  throw report;
}
// Raises a "DivByZeroDefect" whose message bytes spell "division by zero".
function raiseDivByZero() {
  var defect = {
    message: [
      100, 105, 118, 105, 115, 105, 111, 110, 32, 98, 121, 32, 122, 101, 114,
      111
    ],
    parent: null,
    m_type: NTI134217742,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "DivByZeroDefect");
}
// Raises an "OverflowDefect" whose message bytes spell "over- or underflow".
function raiseOverflow() {
  var defect = {
    message: [
      111, 118, 101, 114, 45, 32, 111, 114, 32, 117, 110, 100, 101, 114, 102,
      108, 111, 119
    ],
    parent: null,
    m_type: NTI134217743,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "OverflowDefect");
}
// Calls raiseOverflow() when `a_p0` lies outside the signed 32-bit range
// [-2147483648, 2147483647].
function checkOverflowInt(a_p0) {
  var outOfRange = a_p0 > 2147483647 || a_p0 < -2147483648;
  if (outOfRange) {
    raiseOverflow();
  }
}
// Raises a "RangeDefect" whose message bytes spell "value out of range".
function raiseRangeError() {
  var defect = {
    message: [
      118, 97, 108, 117, 101, 32, 111, 117, 116, 32, 111, 102, 32, 114, 97,
      110, 103, 101
    ],
    parent: null,
    m_type: NTI134217751,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "RangeDefect");
}
// Appends `n_p3` code units of the JS string `x_p1`, starting at `start_p2`,
// to the byte array stored at `result_p0[result_p0_Idx]`. Every index and
// length computation goes through the checked helpers (addInt, chckRange,
// chckIndx), which raise on overflow or out-of-bounds access.
function addChars__stdZprivateZdigitsutils_u202(
result_p0,
result_p0_Idx,
x_p1,
start_p2,
n_p3
) {
var Temporary1;
var old_301990096 = result_p0[result_p0_Idx].length;
// Resize the target to old length + n; Temporary1 receives the new length
// inside the condition (comma expression). New slots are zero-filled.
if (
result_p0[result_p0_Idx].length <
((Temporary1 = chckRange(addInt(old_301990096, n_p3), 0, 2147483647)),
Temporary1)
) {
for (var i = result_p0[result_p0_Idx].length; i < Temporary1; ++i)
result_p0[result_p0_Idx].push(0);
} else {
result_p0[result_p0_Idx].length = Temporary1;
}
{
var iHEX60gensym4_301990110 = 0;
var i_536870936 = 0;
{
// Copy loop: target[old + i] = x.charCodeAt(start + i) for i in 0 .. n-1.
Label4: while (true) {
if (!(i_536870936 < n_p3)) break Label4;
iHEX60gensym4_301990110 = i_536870936;
result_p0[result_p0_Idx][
chckIndx(
addInt(old_301990096, iHEX60gensym4_301990110),
0,
result_p0[result_p0_Idx].length - 1
)
] = x_p1.charCodeAt(
chckIndx(
addInt(start_p2, iHEX60gensym4_301990110),
0,
x_p1.length - 1
)
);
i_536870936 = addInt(i_536870936, 1);
}
}
}
}
// Appends the whole of `x_p1` to the target array; a null or undefined `x_p1`
// counts as length 0.
function addChars__stdZprivateZdigitsutils_u198(
  result_p0,
  result_p0_Idx,
  x_p1
) {
  var count = x_p1 == null ? 0 : x_p1.length;
  addChars__stdZprivateZdigitsutils_u202(
    result_p0,
    result_p0_Idx,
    x_p1,
    0,
    count
  );
}
// Appends the string form of `x_p1` (obtained by concatenating it with "") to
// the target array.
function addInt__stdZprivateZdigitsutils_u223(result_p0, result_p0_Idx, x_p1) {
  var text = x_p1 + "";
  addChars__stdZprivateZdigitsutils_u198(result_p0, result_p0_Idx, text);
}
// Forwards unchanged to addInt__stdZprivateZdigitsutils_u223().
function addInt__stdZprivateZdigitsutils_u241(result_p0, result_p0_Idx, x_p1) {
  return void addInt__stdZprivateZdigitsutils_u223(
    result_p0,
    result_p0_Idx,
    x_p1
  );
}
// Returns the string form of `x_p0` as a byte array, built by appending into a
// one-element holder.
function HEX24__systemZdollars_u8(x_p0) {
  var holder = [[]];
  addInt__stdZprivateZdigitsutils_u241(holder, 0, x_p0);
  return holder[0];
}
// Reports whether `ti_p0` is a fat pointer: true exactly when its base type's
// kind has no entry in ConstSet1.
function isFatPointer__system_u2866(ti_p0) {
  var entry = ConstSet1[ti_p0.base.kind];
  return entry == undefined;
}
// Copies the fields described by layout node `n_p2` from `src_p1` to `dest_p0`.
// Node kinds: 1 = a single field stored under `offset`; 2 = a list of child
// nodes; 3 = a field under `offset` plus child nodes held at index 1 of each
// `sons` entry. Any other kind copies nothing.
function nimCopyAux(dest_p0, src_p1, n_p2) {
  var nodeKind = n_p2.kind;

  if (nodeKind === 1 || nodeKind === 3) {
    dest_p0[n_p2.offset] = nimCopy(
      dest_p0[n_p2.offset],
      src_p1[n_p2.offset],
      n_p2.typ
    );
  }

  if (nodeKind === 2) {
    for (var k = 0; k < n_p2.sons.length; k++) {
      nimCopyAux(dest_p0, src_p1, n_p2.sons[k]);
    }
  } else if (nodeKind === 3) {
    for (var m = 0; m < n_p2.sons.length; ++m) {
      nimCopyAux(dest_p0, src_p1, n_p2.sons[m][1]);
    }
  }
}
// Raises an "IndexDefect". For an empty range (b_p2 < a_p1) the message bytes
// spell "index out of bounds, the container is empty"; otherwise
// "index <i> not in <a> .. <b>".
function raiseIndexError(i_p0, a_p1, b_p2) {
  var messageBytes;
  if (!(b_p2 < a_p1)) {
    messageBytes = [105, 110, 100, 101, 120, 32].concat(
      HEX24__systemZdollars_u8(i_p0),
      [32, 110, 111, 116, 32, 105, 110, 32],
      HEX24__systemZdollars_u8(a_p1),
      [32, 46, 46, 32],
      HEX24__systemZdollars_u8(b_p2)
    );
  } else {
    messageBytes = [
      105, 110, 100, 101, 120, 32, 111, 117, 116, 32, 111, 102, 32, 98, 111,
      117, 110, 100, 115, 44, 32, 116, 104, 101, 32, 99, 111, 110, 116, 97, 105,
      110, 101, 114, 32, 105, 115, 32, 101, 109, 112, 116, 121
    ];
  }

  var defect = {
    message: nimCopy(null, messageBytes, NTI33554449),
    parent: null,
    m_type: NTI134217749,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "IndexDefect");
}
// Multiplies two 32-bit values and returns the low 32 bits of the product as
// an unsigned number. The operands are split into 16-bit halves so that every
// intermediate product stays exactly representable.
function imul__pureZhashes_u340(a_p0, b_p1) {
  var low16 = 65535;
  var aHigh = ((a_p0 >>> 16) & low16) >>> 0;
  var aLow = (a_p0 & low16) >>> 0;
  var bHigh = ((b_p1 >>> 16) & low16) >>> 0;
  var bLow = (b_p1 & low16) >>> 0;

  var lowProduct = (aLow * bLow) >>> 0;
  var crossSum = (((aHigh * bLow) >>> 0) + ((aLow * bHigh) >>> 0)) >>> 0;
  var crossShifted = (crossSum << 16) >>> 0;
  return (lowProduct + crossShifted) >>> 0;
}
// Rotates the 32-bit value `x_p0` left by `r_p1` bits and returns it unsigned.
function rotl32__pureZhashes_u361(x_p0, r_p1) {
  var shiftedLeft = (x_p0 << r_p1) >>> 0;
  var wrappedAround = x_p0 >>> subInt(32, r_p1);
  return (shiftedLeft | wrappedAround) >>> 0;
}
// Hashes the byte array `x_p0` to a signed 32-bit integer.
// The input is consumed in 4-byte blocks that are mixed into the running state
// h1 (starting at 0); the remaining 0-3 bytes are mixed in as a tail, the
// input length is folded in, and a final avalanche step is applied.
// NOTE(review): the constants and structure look like MurmurHash3 (32-bit),
// as the function name suggests; confirm against Nim's std/hashes.
function murmurHash__pureZhashes_u373(x_p0) {
var result_671089015 = 0;
BeforeRet: {
var size_671089024 = x_p0.length;
var stepSize_671089025 = 4;
// Number of complete 4-byte blocks.
var n_671089026 = divInt(size_671089024, stepSize_671089025);
var h1_671089027 = 0;
var i_671089028 = 0;
{
Label2: while (true) {
if (!(i_671089028 < mulInt(n_671089026, stepSize_671089025)))
break Label2;
var k1_671089031 = 0;
var jHEX60gensym11_671089048 = stepSize_671089025;
{
// Assemble the block from its last byte down to its first, so the byte at
// the lowest index ends up in the least significant position.
Label4: while (true) {
if (!(0 < jHEX60gensym11_671089048)) break Label4;
jHEX60gensym11_671089048 = subInt(jHEX60gensym11_671089048, 1);
k1_671089031 =
(((k1_671089031 << 8) >>> 0) |
Number(
BigInt.asUintN(
32,
BigInt(
x_p0[
chckIndx(
addInt(i_671089028, jHEX60gensym11_671089048),
0,
x_p0.length - 1
)
]
)
)
)) >>>
0;
}
}
i_671089028 = addInt(i_671089028, stepSize_671089025);
// Mix the block into h1.
k1_671089031 = imul__pureZhashes_u340(k1_671089031, 3432918353);
k1_671089031 = rotl32__pureZhashes_u361(k1_671089031, 15);
k1_671089031 = imul__pureZhashes_u340(k1_671089031, 461845907);
h1_671089027 = (h1_671089027 ^ k1_671089031) >>> 0;
h1_671089027 = rotl32__pureZhashes_u361(h1_671089027, 13);
h1_671089027 = (((h1_671089027 * 5) >>> 0) + 3864292196) >>> 0;
}
}
// Tail: the size mod 4 bytes left after the last full block, assembled in
// the same byte order as a full block.
var k1_671089066 = 0;
var rem_671089067 = modInt(size_671089024, stepSize_671089025);
{
Label6: while (true) {
if (!(0 < rem_671089067)) break Label6;
rem_671089067 = subInt(rem_671089067, 1);
k1_671089066 =
(((k1_671089066 << 8) >>> 0) |
Number(
BigInt.asUintN(
32,
BigInt(
x_p0[
chckIndx(
addInt(i_671089028, rem_671089067),
0,
x_p0.length - 1
)
]
)
)
)) >>>
0;
}
}
// The tail mix runs unconditionally; with no tail bytes k1 stays 0 and the
// XOR below leaves h1 unchanged.
k1_671089066 = imul__pureZhashes_u340(k1_671089066, 3432918353);
k1_671089066 = rotl32__pureZhashes_u361(k1_671089066, 15);
k1_671089066 = imul__pureZhashes_u340(k1_671089066, 461845907);
h1_671089027 = (h1_671089027 ^ k1_671089066) >>> 0;
// Finalisation: fold in the input length, then avalanche.
h1_671089027 =
(h1_671089027 ^ Number(BigInt.asUintN(32, BigInt(size_671089024)))) >>> 0;
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 16)) >>> 0;
h1_671089027 = imul__pureZhashes_u340(h1_671089027, 2246822507);
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 13)) >>> 0;
h1_671089027 = imul__pureZhashes_u340(h1_671089027, 3266489909);
h1_671089027 = (h1_671089027 ^ (h1_671089027 >>> 16)) >>> 0;
// Reinterpret the unsigned state as a signed 32-bit result.
result_671089015 = Number(BigInt.asIntN(32, BigInt(h1_671089027)));
break BeforeRet;
}
return result_671089015;
}
/**
 * Hashes a whole array-of-char-codes value with the murmur hash routine.
 *
 * The input is copied with `slice` before hashing, so the hasher receives
 * its own array covering every element of `x_p0`.
 */
function hash__pureZhashes_u782(x_p0) {
  // `length - 1` is the last valid index; `+ 1` turns it into the exclusive
  // end bound that `slice` expects.
  var exclusiveEnd = x_p0.length - 1 + 1;
  var wholeInput = x_p0.slice(0, exclusiveEnd);
  return murmurHash__pureZhashes_u373(wholeInput);
}
/**
 * Computes the n-th hash of `item_p0` via double hashing (the technique the
 * accompanying Nim source attributes to Kirsch and Mitzenmacher, 2008):
 *
 *   hashA  = |hash(item)|        mod maxValue
 *   hashB  = |hash(item & " b")| mod maxValue
 *   result = |hashA + n * hashB| mod maxValue
 *
 * `item_p0` is an array of character codes, `n_p1` selects which derived
 * hash to produce, and `maxValue_p2` is the modulus applied at every step.
 */
function hashN__nim95hash_u2(item_p0, n_p1, maxValue_p2) {
  // First base hash, taken over the item as given.
  var rawA = hash__pureZhashes_u782(item_p0);
  var baseA = modInt(absInt(rawA), maxValue_p2);

  // Second base hash, taken over the item with the codes 32, 98 (" b")
  // appended; `concat` leaves the caller's array untouched.
  var suffixedItem = item_p0.concat([32, 98]);
  var rawB = hash__pureZhashes_u782(suffixedItem);
  var baseB = modInt(absInt(rawB), maxValue_p2);

  // Combine the two base hashes using the checked integer helpers.
  var combined = addInt(baseA, mulInt(n_p1, baseB));
  return modInt(absInt(combined), maxValue_p2);
}
/**
 * Raises an "AssertionDefect" exception carrying a copy of `message_p1`.
 *
 * The exception record is assembled first and then handed to
 * `raiseException`; control flow after that call is whatever
 * `raiseException` decides.
 */
function sysFatal__stdZassertions_u45(message_p1) {
  var defect = {
    // Copy the message so the exception does not alias the caller's array.
    message: nimCopy(null, message_p1, NTI33554449),
    m_type: NTI134217745,
    parent: null,
    name: null,
    trace: [],
    up: null
  };
  raiseException(defect, "AssertionDefect");
}
/**
 * Raises an assertion failure with the given message.
 *
 * Thin forwarder: passes `msg_p0` unchanged to `sysFatal__stdZassertions_u45`,
 * which builds and raises the "AssertionDefect" exception.
 */
function raiseAssert__stdZassertions_u43(msg_p0) {
sysFatal__stdZassertions_u45(msg_p0);
}
/**
 * Entry point used when an assertion check fails.
 *
 * Thin forwarder: passes `msg_p0` unchanged to `raiseAssert__stdZassertions_u43`.
 */
function failedAssertImpl__stdZassertions_u85(msg_p0) {
raiseAssert__stdZassertions_u43(msg_p0);
}
// Module-load self-check emitted from the Nim `doAssert` (see the Nim source
// quoted below): hashN("dummy", 0, 1) must equal 0.
// [100, 117, 109, 109, 121] is the character-code array for "dummy".
if (!(hashN__nim95hash_u2([100, 117, 109, 109, 121], 0, 1) == 0)) {
// The array below is the character-code encoding of the failure message:
//   nim_hash.nim(26, 3) `hashN("dummy", 0, 1) == 0`
// (followed by a trailing space).
failedAssertImpl__stdZassertions_u85([
110, 105, 109, 95, 104, 97, 115, 104, 46, 110, 105, 109, 40, 50, 54, 44, 32,
51, 41, 32, 96, 104, 97, 115, 104, 78, 40, 34, 100, 117, 109, 109, 121, 34,
44, 32, 48, 44, 32, 49, 41, 32, 61, 61, 32, 48, 96, 32
]);
}
// Nim source that was used to generate the above:
// ```nim
// import hashes
//
// proc hashN*(item: string, n: int, maxValue: int): int =
// ## Get the nth hash using Nim's built-in hash function using
// ## the double hashing technique from Kirsch and Mitzenmacher, 2008:
// ## http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/rsa.pdf
// let
// hashA = abs(hash(item)) mod maxValue # Use abs to handle negative hashes
// hashB = abs(hash(item & " b")) mod maxValue # string concatenation
// abs((hashA + n * hashB)) mod maxValue
//
// when defined(js):
// # A dummy usage to keep `hashN` from being stripped:
// doAssert hashN("dummy", 0, 1) == 0 # or just discard
// ```
// The code below was added manually; it is not part of the generated output.
// Shared encoder: TextEncoder is stateless, so one instance serves every call.
const utf8Encoder = new TextEncoder();

/**
 * Public wrapper around the Nim-generated `hashN`: returns the n-th hash of
 * `item`, reduced modulo `maxValue`.
 *
 * The string is converted to its UTF-8 bytes before hashing. The generated
 * murmur hash folds its input one element at a time with an 8-bit shift, so
 * it expects byte values (0-255). Feeding it `charCodeAt(0)` of each code
 * point, as was done previously, had two problems:
 *   - characters outside the Basic Multilingual Plane contributed only their
 *     high surrogate, so distinct strings (e.g. two different emoji sharing
 *     a high surrogate) produced identical hashes;
 *   - code units above 255 overlapped neighbouring elements in the 8-bit
 *     shift, which a byte-oriented hash is not designed for.
 * For pure-ASCII input the UTF-8 bytes equal the character codes, so results
 * for such strings are unchanged.
 *
 * @param {string} item - Value to hash.
 * @param {number} n - Which derived hash to compute (0 for the first).
 * @param {number} maxValue - Modulus applied to the result.
 * @returns {number} The n-th hash of `item`, reduced modulo `maxValue`.
 */
export function hashN(item, n, maxValue) {
  // The generated code calls `.concat` on its input, which typed arrays do
  // not provide, so convert the Uint8Array into a plain Array of numbers.
  const itemBytes = Array.from(utf8Encoder.encode(item));
  return hashN__nim95hash_u2(itemBytes, n, maxValue);
}

View File

@ -0,0 +1,29 @@
import { expect } from "chai";
import {
getMOverNBitsForK,
KTooLargeError,
NoSuitableRatioError
} from "./probabilities.js";
// Specs for getMOverNBitsForK: argument validation first, then known
// bits-per-element results for a few (k, targetError) pairs.
describe("Probabilities", () => {
  it("should not allow k > 12", () => {
    const lookupWithOversizedK = (): number => getMOverNBitsForK(13, 0.01);
    expect(lookupWithOversizedK).to.throw(KTooLargeError);
  });

  it("should not allow unachievable error rate", () => {
    const lookupWithTinyErrorRate = (): number =>
      getMOverNBitsForK(2, 0.00001);
    expect(lookupWithTinyErrorRate).to.throw(NoSuitableRatioError);
  });

  it("should return the correct m/n for k = 2, targetError = 0.1", () => {
    const bitsPerElement = getMOverNBitsForK(2, 0.1);
    expect(bitsPerElement).to.equal(6);
  });

  it("should return the correct m/n for k = 7, targetError = 0.01", () => {
    const bitsPerElement = getMOverNBitsForK(7, 0.01);
    expect(bitsPerElement).to.equal(10);
  });

  it("should return the correct m/n for k = 7, targetError = 0.001", () => {
    const bitsPerElement = getMOverNBitsForK(7, 0.001);
    expect(bitsPerElement).to.equal(16);
  });
});

View File

@ -120,6 +120,10 @@ export const kErrors: TAllErrorRates = [
0.0000016500, 0.0000012000, 0.0000008740]),
]
/**
 * Message of the Error thrown by `getMOverNBitsForK` when `k` is outside the
 * supported range (the check is `k < 0 || k > 12`).
 * Note: despite the name this is a plain string, not an Error subclass.
 */
export const KTooLargeError = "K must be <= 12";
/**
 * Message of the Error thrown by `getMOverNBitsForK` when no bits-per-element
 * ratio in the probability table satisfies the requested error rate for the
 * given `k`. Also a plain string rather than an Error subclass.
 */
export const NoSuitableRatioError =
"Specified value of k and error rate not achievable using less than 4 bytes / element.";
/**
* Given a number of hash functions (k) and a target false-positive rate (targetError),
* determines the minimum (m/n) bits-per-element that satisfies the error threshold.
@ -151,7 +155,7 @@ export function getMOverNBitsForK(
): number {
// Returns the optimal number of m/n bits for a given k.
if (k < 0 || k > 12) {
throw new Error("k must be <= 12.");
throw new Error(KTooLargeError);
}
for (let mOverN = 2; mOverN < probabilityTable[k].length; mOverN++) {
@ -160,7 +164,5 @@ export function getMOverNBitsForK(
}
}
throw new Error(
"Specified value of k and error rate not achievable using less than 4 bytes / element."
);
throw new Error(NoSuitableRatioError);
}

View File

@ -1,3 +1,9 @@
{
"extends": "../../tsconfig.dev"
"extends": "../../tsconfig.dev",
"compilerOptions": {
"allowJs": true,
"moduleResolution": "node",
"resolveJsonModule": true,
"esModuleInterop": true
}
}