From 468512fa85a5e6c1618803338e0e9d17e1a9c4b7 Mon Sep 17 00:00:00 2001 From: Arseniy Klempner Date: Mon, 27 Jan 2025 17:29:27 -0800 Subject: [PATCH] feat(sds): create package for sds and add protobuf def Adds a new package for the browser implementation of scalable data sync. Ports some of the nim implementation of bloom filter to ts. Adds protobuf definition for SDS messages. --- package-lock.json | 186 ++++++++++---------- package.json | 3 +- packages/proto/src/generated/sds_message.ts | 126 +++++++++++++ packages/proto/src/lib/sds_message.proto | 11 ++ packages/sds/.eslintrc.cjs | 6 + packages/sds/.mocharc.cjs | 27 +++ packages/sds/README.md | 3 + packages/sds/package.json | 84 +++++++++ packages/sds/rollup.config.js | 24 +++ packages/sds/src/bloom.ts | 67 +++++++ packages/sds/src/index.spec.ts | 9 + packages/sds/src/index.ts | 3 + packages/sds/src/probabilities.ts | 166 +++++++++++++++++ packages/sds/tsconfig.dev.json | 3 + packages/sds/tsconfig.json | 10 ++ packages/sds/typedoc.json | 4 + 16 files changed, 642 insertions(+), 90 deletions(-) create mode 100644 packages/proto/src/generated/sds_message.ts create mode 100644 packages/proto/src/lib/sds_message.proto create mode 100644 packages/sds/.eslintrc.cjs create mode 100644 packages/sds/.mocharc.cjs create mode 100644 packages/sds/README.md create mode 100644 packages/sds/package.json create mode 100644 packages/sds/rollup.config.js create mode 100644 packages/sds/src/bloom.ts create mode 100644 packages/sds/src/index.spec.ts create mode 100644 packages/sds/src/index.ts create mode 100644 packages/sds/src/probabilities.ts create mode 100644 packages/sds/tsconfig.dev.json create mode 100644 packages/sds/tsconfig.json create mode 100644 packages/sds/typedoc.json diff --git a/package-lock.json b/package-lock.json index 74256f1727..fbdaf688d2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,8 @@ "packages/tests", "packages/browser-tests", "packages/build-utils", - 
"packages/react-native-polyfills" + "packages/react-native-polyfills", + "packages/sds" ], "devDependencies": { "@size-limit/preset-big-lib": "^11.0.2", @@ -10805,6 +10806,10 @@ "resolved": "packages/sdk", "link": true }, + "node_modules/@waku/sds": { + "resolved": "packages/sds", + "link": true + }, "node_modules/@waku/tests": { "resolved": "packages/tests", "link": true @@ -11684,64 +11689,6 @@ "ajv": "^6.9.1" } }, - "node_modules/allure-commandline": { - "version": "2.32.0", - "resolved": "https://registry.npmjs.org/allure-commandline/-/allure-commandline-2.32.0.tgz", - "integrity": "sha512-W03ors+ks8uy0SgQILHQvtvR0iadAfDYmTFC3p8Pk4pi8KXUW1cF+z8FN2+7deH3FE2cuYgjhhA+CdLdJfzOMQ==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "allure": "bin/allure" - } - }, - "node_modules/allure-js-commons": { - "version": "2.15.1", - "resolved": "https://registry.npmjs.org/allure-js-commons/-/allure-js-commons-2.15.1.tgz", - "integrity": "sha512-5V/VINplbu0APnfSZOkYpKOzucO36Q2EtTD1kqjWjl7n6tj7Hh+IHCZsH3Vpk/LXRDfj9RuXugBBvwYKV5YMJw==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "md5": "^2.3.0", - "properties": "^1.2.1", - "strip-ansi": "^5.2.0" - } - }, - "node_modules/allure-js-commons/node_modules/ansi-regex": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.1.tgz", - "integrity": "sha512-ILlv4k/3f6vfQ4OoP2AGvirOktlQ98ZEL1k9FaQjxa3L1abBgbuTDAdPOpvbGncC0BTVQrl+OM8xZGK6tWXt7g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/allure-js-commons/node_modules/strip-ansi": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.2.0.tgz", - "integrity": "sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^4.1.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/allure-mocha": { - "version": "2.15.1", - 
"resolved": "https://registry.npmjs.org/allure-mocha/-/allure-mocha-2.15.1.tgz", - "integrity": "sha512-4Hk2qUR6LdAUXNpPe73MV3DPKrBH7zy57lbAdb/D0poNIkdGEkzUYkpVPtW1imYfjqFXKBFEPOSJWqznGuiyjg==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "allure-js-commons": "2.15.1" - }, - "peerDependencies": { - "mocha": ">=6.2.x" - } - }, "node_modules/anser": { "version": "1.4.10", "resolved": "https://registry.npmjs.org/anser/-/anser-1.4.10.tgz", @@ -25835,23 +25782,6 @@ "node": ">= 14.0.0" } }, - "node_modules/mocha-multi-reporters": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/mocha-multi-reporters/-/mocha-multi-reporters-1.5.1.tgz", - "integrity": "sha512-Yb4QJOaGLIcmB0VY7Wif5AjvLMUFAdV57D2TWEva1Y0kU/3LjKpeRVmlMIfuO1SVbauve459kgtIizADqxMWPg==", - "dev": true, - "license": "MIT", - "dependencies": { - "debug": "^4.1.1", - "lodash": "^4.17.15" - }, - "engines": { - "node": ">=6.0.0" - }, - "peerDependencies": { - "mocha": ">=3.1.2" - } - }, "node_modules/mocha/node_modules/cliui": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", @@ -33016,16 +32946,6 @@ "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", "license": "ISC" }, - "node_modules/properties": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/properties/-/properties-1.2.1.tgz", - "integrity": "sha512-qYNxyMj1JeW54i/EWEFsM1cVwxJbtgPp8+0Wg9XjNaK6VE/c4oRi6PNu5p7w1mNXEIQIjV5Wwn8v8Gz82/QzdQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10" - } - }, "node_modules/proto-list": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz", @@ -40715,6 +40635,24 @@ } } }, + "packages/scalable-data-sync": { + "version": "0.0.1", + "extraneous": true, + "license": "MIT OR Apache-2.0", + "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.7", + "@rollup/plugin-json": "^6.0.0", + "@rollup/plugin-node-resolve": 
"^15.2.3", + "@waku/build-utils": "*", + "cspell": "^8.6.1", + "fast-check": "^3.19.0", + "npm-run-all": "^4.1.5", + "rollup": "^4.12.0" + }, + "engines": { + "node": ">=20" + } + }, "packages/sdk": { "name": "@waku/sdk", "version": "0.0.29", @@ -40841,6 +40779,79 @@ "url": "https://opencollective.com/sinon" } }, + "packages/sds": { + "name": "@waku/sds", + "version": "0.0.1", + "license": "MIT OR Apache-2.0", + "dependencies": { + "chai": "^5.1.2" + }, + "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.7", + "@rollup/plugin-json": "^6.0.0", + "@rollup/plugin-node-resolve": "^15.2.3", + "@waku/build-utils": "*", + "cspell": "^8.6.1", + "fast-check": "^3.19.0", + "npm-run-all": "^4.1.5", + "rollup": "^4.12.0" + }, + "engines": { + "node": ">=20" + } + }, + "packages/sds/node_modules/assertion-error": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", + "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", + "engines": { + "node": ">=12" + } + }, + "packages/sds/node_modules/chai": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/chai/-/chai-5.1.2.tgz", + "integrity": "sha512-aGtmf24DW6MLHHG5gCx4zaI3uBq3KRtxeVs0DjFH6Z0rDNbsvTxFASFvdj79pxjxZ8/5u3PIiN3IwEIQkiiuPw==", + "dependencies": { + "assertion-error": "^2.0.1", + "check-error": "^2.1.1", + "deep-eql": "^5.0.1", + "loupe": "^3.1.0", + "pathval": "^2.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "packages/sds/node_modules/check-error": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz", + "integrity": "sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==", + "engines": { + "node": ">= 16" + } + }, + "packages/sds/node_modules/deep-eql": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", + "integrity": 
"sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", + "engines": { + "node": ">=6" + } + }, + "packages/sds/node_modules/loupe": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.1.3.tgz", + "integrity": "sha512-kkIp7XSkP78ZxJEsSxW3712C6teJVoeHHwgo9zJ380de7IYyJ2ISlxojcH2pC5OFLewESmnRi/+XCDIEEVyoug==" + }, + "packages/sds/node_modules/pathval": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.0.tgz", + "integrity": "sha512-vE7JKRyES09KiunauX7nd2Q9/L7lhok4smP9RZTDeD4MVs72Dp2qNFVz39Nz5a0FVEW0BJR6C0DYrq6unoziZA==", + "engines": { + "node": ">= 14.16" + } + }, "packages/tests": { "name": "@waku/tests", "version": "0.0.1", @@ -40875,8 +40886,6 @@ "@waku/message-encryption": "*", "@waku/relay": "*", "@waku/sdk": "*", - "allure-commandline": "^2.27.0", - "allure-mocha": "^2.9.2", "chai": "^4.3.10", "cspell": "^8.6.1", "datastore-core": "^10.0.2", @@ -40884,7 +40893,6 @@ "interface-datastore": "^8.2.10", "libp2p": "2.1.8", "mocha": "^10.3.0", - "mocha-multi-reporters": "^1.5.1", "npm-run-all": "^4.1.5" }, "engines": { diff --git a/package.json b/package.json index 05b2caeb79..e30f0ded92 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "packages/tests", "packages/browser-tests", "packages/build-utils", - "packages/react-native-polyfills" + "packages/react-native-polyfills", + "packages/sds" ], "scripts": { "prepare": "husky", diff --git a/packages/proto/src/generated/sds_message.ts b/packages/proto/src/generated/sds_message.ts new file mode 100644 index 0000000000..757756b6f9 --- /dev/null +++ b/packages/proto/src/generated/sds_message.ts @@ -0,0 +1,126 @@ +/* eslint-disable import/export */ +/* eslint-disable complexity */ +/* eslint-disable @typescript-eslint/no-namespace */ +/* eslint-disable @typescript-eslint/no-unnecessary-boolean-literal-compare */ +/* eslint-disable @typescript-eslint/no-empty-interface */ + +import { type Codec, 
decodeMessage, type DecodeOptions, encodeMessage, MaxLengthError, message } from 'protons-runtime' +import type { Uint8ArrayList } from 'uint8arraylist' + +export interface SdsMessage { + messageId: string + channelId: string + lamportTimestamp?: number + causalHistory: string[] + bloomFilter?: Uint8Array + content?: Uint8Array +} + +export namespace SdsMessage { + let _codec: Codec<SdsMessage> + + export const codec = (): Codec<SdsMessage> => { + if (_codec == null) { + _codec = message<SdsMessage>((obj, w, opts = {}) => { + if (opts.lengthDelimited !== false) { + w.fork() + } + + if ((obj.messageId != null && obj.messageId !== '')) { + w.uint32(18) + w.string(obj.messageId) + } + + if ((obj.channelId != null && obj.channelId !== '')) { + w.uint32(26) + w.string(obj.channelId) + } + + if (obj.lamportTimestamp != null) { + w.uint32(80) + w.int32(obj.lamportTimestamp) + } + + if (obj.causalHistory != null) { + for (const value of obj.causalHistory) { + w.uint32(90) + w.string(value) + } + } + + if (obj.bloomFilter != null) { + w.uint32(98) + w.bytes(obj.bloomFilter) + } + + if (obj.content != null) { + w.uint32(162) + w.bytes(obj.content) + } + + if (opts.lengthDelimited !== false) { + w.ldelim() + } + }, (reader, length, opts = {}) => { + const obj: any = { + messageId: '', + channelId: '', + causalHistory: [] + } + + const end = length == null ? 
reader.len : reader.pos + length + + while (reader.pos < end) { + const tag = reader.uint32() + + switch (tag >>> 3) { + case 2: { + obj.messageId = reader.string() + break + } + case 3: { + obj.channelId = reader.string() + break + } + case 10: { + obj.lamportTimestamp = reader.int32() + break + } + case 11: { + if (opts.limits?.causalHistory != null && obj.causalHistory.length === opts.limits.causalHistory) { + throw new MaxLengthError('Decode error - map field "causalHistory" had too many elements') + } + + obj.causalHistory.push(reader.string()) + break + } + case 12: { + obj.bloomFilter = reader.bytes() + break + } + case 20: { + obj.content = reader.bytes() + break + } + default: { + reader.skipType(tag & 7) + break + } + } + } + + return obj + }) + } + + return _codec + } + + export const encode = (obj: Partial<SdsMessage>): Uint8Array => { + return encodeMessage(obj, SdsMessage.codec()) + } + + export const decode = (buf: Uint8Array | Uint8ArrayList, opts?: DecodeOptions<SdsMessage>): SdsMessage => { + return decodeMessage(buf, SdsMessage.codec(), opts) + } +} diff --git a/packages/proto/src/lib/sds_message.proto b/packages/proto/src/lib/sds_message.proto new file mode 100644 index 0000000000..f0396e6cc8 --- /dev/null +++ b/packages/proto/src/lib/sds_message.proto @@ -0,0 +1,11 @@ +syntax = "proto3"; + +message SdsMessage { + // 1 Reserved for sender/participant id + string message_id = 2; // Unique identifier of the message + string channel_id = 3; // Identifier of the channel to which the message belongs + optional int32 lamport_timestamp = 10; // Logical timestamp for causal ordering in channel + repeated string causal_history = 11; // List of preceding message IDs that this message causally depends on. Generally 2 or 3 message IDs are included. 
+ optional bytes bloom_filter = 12; // Bloom filter representing received message IDs in channel + optional bytes content = 20; // Actual content of the message +} \ No newline at end of file diff --git a/packages/sds/.eslintrc.cjs b/packages/sds/.eslintrc.cjs new file mode 100644 index 0000000000..5867f7a78a --- /dev/null +++ b/packages/sds/.eslintrc.cjs @@ -0,0 +1,6 @@ +module.exports = { + parserOptions: { + tsconfigRootDir: __dirname, + project: "./tsconfig.dev.json", + }, + }; \ No newline at end of file diff --git a/packages/sds/.mocharc.cjs b/packages/sds/.mocharc.cjs new file mode 100644 index 0000000000..77cc8af51b --- /dev/null +++ b/packages/sds/.mocharc.cjs @@ -0,0 +1,27 @@ +const config = { + extension: ['ts'], + spec: 'src/**/*.spec.ts', + require: ['ts-node/register', 'isomorphic-fetch'], + loader: 'ts-node/esm', + 'node-option': [ + 'experimental-specifier-resolution=node', + 'loader=ts-node/esm' + ], + exit: true, + retries: 4 +}; + +if (process.env.CI) { + console.log("Running tests in parallel"); + config.parallel = true; + config.jobs = 6; + console.log("Using JSON reporter for test results"); + config.reporter = 'json'; + config.reporterOptions = { + output: 'reports/mocha-results.json' + }; +} else { + console.log("Running tests serially. To enable parallel execution update mocha config"); +} + +module.exports = config; diff --git a/packages/sds/README.md b/packages/sds/README.md new file mode 100644 index 0000000000..01866257e2 --- /dev/null +++ b/packages/sds/README.md @@ -0,0 +1,3 @@ +# Scalable Data Sync + +Typescript implementation of the [Scalable Data Sync protocol](https://github.com/vacp2p/rfc-index/blob/main/vac/raw/sds.md) for message reliability of distributed logs in the browser. 
\ No newline at end of file diff --git a/packages/sds/package.json b/packages/sds/package.json new file mode 100644 index 0000000000..6784e31bc4 --- /dev/null +++ b/packages/sds/package.json @@ -0,0 +1,84 @@ +{ + "name": "@waku/sds", + "version": "0.0.1", + "description": "Scalable Data Sync implementation for the browser. Based on https://github.com/vacp2p/rfc-index/blob/main/vac/raw/sds.md", + "types": "./dist/index.d.ts", + "module": "./dist/index.js", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "typesVersions": { + "*": { + "*": [ + "*", + "dist/*", + "dist/*/index" + ] + } + }, + "type": "module", + "author": "Waku Team", + "homepage": "https://github.com/waku-org/js-waku/tree/master/packages/sds#readme", + "repository": { + "type": "git", + "url": "https://github.com/waku-org/js-waku.git" + }, + "bugs": { + "url": "https://github.com/waku-org/js-waku/issues" + }, + "license": "MIT OR Apache-2.0", + "keywords": [ + "waku", + "decentralized", + "secure", + "communication", + "web3", + "ethereum", + "dapps", + "privacy" + ], + "scripts": { + "build": "run-s build:**", + "build:esm": "tsc", + "build:bundle": "rollup --config rollup.config.js", + "fix": "run-s fix:*", + "fix:lint": "eslint src *.js --fix", + "check": "run-s check:*", + "check:lint": "eslint src *.js", + "check:spelling": "cspell \"{README.md,src/**/*.ts}\"", + "check:tsc": "tsc -p tsconfig.dev.json", + "prepublish": "npm run build", + "reset-hard": "git clean -dfx -e .idea && git reset --hard && npm i && npm run build", + "test": "NODE_ENV=test run-s test:*", + "test:node": "NODE_ENV=test TS_NODE_PROJECT=./tsconfig.dev.json mocha" + }, + "engines": { + "node": ">=20" + }, + "dependencies": { + "chai": "^5.1.2" + }, + "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.7", + "@rollup/plugin-json": "^6.0.0", + "@rollup/plugin-node-resolve": "^15.2.3", + "@waku/build-utils": "*", + "cspell": "^8.6.1", + "fast-check": "^3.19.0", 
+ "npm-run-all": "^4.1.5", + "rollup": "^4.12.0" + }, + "files": [ + "dist", + "bundle", + "src/**/*.ts", + "!**/*.spec.*", + "!**/*.json", + "CHANGELOG.md", + "LICENSE", + "README.md" + ] +} diff --git a/packages/sds/rollup.config.js b/packages/sds/rollup.config.js new file mode 100644 index 0000000000..4d0757b1c9 --- /dev/null +++ b/packages/sds/rollup.config.js @@ -0,0 +1,24 @@ +import commonjs from "@rollup/plugin-commonjs"; +import json from "@rollup/plugin-json"; +import { nodeResolve } from "@rollup/plugin-node-resolve"; +import { extractExports } from "@waku/build-utils"; + +import * as packageJson from "./package.json" assert { type: "json" }; + +const input = extractExports(packageJson); + +export default { + input, + output: { + dir: "bundle", + format: "esm" + }, + plugins: [ + commonjs(), + json(), + nodeResolve({ + browser: true, + preferBuiltins: false + }) + ] +}; diff --git a/packages/sds/src/bloom.ts b/packages/sds/src/bloom.ts new file mode 100644 index 0000000000..0515c843bf --- /dev/null +++ b/packages/sds/src/bloom.ts @@ -0,0 +1,67 @@ +import { getMOverNBitsForK } from "./probabilities.js"; + +export interface BloomFilterOptions { + // The expected maximum number of elements for which this BloomFilter is sized. + capacity: number; + + // The desired false-positive rate (between 0 and 1). + errorRate: number; + + // (Optional) The exact number of hash functions, if the user wants to override the automatic calculation. + kHashes?: number; + + // (Optional) Force a specific number of bits per element instead of using a table or optimal formula. + forceNBitsPerElem?: number; +} + +/** + * A probabilistic data structure that tracks memberships in a set. + * Supports time and space efficient lookups, but may return false-positives. + * Can never return false-negatives. 
+ * A bloom filter can tell us if an element is:
+ * - Definitely not in the set
+ * - Potentially in the set (with a probability depending on the false-positive rate)
+ */
+export abstract class BloomFilter {
+  public totalBits: number;
+  public data: Uint8Array = new Uint8Array(0);
+  public kHashes: number; // hash functions in effect (given or derived below)
+
+  public constructor(options: BloomFilterOptions) {
+    let nBitsPerElem: number;
+    let k = options.kHashes ?? 0;
+    const forceNBitsPerElem = options.forceNBitsPerElem ?? 0;
+
+    if (k < 1) {
+      // No k given: derive bits/element m/n = -ln(p) / ln(2)^2, then k = ln(2) * m/n
+      nBitsPerElem = Math.ceil(
+        -1.0 * (Math.log(options.errorRate) / Math.pow(Math.log(2), 2))
+      );
+      k = Math.round(Math.log(2) * nBitsPerElem);
+    } else if (forceNBitsPerElem < 1) {
+      // k given without a bits-per-element override: use the lookup table
+      nBitsPerElem = getMOverNBitsForK(k, options.errorRate);
+    } else {
+      nBitsPerElem = forceNBitsPerElem;
+    }
+
+    const mBits = options.capacity * nBitsPerElem;
+    // Whole bytes covering mBits plus one spare; the unfloored quotient was
+    // fractional and only worked because Uint8Array truncates its length.
+    const mBytes = 1 + Math.floor(mBits / (Uint8Array.BYTES_PER_ELEMENT * 8));
+
+    this.kHashes = k;
+    this.totalBits = mBits;
+    this.data = new Uint8Array(mBytes);
+  }
+
+  // Adds an item to the bloom filter by computing its hash values
+  // and setting corresponding bits in "data".
+  public abstract insert(item: string | Uint8Array): void;
+
+  // Checks if the item is potentially in the bloom filter.
+  // The method is guaranteed to return "true" for items that were inserted,
+  // but might also return "true" for items that were never inserted
+  // (purpose of false-positive probability).
+ public abstract lookup(item: string | Uint8Array): boolean; +} diff --git a/packages/sds/src/index.spec.ts b/packages/sds/src/index.spec.ts new file mode 100644 index 0000000000..da0b0e325a --- /dev/null +++ b/packages/sds/src/index.spec.ts @@ -0,0 +1,9 @@ +import { expect } from "chai"; + +import { BloomFilter } from "./bloom.js"; + +describe("BloomFilter", () => { + it("should be defined", () => { + expect(BloomFilter).to.be.ok; + }); +}); diff --git a/packages/sds/src/index.ts b/packages/sds/src/index.ts new file mode 100644 index 0000000000..b82033fa72 --- /dev/null +++ b/packages/sds/src/index.ts @@ -0,0 +1,3 @@ +import { BloomFilter } from "./bloom.js"; + +export { BloomFilter }; diff --git a/packages/sds/src/probabilities.ts b/packages/sds/src/probabilities.ts new file mode 100644 index 0000000000..b141c28cb7 --- /dev/null +++ b/packages/sds/src/probabilities.ts @@ -0,0 +1,166 @@ +// This file contains the probability tables used to determine the optimal number of +// hash functions (k) and bits per element (m/n) for a Bloom filter. +// +// These are used to determine how to construct a Bloom filter that can perform +// lookups with false-positive rate low enough to be satisfactory. + +/** + * Represents the error rates for a given number of hash functions (k) across + * different (m/n) ratios (i.e., bits per element). + */ +type TErrorForK = Float32Array; + +/** + * An array where each index corresponds to a value of k (the number of hash functions), + * and each element is a vector of false-positive rates for varying bits-per-element ratios. + * Example: + * ```ts + * // Probability of a false positive upon lookup when using 1 hash function (k=1) + * // and 15 bits per element (mOverN=15): + * const falsePositiveRate = kErrors[1][15]; + * ``` + */ +type TAllErrorRates = Array<TErrorForK>; + +/** + * Table of false positive rates for values of k from 0 to 12, and bits-per-element + * ratios ranging from 0 up to around 32. 
Each Float32Array is indexed by mOverN, + * so kErrors[k][mOverN] gives the estimated false-positive probability. NOTE(review): the k=3 row below repeats four entries (indices 16-19 duplicate indices 12-15), so it has 37 entries while the k=1 and k=2 rows have 33, and every later k=3 value sits four slots further right; confirm against the reference table and the Nim implementation this was ported from before correcting, since filter sizing may need to stay in step with it. + * + * These values mirror commonly used reference data found in Bloom filter literature, + * such as: + * https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html + * https://dl.acm.org/doi/pdf/10.1145/362686.362692 + */ +// prettier-ignore +export const kErrors: TAllErrorRates = [ + new Float32Array([1.0]), + new Float32Array([1.0, 1.0, 0.3930000000, 0.2830000000, 0.2210000000, 0.1810000000, + 0.1540000000, 0.1330000000, 0.1180000000, 0.1050000000, 0.0952000000, + 0.0869000000, 0.0800000000, 0.0740000000, 0.0689000000, 0.0645000000, + 0.0606000000, 0.0571000000, 0.0540000000, 0.0513000000, 0.0488000000, + 0.0465000000, 0.0444000000, 0.0425000000, 0.0408000000, 0.0392000000, + 0.0377000000, 0.0364000000, 0.0351000000, 0.0339000000, 0.0328000000, + 0.0317000000, 0.0308000000]), + + new Float32Array([1.0, 1.0, 0.4000000000, 0.2370000000, 0.1550000000, 0.1090000000, + 0.0804000000, 0.0618000000, 0.0489000000, 0.0397000000, 0.0329000000, + 0.0276000000, 0.0236000000, 0.0203000000, 0.0177000000, 0.0156000000, + 0.0138000000, 0.0123000000, 0.0111000000, 0.0099800000, 0.0090600000, + 0.0082500000, 0.0075500000, 0.0069400000, 0.0063900000, 0.0059100000, + 0.0054800000, 0.0051000000, 0.0047500000, 0.0044400000, 0.0041600000, + 0.0039000000, 0.0036700000]), + + new Float32Array([1.0, 1.0, 1.0, 0.2530000000, 0.1470000000, 0.0920000000, + 0.0609000000, 0.0423000000, 0.0306000000, 0.0228000000, 0.0174000000, + 0.0136000000, 0.0108000000, 0.0087500000, 0.0071800000, 0.0059600000, + 0.0108000000, 0.0087500000, 0.0071800000, 0.0059600000, 0.0050000000, + 0.0042300000, 0.0036200000, 0.0031200000, 0.0027000000, 0.0023600000, + 0.0020700000, 0.0018300000, 0.0016200000, 0.0014500000, 0.0012900000, + 0.0011600000, 0.0010500000, 0.0009490000, 0.0008620000, 0.0007850000, + 0.0007170000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 0.1600000000, 0.0920000000, 
0.0561000000, 0.0359000000, + 0.0240000000, 0.0166000000, 0.0118000000, 0.0086400000, 0.0064600000, + 0.0049200000, 0.0038100000, 0.0030000000, 0.0023900000, 0.0019300000, + 0.0015800000, 0.0013000000, 0.0010800000, 0.0009050000, 0.0007640000, + 0.0006490000, 0.0005550000, 0.0004780000, 0.0004130000, 0.0003590000, + 0.0003140000, 0.0002760000, 0.0002430000, 0.0002150000, 0.0001910000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 0.1010000000, 0.0578000000, 0.0347000000, + 0.0217000000, 0.0141000000, 0.0094300000, 0.0065000000, 0.0045900000, + 0.0033200000, 0.0024400000, 0.0018300000, 0.0013900000, 0.0010700000, + 0.0008390000, 0.0006630000, 0.0005300000, 0.0004270000, 0.0003470000, + 0.0002850000, 0.0002350000, 0.0001960000, 0.0001640000, 0.0001380000, + 0.0001170000, 0.0000996000, 0.0000853000, 0.0000733000, 0.0000633000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0638000000, 0.0364000000, 0.0216000000, + 0.0133000000, 0.0084400000, 0.0055200000, 0.0037100000, 0.0025500000, + 0.0017900000, 0.0012800000, 0.0009350000, 0.0006920000, 0.0005190000, + 0.0003940000, 0.0003030000, 0.0002360000, 0.0001850000, 0.0001470000, + 0.0001170000, 0.0000944000, 0.0000766000, 0.0000626000, 0.0000515000, + 0.0000426000, 0.0000355000, 0.0000297000, 0.0000250000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0229000000, 0.0135000000, 0.0081900000, + 0.0051300000, 0.0032900000, 0.0021700000, 0.0014600000, 0.0010000000, + 0.0007020000, 0.0004990000, 0.0003600000, 0.0002640000, 0.0001960000, + 0.0001470000, 0.0001120000, 0.0000856000, 0.0000663000, 0.0000518000, + 0.0000408000, 0.0000324000, 0.0000259000, 0.0000209000, 0.0000169000, + 0.0000138000, 0.0000113000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 0.0145000000, 0.0084600000, 0.0050900000, 0.0031400000, 0.0019900000, + 0.0012900000, 0.0008520000, 0.0005740000, 0.0003940000, 0.0002750000, + 0.0001940000, 0.0001400000, 0.0001010000, 0.0000746000, 0.0000555000, + 
0.0000417000, 0.0000316000, 0.0000242000, 0.0000187000, 0.0000146000, + 0.0000114000, 0.0000090100, 0.0000071600, 0.0000057300]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0053100000, 0.0031700000, + 0.0019400000, 0.0012100000, 0.0007750000, 0.0005050000, 0.0003350000, + 0.0002260000, 0.0001550000, 0.0001080000, 0.0000759000, 0.0000542000, + 0.0000392000, 0.0000286000, 0.0000211000, 0.0000157000, 0.0000118000, + 0.0000089600, 0.0000068500, 0.0000052800, 0.0000041000, 0.0000032000]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0033400000, + 0.0019800000, 0.0012000000, 0.0007440000, 0.0004700000, 0.0003020000, + 0.0001980000, 0.0001320000, 0.0000889000, 0.0000609000, 0.0000423000, + 0.0000297000, 0.0000211000, 0.0000152000, 0.0000110000, 0.0000080700, + 0.0000059700, 0.0000044500, 0.0000033500, 0.0000025400, 0.0000019400]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 0.0021000000, 0.0012400000, 0.0007470000, 0.0004590000, 0.0002870000, + 0.0001830000, 0.0001180000, 0.0000777000, 0.0000518000, 0.0000350000, + 0.0000240000, 0.0000166000, 0.0000116000, 0.0000082300, 0.0000058900, + 0.0000042500, 0.0000031000, 0.0000022800, 0.0000016900, 0.0000012600]), + + new Float32Array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 0.0007780000, 0.0004660000, 0.0002840000, 0.0001760000, 0.0001110000, + 0.0000712000, 0.0000463000, 0.0000305000, 0.0000204000, 0.0000138000, + 0.0000094200, 0.0000065200, 0.0000045600, 0.0000032200, 0.0000022900, + 0.0000016500, 0.0000012000, 0.0000008740]), +] + +/** + * Given a number of hash functions (k) and a target false-positive rate (targetError), + * determines the minimum (m/n) bits-per-element that satisfies the error threshold. + * + * In the context of a Bloom filter: + * - m is the total number of bits in the filter. + * - n is the number of elements you expect to insert. 
+ * Thus, (m/n) describes how many bits are assigned per inserted element.
+ *
+ * Example:
+ * ```ts
+ * // We want to use 3 hash functions (k=3) and a false-positive rate of 1% (targetError=0.01).
+ * const mOverN = getMOverNBitsForK(3, 0.01);
+ * // The function will iterate through the error tables and find the smallest m/n that satisfies the error threshold.
+ * // In this case, kErrors[3][13] is the first value in the vector kErrors[3] that is less than 0.01 (0.0087500000).
+ * console.log(mOverN); // 13
+ * ```
+ *
+ * @param k - The number of hash functions.
+ * @param targetError - The desired maximum false-positive rate.
+ * @param probabilityTable - An optional table of false-positive probabilities indexed by k.
+ * @returns The smallest (m/n) bit ratio for which the false-positive rate is below targetError.
+ * @throws If k is not an integer index into the table or if no suitable ratio can be found.
+ */
+export function getMOverNBitsForK(
+  k: number,
+  targetError: number,
+  probabilityTable = kErrors
+): number {
+  const maxK = probabilityTable.length - 1;
+  if (!Number.isInteger(k) || k < 0 || k > maxK) {
+    throw new Error(`k must be an integer between 0 and ${maxK}.`);
+  }
+
+  for (let mOverN = 2; mOverN < probabilityTable[k].length; mOverN++) {
+    if (probabilityTable[k][mOverN] < targetError) {
+      return mOverN;
+    }
+  }
+
+  throw new Error(
+    "Specified value of k and error rate not achievable using less than 4 bytes / element."
+ ); +} diff --git a/packages/sds/tsconfig.dev.json b/packages/sds/tsconfig.dev.json new file mode 100644 index 0000000000..4f7c34af3c --- /dev/null +++ b/packages/sds/tsconfig.dev.json @@ -0,0 +1,3 @@ +{ + "extends": "../../tsconfig.dev" +} diff --git a/packages/sds/tsconfig.json b/packages/sds/tsconfig.json new file mode 100644 index 0000000000..eebbc51585 --- /dev/null +++ b/packages/sds/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig", + "compilerOptions": { + "outDir": "dist/", + "rootDir": "src", + "tsBuildInfoFile": "dist/.tsbuildinfo" + }, + "include": ["src"], + "exclude": ["src/**/*.spec.ts", "src/test_utils"] +} diff --git a/packages/sds/typedoc.json b/packages/sds/typedoc.json new file mode 100644 index 0000000000..00aa3dc064 --- /dev/null +++ b/packages/sds/typedoc.json @@ -0,0 +1,4 @@ +{ + "extends": ["../../typedoc.base.json"], + "entryPoints": ["src/index.ts"] +} \ No newline at end of file