From 9bd1e88bc90c7aef878fd15fcf3e735321e9a2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Wed, 10 Jul 2019 14:23:32 +0100 Subject: [PATCH] add `analysis/timeline/filterTimelineCred` This adds the `filterTimelineCred` module, which dramatically reduces the size of timeline cred by throwing away all nodes that are not a user or repository. It also supports serialization / deserialization. Test plan: unit tests included --- src/analysis/timeline/filterTimelineCred.js | 66 +++++++++++++++++++ .../timeline/filterTimelineCred.test.js | 54 +++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 src/analysis/timeline/filterTimelineCred.js create mode 100644 src/analysis/timeline/filterTimelineCred.test.js diff --git a/src/analysis/timeline/filterTimelineCred.js b/src/analysis/timeline/filterTimelineCred.js new file mode 100644 index 0000000..2d89473 --- /dev/null +++ b/src/analysis/timeline/filterTimelineCred.js @@ -0,0 +1,66 @@ +// @flow + +import {toObject, fromObject} from "../../util/map"; +import {type Interval} from "./interval"; +import {NodeAddress, type NodeAddressT} from "../../core/graph"; +import {type FullTimelineCred} from "./distributionToCred"; + +export type FilteredTimelineCred = {| + +intervals: $ReadOnlyArray<Interval>, + +addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>, +|}; + +/** + * Compress FullTimelineCred by discarding most nodes' cred. + * + * FullTimelineCred contains the cred at every interval for every node in the + * graph. This could be tens of thousands of nodes and hundreds of intervals; + * it's ungainly to store. To avoid this issue, we compress the cred down by + * removing cred for most nodes. (We care a lot about users' cred; not so much + * about the cred for every individual comment ever.) + * + * Right now, we do this by filtering out every node that doesn't match an + * inclusion address prefix. In the future, we may have more sophisticated + * logic, like keeping the top k nodes for each type. 
+ */ +export function filterTimelineCred( + fullCred: FullTimelineCred, + nodeOrder: $ReadOnlyArray<NodeAddressT>, + inclusionPrefixes: $ReadOnlyArray<NodeAddressT> +): FilteredTimelineCred { + const intervals = fullCred.map((x) => x.interval); + const addressToCred = new Map(); + function hasMatch(x: NodeAddressT): boolean { + for (const prefix of inclusionPrefixes) { + if (NodeAddress.hasPrefix(x, prefix)) { + return true; + } + } + return false; + } + for (let i = 0; i < nodeOrder.length; i++) { + const addr = nodeOrder[i]; + if (hasMatch(addr)) { + const addrCred = fullCred.map(({cred}) => cred[i]); + addressToCred.set(addr, addrCred); + } + } + return {intervals, addressToCred}; +} + +export type FilteredTimelineCredJSON = {| + +intervals: $ReadOnlyArray<Interval>, + +addressToCred: {[NodeAddressT]: $ReadOnlyArray<number>}, +|}; + +export function filteredTimelineCredToJSON( + x: FilteredTimelineCred +): FilteredTimelineCredJSON { + return {intervals: x.intervals, addressToCred: toObject(x.addressToCred)}; +} + +export function filteredTimelineCredFromJSON( + x: FilteredTimelineCredJSON +): FilteredTimelineCred { + return {intervals: x.intervals, addressToCred: fromObject(x.addressToCred)}; +} diff --git a/src/analysis/timeline/filterTimelineCred.test.js b/src/analysis/timeline/filterTimelineCred.test.js new file mode 100644 index 0000000..d4740fe --- /dev/null +++ b/src/analysis/timeline/filterTimelineCred.test.js @@ -0,0 +1,54 @@ +// @flow + +import {NodeAddress} from "../../core/graph"; +import { + filterTimelineCred, + filteredTimelineCredToJSON, + filteredTimelineCredFromJSON, +} from "./filterTimelineCred"; + +describe("src/analysis/timeline/filterTimelineCred", () => { + const na = (...parts) => NodeAddress.fromParts(parts); + describe("filterTimelineCred", () => { + it("returns an empty object for empty cred", () => { + expect(filterTimelineCred([], [], [])).toEqual({ + intervals: [], + addressToCred: new Map(), + }); + }); + it("appropriately filters a simple example", () => { + const fullCred = 
[ + { + interval: {startTimeMs: 0, endTimeMs: 10}, + cred: new Float64Array([1, 2, 3]), + }, + { + interval: {startTimeMs: 10, endTimeMs: 20}, + cred: new Float64Array([4, 5, 6]), + }, + ]; + const nodeOrder = [na("foo"), na("bar"), na("zod")]; + const prefixes = [na("foo"), na("bar")]; + const expected = { + intervals: fullCred.map((x) => x.interval), + addressToCred: new Map().set(na("foo"), [1, 4]).set(na("bar"), [2, 5]), + }; + expect(filterTimelineCred(fullCred, nodeOrder, prefixes)).toEqual( + expected + ); + }); + }); + + it("JSON serialization", () => { + const i0 = {startTimeMs: 0, endTimeMs: 10}; + const i1 = {startTimeMs: 10, endTimeMs: 20}; + const intervals = [i0, i1]; + const fc = { + intervals, + addressToCred: new Map().set(na("foo"), [1, 4]).set(na("bar"), [2, 5]), + }; + const json = filteredTimelineCredToJSON(fc); + const fc_ = filteredTimelineCredFromJSON(json); + expect(fc).toEqual(fc_); + }); +});