add `analysis/timeline/filterTimelineCred`

This adds the `filterTimelineCred` module, which dramatically reduces
the size of timeline cred by throwing away all nodes that are not a user
or repository. It also supports serialization / deserialization.

Test plan: unit tests included
This commit is contained in:
Dandelion Mané 2019-07-10 14:23:32 +01:00
parent 162f73c3e9
commit 9bd1e88bc9
2 changed files with 120 additions and 0 deletions

View File

@ -0,0 +1,66 @@
// @flow
import {toObject, fromObject} from "../../util/map";
import {type Interval} from "./interval";
import {NodeAddress, type NodeAddressT} from "../../core/graph";
import {type FullTimelineCred} from "./distributionToCred";
// Timeline cred compressed down to only the nodes that matched an
// inclusion prefix. `intervals` is shared by every node; each array in
// `addressToCred` has one cred score per interval, in interval order.
export type FilteredTimelineCred = {|
+intervals: $ReadOnlyArray<Interval>,
+addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>,
|};
/**
* Compress FullTimelineCred by discarding most nodes' cred.
*
* FullTimelineCred contains the cred at every interval for every node in the
* graph. This could be tens of thousands of nodes and hundreds of intervals;
* it's ungainly to store. To avoid this issue, we compress the cred down by
* removing cred for most nodes. (We care a lot about users' cred; not so much
* about the cred for every individual comment ever.)
*
* Right now, we do this by filtering out every node that doesn't match an
* inclusion address prefix. In the future, we may have more sophisticated
* logic, like keeping the top k nodes for each type.
*/
export function filterTimelineCred(
fullCred: FullTimelineCred,
nodeOrder: $ReadOnlyArray<NodeAddressT>,
inclusionPrefixes: $ReadOnlyArray<NodeAddressT>
): FilteredTimelineCred {
const intervals = fullCred.map((x) => x.interval);
const addressToCred = new Map();
function hasMatch(x: NodeAddressT): boolean {
for (const prefix of inclusionPrefixes) {
if (NodeAddress.hasPrefix(x, prefix)) {
return true;
}
}
return false;
}
for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i];
if (hasMatch(addr)) {
const addrCred = fullCred.map(({cred}) => cred[i]);
addressToCred.set(addr, addrCred);
}
}
return {intervals, addressToCred};
}
// JSON-compatible form of FilteredTimelineCred: the Map is represented
// as a plain object keyed by node address (see util/map's toObject /
// fromObject for the conversion).
export type FilteredTimelineCredJSON = {|
+intervals: $ReadOnlyArray<Interval>,
+addressToCred: {[NodeAddressT]: $ReadOnlyArray<number>},
|};
export function filteredTimelineCredToJSON(
x: FilteredTimelineCred
): FilteredTimelineCredJSON {
return {intervals: x.intervals, addressToCred: toObject(x.addressToCred)};
}
export function filteredTimelineCredFromJSON(
x: FilteredTimelineCredJSON
): FilteredTimelineCred {
return {intervals: x.intervals, addressToCred: fromObject(x.addressToCred)};
}

View File

@ -0,0 +1,54 @@
// @flow
import {NodeAddress} from "../../core/graph";
import {
filterTimelineCred,
filteredTimelineCredToJSON,
filteredTimelineCredFromJSON,
} from "./filterTimelineCred";
describe("src/analysis/timeline/filterTimelineCred", () => {
  // Shorthand: build a node address from string parts.
  const addr = (...parts) => NodeAddress.fromParts(parts);

  describe("filterTimelineCred", () => {
    it("returns an empty object for empty cred", () => {
      const result = filterTimelineCred([], [], []);
      expect(result).toEqual({intervals: [], addressToCred: new Map()});
    });

    it("appropriately filters a simple example", () => {
      const interval0 = {startTimeMs: 0, endTimeMs: 10};
      const interval1 = {startTimeMs: 10, endTimeMs: 20};
      const fullCred = [
        {interval: interval0, cred: new Float64Array([1, 2, 3])},
        {interval: interval1, cred: new Float64Array([4, 5, 6])},
      ];
      const nodeOrder = [addr("foo"), addr("bar"), addr("zod")];
      const prefixes = [addr("foo"), addr("bar")];
      // "zod" matches no prefix, so its cred is discarded.
      const expected = {
        intervals: [interval0, interval1],
        addressToCred: new Map([
          [addr("foo"), [1, 4]],
          [addr("bar"), [2, 5]],
        ]),
      };
      expect(filterTimelineCred(fullCred, nodeOrder, prefixes)).toEqual(
        expected
      );
    });
  });

  it("JSON serialization", () => {
    const intervals = [
      {startTimeMs: 0, endTimeMs: 10},
      {startTimeMs: 10, endTimeMs: 20},
    ];
    const fc = {
      intervals,
      addressToCred: new Map([
        [addr("foo"), [1, 4]],
        [addr("bar"), [2, 5]],
      ]),
    };
    // Round-tripping through JSON should be lossless.
    const roundTripped = filteredTimelineCredFromJSON(
      filteredTimelineCredToJSON(fc)
    );
    expect(fc).toEqual(roundTripped);
  });
});