add `analysis/timeline/filterTimelineCred`
This adds the `filterTimelineCred` module, which dramatically reduces the size of timeline cred by discarding cred for all nodes that are not a user or a repository. It also supports serialization/deserialization. Test plan: unit tests included.
This commit is contained in:
parent
162f73c3e9
commit
9bd1e88bc9
|
@ -0,0 +1,66 @@
|
|||
// @flow
|
||||
|
||||
import {toObject, fromObject} from "../../util/map";
|
||||
import {type Interval} from "./interval";
|
||||
import {NodeAddress, type NodeAddressT} from "../../core/graph";
|
||||
import {type FullTimelineCred} from "./distributionToCred";
|
||||
|
||||
// Compact cred representation: a per-interval cred series for only the
// node addresses that survived filtering (see `filterTimelineCred`).
export type FilteredTimelineCred = {|
  // The time intervals the cred series are aligned to. Each array in
  // `addressToCred` is parallel to this array (one cred value per interval).
  +intervals: $ReadOnlyArray<Interval>,
  // Cred over time for each retained node address.
  +addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>,
|};
||||
|
||||
/**
|
||||
* Compress FullTimelineCred by discarding most nodes' cred.
|
||||
*
|
||||
* FullTimelineCred contains the cred at every interval for every node in the
|
||||
* graph. This could be tens of thousands of nodes and hundreds of intervals;
|
||||
* it's ungainly to store. To avoid this issue, we compress the cred down by
|
||||
* removing cred for most nodes. (We care a lot about users' cred; not so much
|
||||
* about the cred for every individual comment ever.)
|
||||
*
|
||||
* Right now, we do this by filtering out every node that doesn't match an
|
||||
* inclusion address prefix. In the future, we may have more sophisticated
|
||||
* logic, like keeping the top k nodes for each type.
|
||||
*/
|
||||
export function filterTimelineCred(
|
||||
fullCred: FullTimelineCred,
|
||||
nodeOrder: $ReadOnlyArray<NodeAddressT>,
|
||||
inclusionPrefixes: $ReadOnlyArray<NodeAddressT>
|
||||
): FilteredTimelineCred {
|
||||
const intervals = fullCred.map((x) => x.interval);
|
||||
const addressToCred = new Map();
|
||||
function hasMatch(x: NodeAddressT): boolean {
|
||||
for (const prefix of inclusionPrefixes) {
|
||||
if (NodeAddress.hasPrefix(x, prefix)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
for (let i = 0; i < nodeOrder.length; i++) {
|
||||
const addr = nodeOrder[i];
|
||||
if (hasMatch(addr)) {
|
||||
const addrCred = fullCred.map(({cred}) => cred[i]);
|
||||
addressToCred.set(addr, addrCred);
|
||||
}
|
||||
}
|
||||
return {intervals, addressToCred};
|
||||
}
|
||||
|
||||
// JSON-serializable form of FilteredTimelineCred: the Map is flattened into
// a plain object keyed by node address (JSON has no native Map).
export type FilteredTimelineCredJSON = {|
  +intervals: $ReadOnlyArray<Interval>,
  +addressToCred: {[NodeAddressT]: $ReadOnlyArray<number>},
|};
|
||||
|
||||
export function filteredTimelineCredToJSON(
|
||||
x: FilteredTimelineCred
|
||||
): FilteredTimelineCredJSON {
|
||||
return {intervals: x.intervals, addressToCred: toObject(x.addressToCred)};
|
||||
}
|
||||
|
||||
export function filteredTimelineCredFromJSON(
|
||||
x: FilteredTimelineCredJSON
|
||||
): FilteredTimelineCred {
|
||||
return {intervals: x.intervals, addressToCred: fromObject(x.addressToCred)};
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// @flow
|
||||
|
||||
import {NodeAddress} from "../../core/graph";
|
||||
import {
|
||||
filterTimelineCred,
|
||||
filteredTimelineCredToJSON,
|
||||
filteredTimelineCredFromJSON,
|
||||
} from "./filterTimelineCred";
|
||||
|
||||
describe("src/analysis/timeline/filterTimelineCred", () => {
  // Shorthand for building node addresses from parts.
  const na = (...parts) => NodeAddress.fromParts(parts);

  describe("filterTimelineCred", () => {
    it("returns an empty object for empty cred", () => {
      const result = filterTimelineCred([], [], []);
      expect(result).toEqual({intervals: [], addressToCred: new Map()});
    });
    it("appropriately filters a simple example", () => {
      const interval0 = {startTimeMs: 0, endTimeMs: 10};
      const interval1 = {startTimeMs: 10, endTimeMs: 20};
      const fullCred = [
        {interval: interval0, cred: new Float64Array([1, 2, 3])},
        {interval: interval1, cred: new Float64Array([4, 5, 6])},
      ];
      const nodeOrder = [na("foo"), na("bar"), na("zod")];
      const prefixes = [na("foo"), na("bar")];
      // "zod" matches no prefix, so only foo's and bar's cred survive.
      const expectedCred = new Map()
        .set(na("foo"), [1, 4])
        .set(na("bar"), [2, 5]);
      const actual = filterTimelineCred(fullCred, nodeOrder, prefixes);
      expect(actual).toEqual({
        intervals: [interval0, interval1],
        addressToCred: expectedCred,
      });
    });
  });

  it("JSON serialization", () => {
    const intervals = [
      {startTimeMs: 0, endTimeMs: 10},
      {startTimeMs: 10, endTimeMs: 20},
    ];
    const addressToCred = new Map()
      .set(na("foo"), [1, 4])
      .set(na("bar"), [2, 5]);
    const fc = {intervals, addressToCred};
    // Round-tripping through JSON should be lossless.
    const roundTripped = filteredTimelineCredFromJSON(
      filteredTimelineCredToJSON(fc)
    );
    expect(fc).toEqual(roundTripped);
  });
});
|
Loading…
Reference in New Issue