add `analysis/timeline/filterTimelineCred`

This adds the `filterTimelineCred` module, which dramatically reduces
the size of timeline cred by throwing away all nodes that are not a user
or repository. It also supports serialization / deserialization.

Test plan: unit tests included
This commit is contained in:
Dandelion Mané 2019-07-10 14:23:32 +01:00
parent 162f73c3e9
commit 9bd1e88bc9
2 changed files with 120 additions and 0 deletions

View File

@ -0,0 +1,66 @@
// @flow
import {toObject, fromObject} from "../../util/map";
import {type Interval} from "./interval";
import {NodeAddress, type NodeAddressT} from "../../core/graph";
import {type FullTimelineCred} from "./distributionToCred";
// Timeline cred compressed down to only the nodes that matched an
// inclusion prefix. `intervals` is shared by every node; each array in
// `addressToCred` has one cred score per interval, in interval order.
export type FilteredTimelineCred = {|
+intervals: $ReadOnlyArray<Interval>,
+addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>,
|};
/**
* Compress FullTimelineCred by discarding most nodes' cred.
*
* FullTimelineCred contains the cred at every interval for every node in the
* graph. This could be tens of thousands of nodes and hundreds of intervals;
* it's ungainly to store. To avoid this issue, we compress the cred down by
* removing cred for most nodes. (We care a lot about users' cred; not so much
* about the cred for every individual comment ever.)
*
* Right now, we do this by filtering out every node that doesn't match an
* inclusion address prefix. In the future, we may have more sophisticated
* logic, like keeping the top k nodes for each type.
*/
export function filterTimelineCred(
fullCred: FullTimelineCred,
nodeOrder: $ReadOnlyArray<NodeAddressT>,
inclusionPrefixes: $ReadOnlyArray<NodeAddressT>
): FilteredTimelineCred {
const intervals = fullCred.map((x) => x.interval);
const addressToCred = new Map();
function hasMatch(x: NodeAddressT): boolean {
for (const prefix of inclusionPrefixes) {
if (NodeAddress.hasPrefix(x, prefix)) {
return true;
}
}
return false;
}
for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i];
if (hasMatch(addr)) {
const addrCred = fullCred.map(({cred}) => cred[i]);
addressToCred.set(addr, addrCred);
}
}
return {intervals, addressToCred};
}
// JSON-compatible form of FilteredTimelineCred: the Map is represented
// as a plain object keyed by node address (see util/map's toObject /
// fromObject for the conversion).
export type FilteredTimelineCredJSON = {|
+intervals: $ReadOnlyArray<Interval>,
+addressToCred: {[NodeAddressT]: $ReadOnlyArray<number>},
|};
export function filteredTimelineCredToJSON(
x: FilteredTimelineCred
): FilteredTimelineCredJSON {
return {intervals: x.intervals, addressToCred: toObject(x.addressToCred)};
}
export function filteredTimelineCredFromJSON(
x: FilteredTimelineCredJSON
): FilteredTimelineCred {
return {intervals: x.intervals, addressToCred: fromObject(x.addressToCred)};
}

View File

@ -0,0 +1,54 @@
// @flow
import {NodeAddress} from "../../core/graph";
import {
filterTimelineCred,
filteredTimelineCredToJSON,
filteredTimelineCredFromJSON,
} from "./filterTimelineCred";
describe("src/analysis/timeline/filterTimelineCred", () => {
  // Shorthand: build a node address from string parts.
  const addr = (...parts) => NodeAddress.fromParts(parts);

  describe("filterTimelineCred", () => {
    it("returns an empty object for empty cred", () => {
      const result = filterTimelineCred([], [], []);
      expect(result).toEqual({intervals: [], addressToCred: new Map()});
    });

    it("appropriately filters a simple example", () => {
      const interval0 = {startTimeMs: 0, endTimeMs: 10};
      const interval1 = {startTimeMs: 10, endTimeMs: 20};
      const fullCred = [
        {interval: interval0, cred: new Float64Array([1, 2, 3])},
        {interval: interval1, cred: new Float64Array([4, 5, 6])},
      ];
      const nodeOrder = [addr("foo"), addr("bar"), addr("zod")];
      const prefixes = [addr("foo"), addr("bar")];
      // "zod" matches no prefix, so its cred is discarded.
      const expected = {
        intervals: [interval0, interval1],
        addressToCred: new Map([
          [addr("foo"), [1, 4]],
          [addr("bar"), [2, 5]],
        ]),
      };
      expect(filterTimelineCred(fullCred, nodeOrder, prefixes)).toEqual(
        expected
      );
    });
  });

  it("JSON serialization", () => {
    const intervals = [
      {startTimeMs: 0, endTimeMs: 10},
      {startTimeMs: 10, endTimeMs: 20},
    ];
    const fc = {
      intervals,
      addressToCred: new Map([
        [addr("foo"), [1, 4]],
        [addr("bar"), [2, 5]],
      ]),
    };
    // Round-tripping through JSON should be lossless.
    const roundTripped = filteredTimelineCredFromJSON(
      filteredTimelineCredToJSON(fc)
    );
    expect(fc).toEqual(roundTripped);
  });
});