Improve distributionToCred (#1654)
This commit makes several small improvements to the distributionToCred module:

- We rename the output `FullTimelineCred` data structure to `TimelineCredScores`, which is more descriptive.
- We re-organize that data structure so that rather than being an array of `{interval, cred}` objects, it has an `intervals` property and an `intervalCredScores` property, both of which are arrays. This will make downstream usage cleaner.
- An unused variable is removed.
- We document invariants about the TimelineCredScores data type.
- We mark the TimelineCredScores data type opaque, so that clients receiving a TimelineCredScores can trust that the invariants are maintained.

Test plan:

- The rename is robustly tested by `yarn flow`.
- That the refactor lands without changing existing semantics is robustly tested by `yarn test --full`, since we snapshot a full cred load; thus we know that cred scores haven't changed. (Also, we have existing unit tests.)
- The newly documented invariants aren't robustly tested by the test code, but it's easy to see that they hold by reading the algorithm.
parent db94bb50fb
commit 8dac968a69
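The shape change described above is easiest to see in a minimal sketch (illustrative only; the `scores` variable name is hypothetical and the values mirror the unit tests further down, not real output):

// Before: an array of {interval, cred} objects.
// const scores = [
//   {interval: {startTimeMs: 0, endTimeMs: 10}, cred: new Float64Array([1, 1])},
//   {interval: {startTimeMs: 10, endTimeMs: 20}, cred: new Float64Array([9, 1])},
// ];

// After: two parallel arrays, which makes downstream slicing cleaner.
const scores = {
  intervals: [
    {startTimeMs: 0, endTimeMs: 10},
    {startTimeMs: 10, endTimeMs: 20},
  ],
  intervalCredScores: [new Float64Array([1, 1]), new Float64Array([9, 1])],
};

// Extracting the cred-over-time series for the node at index i in the
// graph's canonical node ordering:
const i = 0;
const credOverTime = scores.intervalCredScores.map((cred) => cred[i]);
console.log(credOverTime); // [1, 9]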
@@ -199,17 +199,20 @@ export class TimelineCred {
       fullParams.intervalDecay,
       fullParams.alpha
     );
-    const cred = distributionToCred(distribution, nodeOrder, scorePrefixes);
+    const credScores = distributionToCred(
+      distribution,
+      nodeOrder,
+      scorePrefixes
+    );
     const addressToCred = new Map();
     for (let i = 0; i < nodeOrder.length; i++) {
       const addr = nodeOrder[i];
-      const addrCred = cred.map(({cred}) => cred[i]);
+      const addrCred = credScores.intervalCredScores.map((cred) => cred[i]);
       addressToCred.set(addr, addrCred);
     }
-    const intervals = cred.map((x) => x.interval);
     return new TimelineCred(
       weightedGraph,
-      intervals,
+      credScores.intervals,
       addressToCred,
       fullParams,
       plugins
@@ -9,26 +9,41 @@ import {type Interval} from "../interval";
 import {type TimelineDistributions} from "./timelinePagerank";
 import {NodeAddress, type NodeAddressT} from "../../core/graph";
 
-/**
- * Represents the full timeline cred for a graph.
- */
-export type FullTimelineCred = $ReadOnlyArray<{|
-  // The interval for this slice.
-  +interval: Interval,
-  // The cred for each node.
-  // (Uses the graph's canonical node ordering.)
-  +cred: Float64Array,
-|}>;
+export opaque type NodeOrderedCredScores: Float64Array = Float64Array;
 
 /**
- * Convert a TimelineDistribution into TimelineCred.
+ * Represents cred scores over time.
  *
+ * It contains an array of intervals, which give timing information, and an
+ * array of CredTimeSlices, which are Float64Arrays. Each CredTimeSlice
+ * contains cred scores for an interval. The cred scores are included in
+ * node-address-sorted order, and as such the CredScores can only be
+ * interpreted in the context of an associated Graph.
+ *
+ * As invariants, it is guaranteed that:
+ * - intervals and intervalCredScores will always have the same length
+ * - all of the intervalCredScores will have a consistent implicit node ordering
+ *
+ * The type is marked opaque so that no-one else can construct instances that
+ * don't conform to these invariants.
+ */
+export opaque type TimelineCredScores: {|
+  +intervals: $ReadOnlyArray<Interval>,
+  +intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
+|} = {|
+  +intervals: $ReadOnlyArray<Interval>,
+  +intervalCredScores: $ReadOnlyArray<NodeOrderedCredScores>,
+|};
+
+/**
+ * Convert a TimelineDistribution into CredScores.
+ *
  * The difference between the distribution and cred is that cred has been
  * re-normalized to present human-agreeable scores, rather than a probability
  * distribution.
  *
  * This implementation normalizes the scores so that in each interval, the
- * total score of every node matching scoringNodePrefix is equal to the
+ * total score of every node matching a scoringNodePrefix is equal to the
  * interval's weight.
  *
  * Edge cases:
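The invariants documented in the comment above could be spot-checked with a small helper along these lines (a hypothetical sketch, not part of this commit; `checkTimelineCredScores` is an invented name, and it only verifies necessary conditions):

// Hypothetical sketch: spot-check the documented TimelineCredScores invariants.
function checkTimelineCredScores(scores) {
  const {intervals, intervalCredScores} = scores;
  // Invariant 1: intervals and intervalCredScores have the same length.
  if (intervals.length !== intervalCredScores.length) {
    throw new Error("intervals / intervalCredScores length mismatch");
  }
  // Invariant 2: every slice uses the same implicit node ordering; a
  // necessary condition is that every slice has the same number of entries.
  const sliceLengths = new Set(intervalCredScores.map((slice) => slice.length));
  if (sliceLengths.size > 1) {
    throw new Error("inconsistent slice lengths across intervals");
  }
}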
@@ -42,22 +57,19 @@ export function distributionToCred(
   ds: TimelineDistributions,
   nodeOrder: $ReadOnlyArray<NodeAddressT>,
   scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
-): FullTimelineCred {
+): TimelineCredScores {
   if (ds.length === 0) {
-    return [];
+    return {intervals: [], intervalCredScores: []};
   }
-  const intervals = ds.map((x) => x.interval);
   const scoringNodeIndices = [];
-  const cred = new Array(nodeOrder.length);
   for (let i = 0; i < nodeOrder.length; i++) {
     const addr = nodeOrder[i];
     if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
       scoringNodeIndices.push(i);
     }
-    cred[i] = new Array(intervals.length);
   }
-
-  return ds.map(({interval, distribution, intervalWeight}) => {
+  const intervals = ds.map((x) => x.interval);
+  const intervalCredScores = ds.map(({distribution, intervalWeight}) => {
     const intervalTotalScore = sum(
       scoringNodeIndices.map((x) => distribution[x])
     );
@@ -65,6 +77,7 @@ export function distributionToCred(
     const intervalNormalizer =
       intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
     const cred = distribution.map((x) => x * intervalNormalizer);
-    return {interval, cred};
+    return cred;
   });
+  return {intervalCredScores, intervals};
 }
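For a concrete sense of the per-interval normalization this function performs, here is a standalone sketch (a hypothetical `normalizeInterval` helper with illustrative inputs, not the module's code):

// Sketch: scale one interval's scores so that the scoring nodes' total
// equals the interval's weight.
function normalizeInterval(distribution, scoringNodeIndices, intervalWeight) {
  const intervalTotalScore = scoringNodeIndices
    .map((i) => distribution[i])
    .reduce((a, b) => a + b, 0);
  const normalizer =
    intervalTotalScore === 0 ? 0 : intervalWeight / intervalTotalScore;
  return distribution.map((x) => x * normalizer);
}

// With distribution [0.5, 0.5], only node index 1 scoring, and interval
// weight 2, the scoring node's 0.5 is scaled up to 2, so every score is
// multiplied by 4:
console.log(normalizeInterval([0.5, 0.5], [1], 2)); // [2, 2]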
@@ -21,16 +21,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([1, 1]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([9, 1]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([1, 1]),
+          new Float64Array([9, 1]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("correctly handles multiple scoring prefixes", () => {
@@ -48,16 +48,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([1, 1]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([9, 1]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([1, 1]),
+          new Float64Array([9, 1]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("works in a case where some nodes are scoring", () => {
@@ -75,16 +75,16 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([2, 2]),
-        },
-        {
-          interval: {startTimeMs: 10, endTimeMs: 20},
-          cred: new Float64Array([90, 10]),
-        },
-      ];
+      const expected = {
+        intervals: [
+          {startTimeMs: 0, endTimeMs: 10},
+          {startTimeMs: 10, endTimeMs: 20},
+        ],
+        intervalCredScores: [
+          new Float64Array([2, 2]),
+          new Float64Array([90, 10]),
+        ],
+      };
       expect(expected).toEqual(actual);
     });
     it("handles the case where no nodes are scoring", () => {
@@ -97,12 +97,10 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, []);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([0, 0]),
-        },
-      ];
+      const expected = {
+        intervals: [{startTimeMs: 0, endTimeMs: 10}],
+        intervalCredScores: [new Float64Array([0, 0])],
+      };
       expect(actual).toEqual(expected);
     });
 
@@ -116,17 +114,18 @@ describe("src/core/algorithm/distributionToCred", () => {
       ];
       const nodeOrder = [na("foo"), na("bar")];
       const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
-      const expected = [
-        {
-          interval: {startTimeMs: 0, endTimeMs: 10},
-          cred: new Float64Array([0, 0]),
-        },
-      ];
+      const expected = {
+        intervals: [{startTimeMs: 0, endTimeMs: 10}],
+        intervalCredScores: [new Float64Array([0, 0])],
+      };
       expect(actual).toEqual(expected);
     });
 
-    it("returns empty array if no intervals are present", () => {
-      expect(distributionToCred([], [], [])).toEqual([]);
+    it("returns empty CredScores if no intervals are present", () => {
+      expect(distributionToCred([], [], [])).toEqual({
+        intervals: [],
+        intervalCredScores: [],
+      });
     });
   });
 });