add `analysis/timeline/distributionToCred`
This module takes the timeline distributions created by `timelinePagerank` and re-normalizes their scores into cred. For details on the algorithm, see the comments and docstrings in the module.

Test plan: unit tests added.
parent 87720c4868
commit 162f73c3e9
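As a rough sketch of how the new function is intended to be called (names and types come from the diff below; the fixture values are illustrative and mirror the unit tests):

// Usage sketch only; in practice `ds` comes from timelinePagerank.
import {NodeAddress} from "../../core/graph";
import {distributionToCred} from "./distributionToCred";

const ds = [
  {
    interval: {startTimeMs: 0, endTimeMs: 10},
    intervalWeight: 2,
    distribution: new Float64Array([0.5, 0.5]),
  },
];
const nodeOrder = [NodeAddress.fromParts(["foo"]), NodeAddress.fromParts(["bar"])];
// Every node matches the empty prefix, so both nodes are scoring nodes.
const cred = distributionToCred(ds, nodeOrder, NodeAddress.empty);
// cred[0].cred is Float64Array([1, 1]): the scoring nodes' cred sums to intervalWeight (2).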
src/analysis/timeline/distributionToCred.js
@@ -0,0 +1,63 @@
// @flow

/**
 * Takes timeline distributions and uses them to create normalized cred.
 */

import {sum} from "d3-array";
import {type Interval} from "./interval";
import {type TimelineDistributions} from "./timelinePagerank";
import {NodeAddress, type NodeAddressT} from "../../core/graph";

/**
 * Represents the full timeline cred for a graph.
 */
export type FullTimelineCred = $ReadOnlyArray<{|
  // The interval for this slice.
  +interval: Interval,
  // The cred for each node.
  // (Uses the graph's canonical node ordering.)
  +cred: Float64Array,
|}>;

/**
 * Convert a TimelineDistribution into TimelineCred.
 *
 * The difference between the distribution and cred is that cred has been
 * re-normalized to present human-agreeable scores, rather than a probability
 * distribution.
 *
 * This implementation normalizes the scores so that in each interval, the
 * total score of every node matching scoringNodePrefix is equal to the
 * interval's weight.
 */
export function distributionToCred(
  ds: TimelineDistributions,
  nodeOrder: $ReadOnlyArray<NodeAddressT>,
  scoringNodePrefix: NodeAddressT
): FullTimelineCred {
  if (ds.length === 0) {
    return [];
  }
  const intervals = ds.map((x) => x.interval);
  const scoringNodeIndices = [];
  const cred = new Array(nodeOrder.length);
  for (let i = 0; i < nodeOrder.length; i++) {
    if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
      scoringNodeIndices.push(i);
    }
    cred[i] = new Array(intervals.length);
  }
  if (scoringNodeIndices.length === 0) {
    throw new Error("no nodes matched scoringNodePrefix");
  }

  return ds.map(({interval, distribution, intervalWeight}) => {
    const intervalTotalScore = sum(
      scoringNodeIndices.map((x) => distribution[x])
    );
    const intervalNormalizer = intervalWeight / intervalTotalScore;
    const cred = distribution.map((x) => x * intervalNormalizer);
    return {interval, cred};
  });
}
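To make the normalization concrete, here is a worked example using the same fixture values as the second test case below (illustrative arithmetic only, not part of the diff):

// With scoringNodePrefix = na("bar"), only "bar" (index 1) is a scoring node.
// Interval 1: scoring mass = 0.5, normalizer = 2 / 0.5 = 4, cred = [0.5 * 4, 0.5 * 4] = [2, 2].
// Interval 2: scoring mass = 0.1, normalizer = 10 / 0.1 = 100, cred = [0.9 * 100, 0.1 * 100] = [90, 10].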
src/analysis/timeline/distributionToCred.test.js
@@ -0,0 +1,79 @@
// @flow

import {NodeAddress} from "../../core/graph";
import {distributionToCred} from "./distributionToCred";

describe("src/analysis/timeline/distributionToCred", () => {
  const na = (...parts) => NodeAddress.fromParts(parts);
  describe("distributionToCred", () => {
    it("works in a case where all nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([1, 1]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([9, 1]),
        },
      ];
      expect(expected).toEqual(actual);
    });
    it("works in a case where some nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          intervalWeight: 10,
          distribution: new Float64Array([0.9, 0.1]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const actual = distributionToCred(ds, nodeOrder, na("bar"));
      const expected = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          cred: new Float64Array([2, 2]),
        },
        {
          interval: {startTimeMs: 10, endTimeMs: 20},
          cred: new Float64Array([90, 10]),
        },
      ];
      expect(expected).toEqual(actual);
    });
    it("errors when no nodes are scoring", () => {
      const ds = [
        {
          interval: {startTimeMs: 0, endTimeMs: 10},
          intervalWeight: 2,
          distribution: new Float64Array([0.5, 0.5]),
        },
      ];
      const nodeOrder = [na("foo"), na("bar")];
      const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
      expect(fail).toThrowError("no nodes matched scoringNodePrefix");
    });
    it("returns empty array if no intervals are present", () => {
      expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
    });
  });
});