Factor out _intervalResult in timelinePagerank (#1799)
As part of the work for #1773, I want to add a lot more complexity to the logic that computes individual time-slices of PageRank scores, so that we can trace how much score flowed along each individual edge. This means adding more complexity to the _computeTimelineDistribution function; however, that function is an untested wrapper, so I'm hesitant to add more complexity to it directly. Instead, I'm first factoring out an _intervalResult method and a corresponding type, which computes the scores for a given timeline interval. I've also added sanity-checking tests for this method. In a follow-on commit, I'll add more logic for tracking edge-level score flows.

Test plan: This is just a refactor, maintaining existing behavior and adding tests. `yarn test --full` passes. Since our sharness tests do a full load (including timeline cred computation) on realistic data from GitHub, this gives us confidence that cred semantics have not changed.
This commit is contained in:
parent 71c0b0d66d
commit 27aeacc906
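The heart of the refactor is an unchanged calling discipline: each interval's PageRank run is seeded with the previous interval's converged distribution (pi0), which speeds convergence when the graph changes slowly between slices. A minimal, self-contained sketch of that pattern in plain JavaScript (the stub body and its placeholder scores are invented for illustration; only the warm-start flow and the {interval, intervalWeight, distribution} result shape come from the diff below):

// Stub standing in for the real _intervalResult from the diff; it only
// mimics the shape of the returned IntervalResult.
async function intervalResultStub(interval, pi0) {
  // A real implementation would run PageRank to convergence here.
  const distribution = pi0 == null ? [0.5, 0.5] : pi0;
  return {interval, intervalWeight: 1, distribution};
}

async function computeAllIntervals(intervals) {
  const results = [];
  let pi0 = null; // the first interval has no warm start
  for (const interval of intervals) {
    const result = await intervalResultStub(interval, pi0);
    results.push(result);
    // Warm-start the next interval from this one's converged scores.
    pi0 = result.distribution;
  }
  return results;
}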
src/core/algorithm/timelinePagerank.js

@@ -24,10 +24,7 @@ import {
 } from "./graphToMarkovChain";
 import {findStationaryDistribution, type PagerankParams} from "./markovChain";
 
-/**
- * Represents raw PageRank distributions on a graph over time.
- */
-export type TimelineDistributions = $ReadOnlyArray<{|
+export type IntervalResult = {|
   // The interval for this slice
   +interval: Interval,
   // The total node weight within this interval (normalized to account for the
@@ -37,7 +34,11 @@ export type TimelineDistributions = $ReadOnlyArray<{|
   // The raw score distribution over nodes for this interval (i.e. sums to 1).
   // Uses the canonical graph node order.
   +distribution: Distribution,
-|}>;
+|};
+/**
+ * Represents raw PageRank distributions on a graph over time.
+ */
+export type TimelineDistributions = $ReadOnlyArray<IntervalResult>;
 
 export const SYNTHETIC_LOOP_WEIGHT = 1e-3;
 
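An aside for readers skimming the diff above: the leading "+" on +interval and +distribution is not a diff marker. It is Flow's variance sigil, which marks a property covariant (read-only), and the {| ... |} delimiters declare an exact object type that forbids extra keys. A tiny standalone illustration (names invented):

// Flow: exact object type with read-only properties.
type Point = {|
  +x: number, // "+" makes x read-only through this type
  +y: number,
|};
const p: Point = {x: 1, y: 2};
// p.x = 3;                              // error: x is covariant (read-only)
// const q: Point = {x: 1, y: 2, z: 3}; // error: exact type forbids z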
@@ -206,6 +207,30 @@ export async function _computeTimelineDistribution(
     const nodeToConnections = NullUtil.get(
       nodeToConnectionsIterator.next().value
     );
+    const result = await _intervalResult(
+      nodeWeights,
+      nodeToConnections,
+      nodeOrder,
+      interval,
+      pi0,
+      alpha
+    );
+    results.push(result);
+    // Use the latest convergence results as the starting point for the next run
+    // of PageRank
+    pi0 = result.distribution;
+  }
+  return results;
+}
+
+export async function _intervalResult(
+  nodeWeights: Map<NodeAddressT, number>,
+  nodeToConnections: NodeToConnections,
+  nodeOrder: $ReadOnlyArray<NodeAddressT>,
+  interval: Interval,
+  pi0: Distribution | null,
+  alpha: number
+): Promise<IntervalResult> {
   const {chain} = createOrderedSparseMarkovChain(nodeToConnections);
 
   const seed = weightedDistribution(nodeOrder, nodeWeights);
@@ -220,14 +245,9 @@ export async function _computeTimelineDistribution(
     yieldAfterMs: 30,
   });
   const intervalWeight = sum(nodeWeights.values());
-  results.push({
+  return {
     interval,
     intervalWeight,
     distribution: distributionResult.pi,
-  });
-  // Use the latest convergence results as the starting point for the next run
-  // of PageRank
-  pi0 = distributionResult.pi;
-  }
-  return results;
+  };
 }
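For context on what the new helper returns: findStationaryDistribution iterates the alpha-smoothed chain to its stationary distribution. Under the standard PageRank reading of alpha and seed (my summary; the diff itself does not spell this out), the score vector is the fixed point

    \pi = \alpha \, s + (1 - \alpha) \, \pi M

where s is the seed distribution built by weightedDistribution(nodeOrder, nodeWeights) and M is the row-stochastic transition matrix from createOrderedSparseMarkovChain. Note that pi0 is only the iteration's starting guess: it affects how fast the computation converges, not the distribution it converges to.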
src/core/algorithm/timelinePagerank.test.js

@@ -2,12 +2,13 @@
 
 import {sum} from "d3-array";
 import * as NullUtil from "../../util/null";
-import {node, edge} from "../graphTestUtil";
+import {node, edge, advancedGraph} from "../graphTestUtil";
 import {Graph, type EdgeAddressT, type Edge} from "../graph";
 import {
   _timelineNodeWeights,
   _timelineNodeToConnections,
   SYNTHETIC_LOOP_WEIGHT,
+  _intervalResult,
 } from "./timelinePagerank";
 import {
   createConnections,
@@ -107,4 +108,67 @@ describe("src/core/algorithm/timelinePagerank", () => {
       expect(chains[3]).toEqual(chain4);
     });
   });
+
+  describe("_intervalResult", () => {
+    async function example() {
+      const {graph1, nodes} = advancedGraph();
+      const g = graph1();
+      const nodeWeights = new Map()
+        .set(nodes.src.address, 1)
+        .set(nodes.isolated.address, 2);
+      const edgeFn = (_unused_edge) => ({forwards: 1, backwards: 0.5});
+      const nodeToConnections = createConnections(g, edgeFn, 1e-3);
+      const nodeOrder = Array.from(g.nodes()).map((x) => x.address);
+      const interval = {endTimeMs: 1000, startTimeMs: 0};
+      const pi0 = null;
+      const alpha = 0.05;
+      const result = await _intervalResult(
+        nodeWeights,
+        nodeToConnections,
+        nodeOrder,
+        interval,
+        pi0,
+        alpha
+      );
+      return {
+        graph: g,
+        nodes,
+        nodeOrder,
+        nodeWeights,
+        edgeFn,
+        nodeToConnections,
+        interval,
+        pi0,
+        alpha,
+        result,
+      };
+    }
+    it("passes through the interval", async () => {
+      const {result, interval} = await example();
+      expect(result.interval).toEqual(interval);
+    });
+    it("computes the summed nodeWeight", async () => {
+      const {result, nodeWeights} = await example();
+      const actualIntervalWeight = sum(nodeWeights.values());
+      expect(result.intervalWeight).toEqual(actualIntervalWeight);
+    });
+    it("produces sane score distribution on an example graph", async () => {
+      const {result, nodes, nodeOrder} = await example();
+      function getScore(a) {
+        const idx = nodeOrder.indexOf(a.address);
+        if (idx === -1) {
+          throw new Error("bad address");
+        }
+        return result.distribution[idx];
+      }
+      const isoScore = getScore(nodes.isolated);
+      const srcScore = getScore(nodes.src);
+      const dstScore = getScore(nodes.dst);
+      expect(isoScore + srcScore + dstScore).toBeCloseTo(1);
+      // It has 2/3 of the weight, and is isolated, so it's simple
+      expect(isoScore).toBeCloseTo(2 / 3);
+      // src has the weight, and dst doesn't, so it should have a higher score
+      expect(srcScore).toBeGreaterThan(dstScore);
+    });
+  });
 });
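A note on the toBeCloseTo(2 / 3) expectation above: the isolated node holds 2 of the 3 units of seed weight, and (being isolated) its only connection is its synthetic self-loop, so every unit of score it emits flows straight back to itself. Assuming the standard PageRank fixed point sketched earlier, its stationary score satisfies

    \pi_{iso} = \alpha \cdot \tfrac{2}{3} + (1 - \alpha) \cdot \pi_{iso}

which solves to \pi_{iso} = 2/3 exactly, independent of alpha. That is the "simple" case the test comment refers to.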