Refactor timelinePagerank, exposing connections (#1797)

This commit refactors the internal helper methods in timelinePagerank so that, rather than piping around an OrderedSparseMarkovChain, we instead provide the NodeToConnections from which that OSMC may be derived. This is important because the NodeToConnections has the information necessary to derive how score flowed across individual edges, not just the adjacency topology of the graph. This will allow us to compute the OutputEdge format with edge-specific cred flows, as documented in #1773. Test plan: `yarn test` passes. It's a simple refactor.
2020-05-30 14:02:23 -07:00 · 2020-05-30 14:02:23 -07:00 · 71c0b0d66d
parent d1144217b0
commit 71c0b0d66d
2 changed files with 30 additions and 30 deletions
--- a/src/core/algorithm/timelinePagerank.js
+++ b/src/core/algorithm/timelinePagerank.js
@ -20,12 +20,9 @@ import {type Distribution} from "./distribution";
 import {
  createOrderedSparseMarkovChain,
  createConnections,
+  type NodeToConnections,
 } from "./graphToMarkovChain";
-import {
-  findStationaryDistribution,
-  type PagerankParams,
-  type SparseMarkovChain,
-} from "./markovChain";
+import {findStationaryDistribution, type PagerankParams} from "./markovChain";

 /**
 * Represents raw PageRank distributions on a graph over time.
@ -42,6 +39,8 @@ export type TimelineDistributions = $ReadOnlyArray<{|
  +distribution: Distribution,
 |}>;

+export const SYNTHETIC_LOOP_WEIGHT = 1e-3;
+
 /**
 * Runs timeline PageRank on a graph.
 *
@ -127,7 +126,7 @@ export async function timelinePagerank(
    nodeEvaluator,
    intervalDecay
  );
-  const markovChainIterator = _timelineMarkovChain(
+  const nodeToConnectionsIterator = _timelineNodeToConnections(
    weightedGraph.graph,
    edgeCreationHistory,
    edgeEvaluator,
@ -137,7 +136,7 @@ export async function timelinePagerank(
    nodeOrder,
    intervals,
    nodeWeightIterator,
-    markovChainIterator,
+    nodeToConnectionsIterator,
    alpha
  );
 }
@ -166,12 +165,12 @@ export function* _timelineNodeWeights(
  }
 }

-export function* _timelineMarkovChain(
+export function* _timelineNodeToConnections(
  graph: Graph,
  edgeCreationHistory: $ReadOnlyArray<$ReadOnlyArray<Edge>>,
  edgeEvaluator: EdgeWeightEvaluator,
  intervalDecay: number
-): Iterator<SparseMarkovChain> {
+): Iterator<NodeToConnections> {
  const edgeWeights = new Map();
  for (const edges of edgeCreationHistory) {
    for (const [address, {forwards, backwards}] of edgeWeights.entries()) {
@ -187,19 +186,7 @@ export function* _timelineMarkovChain(
    const currentEdgeWeight = (e: Edge) => {
      return NullUtil.orElse(edgeWeights.get(e.address), defaultEdgeWeight);
    };
-    // Construct a new Markov chain corresponding to the current weights
-    // of the edges.
-    // TODO: Rather than constructing a markov chain from scratch, we can
-    // update the markov chain in-place. This should result in a significant
-    // performance improvement. We will need to change the markov chain
-    // representation to do so (we should add a `totalOutWeight` array to the
-    // chain, so that we can efficiently update the total weight as we add new
-    // connections, rather than needing to re-normalize the whole chain for
-    // each interval).
-    const chain = createOrderedSparseMarkovChain(
-      createConnections(graph, currentEdgeWeight, 1e-3)
-    ).chain;
-    yield chain;
+    yield createConnections(graph, currentEdgeWeight, SYNTHETIC_LOOP_WEIGHT);
  }
 }

@ -209,14 +196,17 @@ export async function _computeTimelineDistribution(
  nodeOrder: $ReadOnlyArray<NodeAddressT>,
  intervals: $ReadOnlyArray<Interval>,
  nodeWeightIterator: Iterator<Map<NodeAddressT, number>>,
-  markovChainIterator: Iterator<SparseMarkovChain>,
+  nodeToConnectionsIterator: Iterator<NodeToConnections>,
  alpha: number
 ): Promise<TimelineDistributions> {
  const results = [];
  let pi0: Distribution | null = null;
  for (const interval of intervals) {
    const nodeWeights = NullUtil.get(nodeWeightIterator.next().value);
-    const chain = NullUtil.get(markovChainIterator.next().value);
+    const nodeToConnections = NullUtil.get(
+      nodeToConnectionsIterator.next().value
+    );
+    const {chain} = createOrderedSparseMarkovChain(nodeToConnections);

    const seed = weightedDistribution(nodeOrder, nodeWeights);
    if (pi0 == null) {
--- a/src/core/algorithm/timelinePagerank.test.js
+++ b/src/core/algorithm/timelinePagerank.test.js
@ -4,7 +4,11 @@ import {sum} from "d3-array";
 import * as NullUtil from "../../util/null";
 import {node, edge} from "../graphTestUtil";
 import {Graph, type EdgeAddressT, type Edge} from "../graph";
-import {_timelineNodeWeights, _timelineMarkovChain} from "./timelinePagerank";
+import {
+  _timelineNodeWeights,
+  _timelineNodeToConnections,
+  SYNTHETIC_LOOP_WEIGHT,
+} from "./timelinePagerank";
 import {
  createConnections,
  createOrderedSparseMarkovChain,
@ -51,7 +55,7 @@ describe("src/core/algorithm/timelinePagerank", () => {
    });
  });

-  describe("_timelineMarkovChain", () => {
+  describe("_timelineNodeToConnections", () => {
    it("works for a simple case", () => {
      const a = node("a");
      const b = node("b");
@ -64,19 +68,25 @@ describe("src/core/algorithm/timelinePagerank", () => {
      ): SparseMarkovChain {
        const edgeWeight = (e: Edge) =>
          NullUtil.orElse(w.get(e.address), {forwards: 0, backwards: 0});
-        const connections = createConnections(graph, edgeWeight, 1e-3);
-        return createOrderedSparseMarkovChain(connections).chain;
+        const nodeToConnections = createConnections(
+          graph,
+          edgeWeight,
+          SYNTHETIC_LOOP_WEIGHT
+        );
+        return createOrderedSparseMarkovChain(nodeToConnections).chain;
      }

      const edgeCreationHistory = [[], [e1], [], [e2]];
      const edgeEvaluator = (_) => ({forwards: 1, backwards: 0});
-      const chainIterator = _timelineMarkovChain(
+      const nodeToConnectionsIterator = _timelineNodeToConnections(
        graph,
        edgeCreationHistory,
        edgeEvaluator,
        0.5
      );
-      const chains = Array.from(chainIterator);
+      const chains = Array.from(nodeToConnectionsIterator).map(
+        (x) => createOrderedSparseMarkovChain(x).chain
+      );

      const w1 = new Map();
      const chain1 = weightsToChain(w1);