Report Markov Chain convergence statistics (#1053)

This commit modifies `markovChain.findStationaryDistribution` so that in addition to returning the final distribution, it also reports the final convergence delta. This is motivated by the proposed API for the new PagerankGraph (see [#1020]). Also, I think it makes a nice addition to the test code. Note that this slightly changes the output from `findStationaryDistribution`, because we now return the first distribution that is sufficiently converged, rather than that distribution with one additional Markov action. Test plan: Unit tests are updated, and `yarn test` passes. [#1020]: https://github.com/sourcecred/sourcecred/issues/1020 Thanks to @BrianLitwin for semi-pair-programming it Thanks to @wchargin for extensive review feedback.
2025-02-17 06:56:36 +00:00 · 2019-02-12 19:28:53 -07:00 · 2019-02-12 19:28:53 -07:00 · dcda8bde1d
commit dcda8bde1d
parent c428ee01a3
5 changed files with 131 additions and 61 deletions
--- a/src/analysis/snapshots/pagerankNodeDecomposition.test.js.snap
+++ b/src/analysis/snapshots/pagerankNodeDecomposition.test.js.snap
@ -3,7 +3,7 @@
 exports[`analysis/pagerankNodeDecomposition decompose has the expected output on a simple asymmetric chain 1`] = `
 Map {
  "NodeAddress[\\"n1\\"]" => Object {
-    "score": 0.19117656878499834,
+    "score": 0.19117611352146152,
    "scoredConnections": Array [
      Object {
        "connection": Object {
@ -17,7 +17,7 @@ Map {
          },
          "weight": 0.1875,
        },
-        "connectionScore": 0.1102941261444197,
+        "connectionScore": 0.11029408674138254,
        "source": "NodeAddress[\\"sink\\"]",
      },
      Object {
@ -32,7 +32,7 @@ Map {
          },
          "weight": 0.3,
        },
-        "connectionScore": 0.066176427533429,
+        "connectionScore": 0.06617662715734954,
        "source": "NodeAddress[\\"n2\\"]",
      },
      Object {
@ -42,13 +42,13 @@ Map {
          },
          "weight": 0.07692307692307693,
        },
-        "connectionScore": 0.014705889906538334,
+        "connectionScore": 0.014705854886266271,
        "source": "NodeAddress[\\"n1\\"]",
      },
    ],
  },
  "NodeAddress[\\"n2\\"]" => Object {
-    "score": 0.22058809177809668,
+    "score": 0.22058875719116514,
    "scoredConnections": Array [
      Object {
        "connection": Object {
@ -62,7 +62,7 @@ Map {
          },
          "weight": 0.1875,
        },
-        "connectionScore": 0.1102941261444197,
+        "connectionScore": 0.11029408674138254,
        "source": "NodeAddress[\\"sink\\"]",
      },
      Object {
@ -77,7 +77,7 @@ Map {
          },
          "weight": 0.46153846153846156,
        },
-        "connectionScore": 0.08823533943923001,
+        "connectionScore": 0.08823512931759762,
        "source": "NodeAddress[\\"n1\\"]",
      },
      Object {
@ -87,13 +87,13 @@ Map {
          },
          "weight": 0.1,
        },
-        "connectionScore": 0.02205880917780967,
+        "connectionScore": 0.022058875719116515,
        "source": "NodeAddress[\\"n2\\"]",
      },
    ],
  },
  "NodeAddress[\\"sink\\"]" => Object {
-    "score": 0.5882353394369051,
+    "score": 0.5882351292873735,
    "scoredConnections": Array [
      Object {
        "connection": Object {
@ -107,7 +107,7 @@ Map {
          },
          "weight": 0.375,
        },
-        "connectionScore": 0.2205882522888394,
+        "connectionScore": 0.22058817348276508,
        "source": "NodeAddress[\\"sink\\"]",
      },
      Object {
@ -122,7 +122,7 @@ Map {
          },
          "weight": 0.6,
        },
-        "connectionScore": 0.132352855066858,
+        "connectionScore": 0.13235325431469908,
        "source": "NodeAddress[\\"n2\\"]",
      },
      Object {
@ -137,7 +137,7 @@ Map {
          },
          "weight": 0.1875,
        },
-        "connectionScore": 0.1102941261444197,
+        "connectionScore": 0.11029408674138254,
        "source": "NodeAddress[\\"sink\\"]",
      },
      Object {
@ -152,7 +152,7 @@ Map {
          },
          "weight": 0.46153846153846156,
        },
-        "connectionScore": 0.08823533943923001,
+        "connectionScore": 0.08823512931759762,
        "source": "NodeAddress[\\"n1\\"]",
      },
      Object {
@ -162,7 +162,7 @@ Map {
          },
          "weight": 0.0625,
        },
-        "connectionScore": 0.03676470871480657,
+        "connectionScore": 0.036764695580460846,
        "source": "NodeAddress[\\"sink\\"]",
      },
    ],
--- a/src/analysis/pagerank.js
+++ b/src/analysis/pagerank.js
@ -58,13 +58,16 @@ export async function pagerank(
    fullOptions.selfLoopWeight
  );
  const osmc = createOrderedSparseMarkovChain(connections);
-  const distribution = await findStationaryDistribution(osmc.chain, {
+  const distributionResult = await findStationaryDistribution(osmc.chain, {
    verbose: fullOptions.verbose,
    convergenceThreshold: fullOptions.convergenceThreshold,
    maxIterations: fullOptions.maxIterations,
    yieldAfterMs: 30,
  });
-  const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
+  const pi = distributionToNodeDistribution(
+    osmc.nodeOrder,
+    distributionResult.pi
+  );
  const scores = scoreByConstantTotal(
    pi,
    fullOptions.totalScore,
--- a/src/analysis/pagerankNodeDecomposition.test.js
+++ b/src/analysis/pagerankNodeDecomposition.test.js
@ -7,7 +7,10 @@ import {
  createOrderedSparseMarkovChain,
 } from "../core/attribution/graphToMarkovChain";
 import {findStationaryDistribution} from "../core/attribution/markovChain";
-import {decompose} from "./pagerankNodeDecomposition";
+import {
+  decompose,
+  type PagerankNodeDecomposition,
+} from "./pagerankNodeDecomposition";
 import * as MapUtil from "../util/map";

 import {advancedGraph} from "../core/graphTestUtil";
@ -16,7 +19,7 @@ import {advancedGraph} from "../core/graphTestUtil";
 * Format a decomposition to be shown in a snapshot. This converts
 * addresses and edges to strings to avoid NUL characters.
 */
-function formatDecomposition(d) {
+function formatDecomposition(d: PagerankNodeDecomposition) {
  return MapUtil.mapEntries(d, (key, {score, scoredConnections}) => [
    NodeAddress.toString(key),
    {
@ -128,16 +131,19 @@ describe("analysis/pagerankNodeDecomposition", () => {
      const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
      const connections = createConnections(g, edgeWeight, 1.0);
      const osmc = createOrderedSparseMarkovChain(connections);
-      const pi = await findStationaryDistribution(osmc.chain, {
+      const distributionResult = await findStationaryDistribution(osmc.chain, {
        verbose: false,
        convergenceThreshold: 1e-6,
        maxIterations: 255,
        yieldAfterMs: 1,
      });
-      const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
-      const result = decompose(pr, connections);
-      expect(formatDecomposition(result)).toMatchSnapshot();
-      validateDecomposition(result);
+      const pr = distributionToNodeDistribution(
+        osmc.nodeOrder,
+        distributionResult.pi
+      );
+      const decomposition = decompose(pr, connections);
+      expect(formatDecomposition(decomposition)).toMatchSnapshot();
+      validateDecomposition(decomposition);
    });

    it("is valid on the example graph", async () => {
@ -145,15 +151,18 @@ describe("analysis/pagerankNodeDecomposition", () => {
      const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
      const connections = createConnections(g, edgeWeight, 1.0);
      const osmc = createOrderedSparseMarkovChain(connections);
-      const pi = await findStationaryDistribution(osmc.chain, {
+      const distributionResult = await findStationaryDistribution(osmc.chain, {
        verbose: false,
        convergenceThreshold: 1e-6,
        maxIterations: 255,
        yieldAfterMs: 1,
      });
-      const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
-      const result = decompose(pr, connections);
-      validateDecomposition(result);
+      const pr = distributionToNodeDistribution(
+        osmc.nodeOrder,
+        distributionResult.pi
+      );
+      const decomposition = decompose(pr, connections);
+      validateDecomposition(decomposition);
    });
  });
 });
--- a/src/core/attribution/markovChain.js
+++ b/src/core/attribution/markovChain.js
@ -7,6 +7,24 @@
 */
 export type Distribution = Float64Array;

+export type StationaryDistributionResult = {|
+  // The final distribution after attempting to find the stationary distribution
+  // of the Markov chain.
+  +pi: Distribution,
+
+  // Reports how close the returned distribution is to being converged.
+  //
+  // If the convergenceDelta is near zero, then the distribution is well-converged
+  // (stationary).
+  //
+  // Specifically: let `x` be the distribution being returned, and let `x'` be the
+  // distribution after one additional Markov action, i.e.
+  // `x' = sparseMarkovChainAction(chain, x)`
+  // Then the convergence delta is the maximum difference in absolute value between components
+  // in `x` and `x'`.
+  +convergenceDelta: number,
+|};
+
 /**
 * A representation of a sparse transition matrix that is convenient for
 * computations on Markov chains.
@ -98,47 +116,68 @@ export function sparseMarkovChainAction(
  return result;
 }

+/**
+ * Compute the maximum difference (in absolute value) between components in two
+ * distributions.
+ *
+ * Equivalent to $\norm{pi0 - pi1}_\infty$.
+ */
+export function computeDelta(pi0: Distribution, pi1: Distribution) {
+  let maxDelta = -Infinity;
+  // Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
+  // same (i.e., there has been no permutation).
+  pi0.forEach((x, i) => {
+    const delta = Math.abs(x - pi1[i]);
+    maxDelta = Math.max(delta, maxDelta);
+  });
+  return maxDelta;
+}
+
 function* findStationaryDistributionGenerator(
  chain: SparseMarkovChain,
  options: {|
    +verbose: boolean,
+    // A distribution is considered stationary if the action of the Markov
+    // chain on the distribution does not change any component by more than
+    // `convergenceThreshold` in absolute value.
    +convergenceThreshold: number,
+    // We will run maxIterations markov chain steps at most.
    +maxIterations: number,
  |}
-): Generator<void, Distribution, void> {
+): Generator<void, StationaryDistributionResult, void> {
  let pi = uniformDistribution(chain.length);
  let scratch = new Float64Array(pi.length);
-  function computeDelta(pi0, pi1) {
-    let maxDelta = -Infinity;
-    // Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
-    // same (i.e., there has been no permutation).
-    pi0.forEach((x, i) => {
-      const delta = Math.abs(x - pi1[i]);
-      maxDelta = Math.max(delta, maxDelta);
-    });
-    return maxDelta;
-  }
-  let iteration = 0;
+
+  let nIterations = 0;
  while (true) {
-    if (iteration >= options.maxIterations) {
+    if (nIterations >= options.maxIterations) {
      if (options.verbose) {
-        console.log(`[${iteration}] FAILED to converge`);
+        console.log(`[${nIterations}] FAILED to converge`);
      }
-      return pi;
+      // We need to do one more step so that we can compute the empirical convergence
+      // delta for the returned distribution.
+      sparseMarkovChainActionInto(chain, pi, scratch);
+      const convergenceDelta = computeDelta(pi, scratch);
+      return {pi, convergenceDelta};
    }
-    iteration++;
+    nIterations++;
    sparseMarkovChainActionInto(chain, pi, scratch);
-    const delta = computeDelta(pi, scratch);
-    [scratch, pi] = [pi, scratch];
+    // We compute the convergenceDelta between 'scratch' (the newest
+    // distribution) and 'pi' (the distribution from the previous step). If the
+    // delta is below threshold, then the distribution from the last step was
+    // already converged and we return it (not scratch). Otherwise, we assign
+    // `scratch` to `distribution` and try again.
+    const convergenceDelta = computeDelta(pi, scratch);
    if (options.verbose) {
-      console.log(`[${iteration}] delta = ${delta}`);
+      console.log(`[${nIterations}] delta = ${convergenceDelta}`);
    }
-    if (delta < options.convergenceThreshold) {
+    if (convergenceDelta < options.convergenceThreshold) {
      if (options.verbose) {
-        console.log(`[${iteration}] CONVERGED`);
+        console.log(`[${nIterations}] CONVERGED`);
      }
-      return pi;
+      return {pi, convergenceDelta};
    }
+    [scratch, pi] = [pi, scratch];
    yield;
  }
  // ESLint knows that this next line is unreachable, but Flow doesn't. :-)
@ -154,7 +193,7 @@ export function findStationaryDistribution(
    +maxIterations: number,
    +yieldAfterMs: number,
  |}
-): Promise<Distribution> {
+): Promise<StationaryDistributionResult> {
  let gen = findStationaryDistributionGenerator(chain, {
    verbose: options.verbose,
    convergenceThreshold: options.convergenceThreshold,
--- a/src/core/attribution/markovChain.test.js
+++ b/src/core/attribution/markovChain.test.js
@ -6,6 +6,8 @@ import {
  sparseMarkovChainAction,
  sparseMarkovChainFromTransitionMatrix,
  uniformDistribution,
+  computeDelta,
+  type StationaryDistributionResult,
 } from "./markovChain";

 describe("core/attribution/markovChain", () => {
@ -145,21 +147,29 @@ describe("core/attribution/markovChain", () => {
  }

  describe("findStationaryDistribution", () => {
+    function validateConvegenceDelta(chain, d: StationaryDistributionResult) {
+      const nextPi = sparseMarkovChainAction(chain, d.pi);
+      expect(d.convergenceDelta).toEqual(computeDelta(d.pi, nextPi));
+    }
+
    it("finds an all-accumulating stationary distribution", async () => {
      const chain = sparseMarkovChainFromTransitionMatrix([
        [1, 0, 0],
        [0.25, 0, 0.75],
        [0.25, 0.75, 0],
      ]);
-      const pi = await findStationaryDistribution(chain, {
+      const result = await findStationaryDistribution(chain, {
        maxIterations: 255,
        convergenceThreshold: 1e-7,
        verbose: false,
        yieldAfterMs: 1,
      });
-      expectStationary(chain, pi);
+      expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
+      validateConvegenceDelta(chain, result);
+
+      expectStationary(chain, result.pi);
      const expected = new Float64Array([1, 0, 0]);
-      expectAllClose(pi, expected);
+      expectAllClose(result.pi, expected);
    });

    it("finds a non-degenerate stationary distribution", async () => {
@ -174,40 +184,49 @@ describe("core/attribution/markovChain", () => {
        [0.5, 0, 0.25, 0, 0.25],
        [0.5, 0.25, 0, 0.25, 0],
      ]);
-      const pi = await findStationaryDistribution(chain, {
+      const result = await findStationaryDistribution(chain, {
        maxIterations: 255,
        convergenceThreshold: 1e-7,
        verbose: false,
        yieldAfterMs: 1,
      });
-      expectStationary(chain, pi);
+
+      expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
+      validateConvegenceDelta(chain, result);
+
+      expectStationary(chain, result.pi);
      const expected = new Float64Array([1 / 3, 1 / 6, 1 / 6, 1 / 6, 1 / 6]);
-      expectAllClose(pi, expected);
+      expectAllClose(result.pi, expected);
    });

    it("finds the stationary distribution of a periodic chain", async () => {
      const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]);
-      const pi = await findStationaryDistribution(chain, {
+      const result = await findStationaryDistribution(chain, {
        maxIterations: 255,
        convergenceThreshold: 1e-7,
        verbose: false,
        yieldAfterMs: 1,
      });
-      expectStationary(chain, pi);
+
+      expect(result.convergenceDelta).toEqual(0);
+      validateConvegenceDelta(chain, result);
+
+      expectStationary(chain, result.pi);
      const expected = new Float64Array([0.5, 0.5]);
-      expectAllClose(pi, expected);
+      expectAllClose(result.pi, expected);
    });

    it("returns initial distribution if maxIterations===0", async () => {
      const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]);
-      const pi = await findStationaryDistribution(chain, {
+      const result = await findStationaryDistribution(chain, {
        verbose: false,
        convergenceThreshold: 1e-7,
        maxIterations: 0,
        yieldAfterMs: 1,
      });
      const expected = new Float64Array([0.5, 0.5]);
-      expect(pi).toEqual(expected);
+      expect(result.pi).toEqual(expected);
+      validateConvegenceDelta(chain, result);
    });
  });
 });