diff --git a/src/analysis/__snapshots__/pagerankNodeDecomposition.test.js.snap b/src/analysis/__snapshots__/pagerankNodeDecomposition.test.js.snap index c86348b..8682d8c 100644 --- a/src/analysis/__snapshots__/pagerankNodeDecomposition.test.js.snap +++ b/src/analysis/__snapshots__/pagerankNodeDecomposition.test.js.snap @@ -3,7 +3,7 @@ exports[`analysis/pagerankNodeDecomposition decompose has the expected output on a simple asymmetric chain 1`] = ` Map { "NodeAddress[\\"n1\\"]" => Object { - "score": 0.19117656878499834, + "score": 0.19117611352146152, "scoredConnections": Array [ Object { "connection": Object { @@ -17,7 +17,7 @@ Map { }, "weight": 0.1875, }, - "connectionScore": 0.1102941261444197, + "connectionScore": 0.11029408674138254, "source": "NodeAddress[\\"sink\\"]", }, Object { @@ -32,7 +32,7 @@ Map { }, "weight": 0.3, }, - "connectionScore": 0.066176427533429, + "connectionScore": 0.06617662715734954, "source": "NodeAddress[\\"n2\\"]", }, Object { @@ -42,13 +42,13 @@ Map { }, "weight": 0.07692307692307693, }, - "connectionScore": 0.014705889906538334, + "connectionScore": 0.014705854886266271, "source": "NodeAddress[\\"n1\\"]", }, ], }, "NodeAddress[\\"n2\\"]" => Object { - "score": 0.22058809177809668, + "score": 0.22058875719116514, "scoredConnections": Array [ Object { "connection": Object { @@ -62,7 +62,7 @@ Map { }, "weight": 0.1875, }, - "connectionScore": 0.1102941261444197, + "connectionScore": 0.11029408674138254, "source": "NodeAddress[\\"sink\\"]", }, Object { @@ -77,7 +77,7 @@ Map { }, "weight": 0.46153846153846156, }, - "connectionScore": 0.08823533943923001, + "connectionScore": 0.08823512931759762, "source": "NodeAddress[\\"n1\\"]", }, Object { @@ -87,13 +87,13 @@ Map { }, "weight": 0.1, }, - "connectionScore": 0.02205880917780967, + "connectionScore": 0.022058875719116515, "source": "NodeAddress[\\"n2\\"]", }, ], }, "NodeAddress[\\"sink\\"]" => Object { - "score": 0.5882353394369051, + "score": 0.5882351292873735, "scoredConnections": Array [ Object { "connection": Object { @@ -107,7 +107,7 @@ Map { }, "weight": 0.375, }, - "connectionScore": 0.2205882522888394, + "connectionScore": 0.22058817348276508, "source": "NodeAddress[\\"sink\\"]", }, Object { @@ -122,7 +122,7 @@ Map { }, "weight": 0.6, }, - "connectionScore": 0.132352855066858, + "connectionScore": 0.13235325431469908, "source": "NodeAddress[\\"n2\\"]", }, Object { @@ -137,7 +137,7 @@ Map { }, "weight": 0.1875, }, - "connectionScore": 0.1102941261444197, + "connectionScore": 0.11029408674138254, "source": "NodeAddress[\\"sink\\"]", }, Object { @@ -152,7 +152,7 @@ Map { }, "weight": 0.46153846153846156, }, - "connectionScore": 0.08823533943923001, + "connectionScore": 0.08823512931759762, "source": "NodeAddress[\\"n1\\"]", }, Object { @@ -162,7 +162,7 @@ Map { }, "weight": 0.0625, }, - "connectionScore": 0.03676470871480657, + "connectionScore": 0.036764695580460846, "source": "NodeAddress[\\"sink\\"]", }, ], diff --git a/src/analysis/pagerank.js b/src/analysis/pagerank.js index b517d8a..20a79f8 100644 --- a/src/analysis/pagerank.js +++ b/src/analysis/pagerank.js @@ -58,13 +58,16 @@ export async function pagerank( fullOptions.selfLoopWeight ); const osmc = createOrderedSparseMarkovChain(connections); - const distribution = await findStationaryDistribution(osmc.chain, { + const distributionResult = await findStationaryDistribution(osmc.chain, { verbose: fullOptions.verbose, convergenceThreshold: fullOptions.convergenceThreshold, maxIterations: fullOptions.maxIterations, yieldAfterMs: 30, }); - const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution); + const pi = distributionToNodeDistribution( + osmc.nodeOrder, + distributionResult.pi + ); const scores = scoreByConstantTotal( pi, fullOptions.totalScore, diff --git a/src/analysis/pagerankNodeDecomposition.test.js b/src/analysis/pagerankNodeDecomposition.test.js index 17522d5..f6fcb5c 100644 --- a/src/analysis/pagerankNodeDecomposition.test.js +++ b/src/analysis/pagerankNodeDecomposition.test.js @@ -7,7 +7,10 @@ import { createOrderedSparseMarkovChain, } from "../core/attribution/graphToMarkovChain"; import {findStationaryDistribution} from "../core/attribution/markovChain"; -import {decompose} from "./pagerankNodeDecomposition"; +import { + decompose, + type PagerankNodeDecomposition, +} from "./pagerankNodeDecomposition"; import * as MapUtil from "../util/map"; import {advancedGraph} from "../core/graphTestUtil"; @@ -16,7 +19,7 @@ import {advancedGraph} from "../core/graphTestUtil"; * Format a decomposition to be shown in a snapshot. This converts * addresses and edges to strings to avoid NUL characters. */ -function formatDecomposition(d) { +function formatDecomposition(d: PagerankNodeDecomposition) { return MapUtil.mapEntries(d, (key, {score, scoredConnections}) => [ NodeAddress.toString(key), { @@ -128,16 +131,19 @@ describe("analysis/pagerankNodeDecomposition", () => { const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); const connections = createConnections(g, edgeWeight, 1.0); const osmc = createOrderedSparseMarkovChain(connections); - const pi = await findStationaryDistribution(osmc.chain, { + const distributionResult = await findStationaryDistribution(osmc.chain, { verbose: false, convergenceThreshold: 1e-6, maxIterations: 255, yieldAfterMs: 1, }); - const pr = distributionToNodeDistribution(osmc.nodeOrder, pi); - const result = decompose(pr, connections); - expect(formatDecomposition(result)).toMatchSnapshot(); - validateDecomposition(result); + const pr = distributionToNodeDistribution( + osmc.nodeOrder, + distributionResult.pi + ); + const decomposition = decompose(pr, connections); + expect(formatDecomposition(decomposition)).toMatchSnapshot(); + validateDecomposition(decomposition); }); it("is valid on the example graph", async () => { @@ -145,15 +151,18 @@ describe("analysis/pagerankNodeDecomposition", () => { const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); const connections = createConnections(g, edgeWeight, 1.0); const osmc = createOrderedSparseMarkovChain(connections); - const pi = await findStationaryDistribution(osmc.chain, { + const distributionResult = await findStationaryDistribution(osmc.chain, { verbose: false, convergenceThreshold: 1e-6, maxIterations: 255, yieldAfterMs: 1, }); - const pr = distributionToNodeDistribution(osmc.nodeOrder, pi); - const result = decompose(pr, connections); - validateDecomposition(result); + const pr = distributionToNodeDistribution( + osmc.nodeOrder, + distributionResult.pi + ); + const decomposition = decompose(pr, connections); + validateDecomposition(decomposition); }); }); }); diff --git a/src/core/attribution/markovChain.js b/src/core/attribution/markovChain.js index b02dae8..7d615fa 100644 --- a/src/core/attribution/markovChain.js +++ b/src/core/attribution/markovChain.js @@ -7,6 +7,24 @@ */ export type Distribution = Float64Array; +export type StationaryDistributionResult = {| + // The final distribution after attempting to find the stationary distribution + // of the Markov chain. + +pi: Distribution, + + // Reports how close the returned distribution is to being converged. + // + // If the convergenceDelta is near zero, then the distribution is well-converged + // (stationary). + // + // Specifically: let `x` be the distribution being returned, and let `x'` be the + // distribution after one additional Markov action, i.e. + // `x' = sparseMarkovChainAction(chain, x)` + // Then the convergence delta is the maximum difference in absolute value between components + // in `x` and `x'`. + +convergenceDelta: number, +|}; + /** * A representation of a sparse transition matrix that is convenient for * computations on Markov chains. @@ -98,47 +116,68 @@ export function sparseMarkovChainAction( return result; } +/** + * Compute the maximum difference (in absolute value) between components in two + * distributions. + * + * Equivalent to $\norm{pi0 - pi1}_\infty$. + */ +export function computeDelta(pi0: Distribution, pi1: Distribution) { + let maxDelta = -Infinity; + // Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the + // same (i.e., there has been no permutation). + pi0.forEach((x, i) => { + const delta = Math.abs(x - pi1[i]); + maxDelta = Math.max(delta, maxDelta); + }); + return maxDelta; +} + function* findStationaryDistributionGenerator( chain: SparseMarkovChain, options: {| +verbose: boolean, + // A distribution is considered stationary if the action of the Markov + // chain on the distribution does not change any component by more than + // `convergenceThreshold` in absolute value. +convergenceThreshold: number, + // We will run maxIterations markov chain steps at most. +maxIterations: number, |} -): Generator { +): Generator { let pi = uniformDistribution(chain.length); let scratch = new Float64Array(pi.length); - function computeDelta(pi0, pi1) { - let maxDelta = -Infinity; - // Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the - // same (i.e., there has been no permutation). - pi0.forEach((x, i) => { - const delta = Math.abs(x - pi1[i]); - maxDelta = Math.max(delta, maxDelta); - }); - return maxDelta; - } - let iteration = 0; + + let nIterations = 0; while (true) { - if (iteration >= options.maxIterations) { + if (nIterations >= options.maxIterations) { if (options.verbose) { - console.log(`[${iteration}] FAILED to converge`); + console.log(`[${nIterations}] FAILED to converge`); } - return pi; + // We need to do one more step so that we can compute the empirical convergence + // delta for the returned distribution. + sparseMarkovChainActionInto(chain, pi, scratch); + const convergenceDelta = computeDelta(pi, scratch); + return {pi, convergenceDelta}; } - iteration++; + nIterations++; sparseMarkovChainActionInto(chain, pi, scratch); - const delta = computeDelta(pi, scratch); - [scratch, pi] = [pi, scratch]; + // We compute the convergenceDelta between 'scratch' (the newest + // distribution) and 'pi' (the distribution from the previous step). If the + // delta is below threshold, then the distribution from the last step was + // already converged and we return it (not scratch). Otherwise, we assign + // `scratch` to `distribution` and try again. + const convergenceDelta = computeDelta(pi, scratch); if (options.verbose) { - console.log(`[${iteration}] delta = ${delta}`); + console.log(`[${nIterations}] delta = ${convergenceDelta}`); } - if (delta < options.convergenceThreshold) { + if (convergenceDelta < options.convergenceThreshold) { if (options.verbose) { - console.log(`[${iteration}] CONVERGED`); + console.log(`[${nIterations}] CONVERGED`); } - return pi; + return {pi, convergenceDelta}; } + [scratch, pi] = [pi, scratch]; yield; } // ESLint knows that this next line is unreachable, but Flow doesn't. :-) @@ -154,7 +193,7 @@ export function findStationaryDistribution( +maxIterations: number, +yieldAfterMs: number, |} -): Promise { +): Promise { let gen = findStationaryDistributionGenerator(chain, { verbose: options.verbose, convergenceThreshold: options.convergenceThreshold, diff --git a/src/core/attribution/markovChain.test.js b/src/core/attribution/markovChain.test.js index 8c23784..205ecc9 100644 --- a/src/core/attribution/markovChain.test.js +++ b/src/core/attribution/markovChain.test.js @@ -6,6 +6,8 @@ import { sparseMarkovChainAction, sparseMarkovChainFromTransitionMatrix, uniformDistribution, + computeDelta, + type StationaryDistributionResult, } from "./markovChain"; describe("core/attribution/markovChain", () => { @@ -145,21 +147,29 @@ describe("core/attribution/markovChain", () => { } describe("findStationaryDistribution", () => { + function validateConvegenceDelta(chain, d: StationaryDistributionResult) { + const nextPi = sparseMarkovChainAction(chain, d.pi); + expect(d.convergenceDelta).toEqual(computeDelta(d.pi, nextPi)); + } + it("finds an all-accumulating stationary distribution", async () => { const chain = sparseMarkovChainFromTransitionMatrix([ [1, 0, 0], [0.25, 0, 0.75], [0.25, 0.75, 0], ]); - const pi = await findStationaryDistribution(chain, { + const result = await findStationaryDistribution(chain, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, yieldAfterMs: 1, }); - expectStationary(chain, pi); + expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7); + validateConvegenceDelta(chain, result); + + expectStationary(chain, result.pi); const expected = new Float64Array([1, 0, 0]); - expectAllClose(pi, expected); + expectAllClose(result.pi, expected); }); it("finds a non-degenerate stationary distribution", async () => { @@ -174,40 +184,49 @@ describe("core/attribution/markovChain", () => { [0.5, 0, 0.25, 0, 0.25], [0.5, 0.25, 0, 0.25, 0], ]); - const pi = await findStationaryDistribution(chain, { + const result = await findStationaryDistribution(chain, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, yieldAfterMs: 1, }); - expectStationary(chain, pi); + + expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7); + validateConvegenceDelta(chain, result); + + expectStationary(chain, result.pi); const expected = new Float64Array([1 / 3, 1 / 6, 1 / 6, 1 / 6, 1 / 6]); - expectAllClose(pi, expected); + expectAllClose(result.pi, expected); }); it("finds the stationary distribution of a periodic chain", async () => { const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]); - const pi = await findStationaryDistribution(chain, { + const result = await findStationaryDistribution(chain, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, yieldAfterMs: 1, }); - expectStationary(chain, pi); + + expect(result.convergenceDelta).toEqual(0); + validateConvegenceDelta(chain, result); + + expectStationary(chain, result.pi); const expected = new Float64Array([0.5, 0.5]); - expectAllClose(pi, expected); + expectAllClose(result.pi, expected); }); it("returns initial distribution if maxIterations===0", async () => { const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]); - const pi = await findStationaryDistribution(chain, { + const result = await findStationaryDistribution(chain, { verbose: false, convergenceThreshold: 1e-7, maxIterations: 0, yieldAfterMs: 1, }); const expected = new Float64Array([0.5, 0.5]); - expect(pi).toEqual(expected); + expect(result.pi).toEqual(expected); + validateConvegenceDelta(chain, result); }); }); });