Report Markov Chain convergence statistics (#1053)

This commit modifies `markovChain.findStationaryDistribution` so that
in addition to returning the final distribution, it also reports the 
final convergence delta.

This is motivated by the proposed API for the new PagerankGraph (see
[#1020]). Also, I think it makes a nice addition to the test code.

Note that this slightly changes the output from `findStationaryDistribution`,
because we now return the first distribution that is sufficiently converged,
rather than that distribution with one additional Markov action.

Test plan:
Unit tests are updated, and `yarn test` passes.

[#1020]: https://github.com/sourcecred/sourcecred/issues/1020

Thanks to @BrianLitwin for semi-pair-programming it
Thanks to @wchargin for extensive review feedback.
This commit is contained in:
Dandelion Mané 2019-02-12 19:28:53 -07:00 committed by GitHub
parent c428ee01a3
commit dcda8bde1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 131 additions and 61 deletions

View File

@ -3,7 +3,7 @@
exports[`analysis/pagerankNodeDecomposition decompose has the expected output on a simple asymmetric chain 1`] = `
Map {
"NodeAddress[\\"n1\\"]" => Object {
"score": 0.19117656878499834,
"score": 0.19117611352146152,
"scoredConnections": Array [
Object {
"connection": Object {
@ -17,7 +17,7 @@ Map {
},
"weight": 0.1875,
},
"connectionScore": 0.1102941261444197,
"connectionScore": 0.11029408674138254,
"source": "NodeAddress[\\"sink\\"]",
},
Object {
@ -32,7 +32,7 @@ Map {
},
"weight": 0.3,
},
"connectionScore": 0.066176427533429,
"connectionScore": 0.06617662715734954,
"source": "NodeAddress[\\"n2\\"]",
},
Object {
@ -42,13 +42,13 @@ Map {
},
"weight": 0.07692307692307693,
},
"connectionScore": 0.014705889906538334,
"connectionScore": 0.014705854886266271,
"source": "NodeAddress[\\"n1\\"]",
},
],
},
"NodeAddress[\\"n2\\"]" => Object {
"score": 0.22058809177809668,
"score": 0.22058875719116514,
"scoredConnections": Array [
Object {
"connection": Object {
@ -62,7 +62,7 @@ Map {
},
"weight": 0.1875,
},
"connectionScore": 0.1102941261444197,
"connectionScore": 0.11029408674138254,
"source": "NodeAddress[\\"sink\\"]",
},
Object {
@ -77,7 +77,7 @@ Map {
},
"weight": 0.46153846153846156,
},
"connectionScore": 0.08823533943923001,
"connectionScore": 0.08823512931759762,
"source": "NodeAddress[\\"n1\\"]",
},
Object {
@ -87,13 +87,13 @@ Map {
},
"weight": 0.1,
},
"connectionScore": 0.02205880917780967,
"connectionScore": 0.022058875719116515,
"source": "NodeAddress[\\"n2\\"]",
},
],
},
"NodeAddress[\\"sink\\"]" => Object {
"score": 0.5882353394369051,
"score": 0.5882351292873735,
"scoredConnections": Array [
Object {
"connection": Object {
@ -107,7 +107,7 @@ Map {
},
"weight": 0.375,
},
"connectionScore": 0.2205882522888394,
"connectionScore": 0.22058817348276508,
"source": "NodeAddress[\\"sink\\"]",
},
Object {
@ -122,7 +122,7 @@ Map {
},
"weight": 0.6,
},
"connectionScore": 0.132352855066858,
"connectionScore": 0.13235325431469908,
"source": "NodeAddress[\\"n2\\"]",
},
Object {
@ -137,7 +137,7 @@ Map {
},
"weight": 0.1875,
},
"connectionScore": 0.1102941261444197,
"connectionScore": 0.11029408674138254,
"source": "NodeAddress[\\"sink\\"]",
},
Object {
@ -152,7 +152,7 @@ Map {
},
"weight": 0.46153846153846156,
},
"connectionScore": 0.08823533943923001,
"connectionScore": 0.08823512931759762,
"source": "NodeAddress[\\"n1\\"]",
},
Object {
@ -162,7 +162,7 @@ Map {
},
"weight": 0.0625,
},
"connectionScore": 0.03676470871480657,
"connectionScore": 0.036764695580460846,
"source": "NodeAddress[\\"sink\\"]",
},
],

View File

@ -58,13 +58,16 @@ export async function pagerank(
fullOptions.selfLoopWeight
);
const osmc = createOrderedSparseMarkovChain(connections);
const distribution = await findStationaryDistribution(osmc.chain, {
const distributionResult = await findStationaryDistribution(osmc.chain, {
verbose: fullOptions.verbose,
convergenceThreshold: fullOptions.convergenceThreshold,
maxIterations: fullOptions.maxIterations,
yieldAfterMs: 30,
});
const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
const pi = distributionToNodeDistribution(
osmc.nodeOrder,
distributionResult.pi
);
const scores = scoreByConstantTotal(
pi,
fullOptions.totalScore,

View File

@ -7,7 +7,10 @@ import {
createOrderedSparseMarkovChain,
} from "../core/attribution/graphToMarkovChain";
import {findStationaryDistribution} from "../core/attribution/markovChain";
import {decompose} from "./pagerankNodeDecomposition";
import {
decompose,
type PagerankNodeDecomposition,
} from "./pagerankNodeDecomposition";
import * as MapUtil from "../util/map";
import {advancedGraph} from "../core/graphTestUtil";
@ -16,7 +19,7 @@ import {advancedGraph} from "../core/graphTestUtil";
* Format a decomposition to be shown in a snapshot. This converts
* addresses and edges to strings to avoid NUL characters.
*/
function formatDecomposition(d) {
function formatDecomposition(d: PagerankNodeDecomposition) {
return MapUtil.mapEntries(d, (key, {score, scoredConnections}) => [
NodeAddress.toString(key),
{
@ -128,16 +131,19 @@ describe("analysis/pagerankNodeDecomposition", () => {
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
const connections = createConnections(g, edgeWeight, 1.0);
const osmc = createOrderedSparseMarkovChain(connections);
const pi = await findStationaryDistribution(osmc.chain, {
const distributionResult = await findStationaryDistribution(osmc.chain, {
verbose: false,
convergenceThreshold: 1e-6,
maxIterations: 255,
yieldAfterMs: 1,
});
const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
const result = decompose(pr, connections);
expect(formatDecomposition(result)).toMatchSnapshot();
validateDecomposition(result);
const pr = distributionToNodeDistribution(
osmc.nodeOrder,
distributionResult.pi
);
const decomposition = decompose(pr, connections);
expect(formatDecomposition(decomposition)).toMatchSnapshot();
validateDecomposition(decomposition);
});
it("is valid on the example graph", async () => {
@ -145,15 +151,18 @@ describe("analysis/pagerankNodeDecomposition", () => {
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
const connections = createConnections(g, edgeWeight, 1.0);
const osmc = createOrderedSparseMarkovChain(connections);
const pi = await findStationaryDistribution(osmc.chain, {
const distributionResult = await findStationaryDistribution(osmc.chain, {
verbose: false,
convergenceThreshold: 1e-6,
maxIterations: 255,
yieldAfterMs: 1,
});
const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
const result = decompose(pr, connections);
validateDecomposition(result);
const pr = distributionToNodeDistribution(
osmc.nodeOrder,
distributionResult.pi
);
const decomposition = decompose(pr, connections);
validateDecomposition(decomposition);
});
});
});

View File

@ -7,6 +7,24 @@
*/
export type Distribution = Float64Array;
export type StationaryDistributionResult = {|
// The final distribution after attempting to find the stationary distribution
// of the Markov chain.
+pi: Distribution,
// Reports how close the returned distribution is to being converged.
//
// If the convergenceDelta is near zero, then the distribution is well-converged
// (stationary).
//
// Specifically: let `x` be the distribution being returned, and let `x'` be the
// distribution after one additional Markov action, i.e.
// `x' = sparseMarkovChainAction(chain, x)`
// Then the convergence delta is the maximum difference in absolute value between components
// in `x` and `x'`.
+convergenceDelta: number,
|};
/**
* A representation of a sparse transition matrix that is convenient for
* computations on Markov chains.
@ -98,47 +116,68 @@ export function sparseMarkovChainAction(
return result;
}
/**
* Compute the maximum difference (in absolute value) between components in two
* distributions.
*
* Equivalent to $\norm{pi0 - pi1}_\infty$.
*/
export function computeDelta(pi0: Distribution, pi1: Distribution) {
let maxDelta = -Infinity;
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
// same (i.e., there has been no permutation).
pi0.forEach((x, i) => {
const delta = Math.abs(x - pi1[i]);
maxDelta = Math.max(delta, maxDelta);
});
return maxDelta;
}
function* findStationaryDistributionGenerator(
chain: SparseMarkovChain,
options: {|
+verbose: boolean,
// A distribution is considered stationary if the action of the Markov
// chain on the distribution does not change any component by more than
// `convergenceThreshold` in absolute value.
+convergenceThreshold: number,
// We will run maxIterations markov chain steps at most.
+maxIterations: number,
|}
): Generator<void, Distribution, void> {
): Generator<void, StationaryDistributionResult, void> {
let pi = uniformDistribution(chain.length);
let scratch = new Float64Array(pi.length);
function computeDelta(pi0, pi1) {
let maxDelta = -Infinity;
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
// same (i.e., there has been no permutation).
pi0.forEach((x, i) => {
const delta = Math.abs(x - pi1[i]);
maxDelta = Math.max(delta, maxDelta);
});
return maxDelta;
}
let iteration = 0;
let nIterations = 0;
while (true) {
if (iteration >= options.maxIterations) {
if (nIterations >= options.maxIterations) {
if (options.verbose) {
console.log(`[${iteration}] FAILED to converge`);
console.log(`[${nIterations}] FAILED to converge`);
}
return pi;
// We need to do one more step so that we can compute the empirical convergence
// delta for the returned distribution.
sparseMarkovChainActionInto(chain, pi, scratch);
const convergenceDelta = computeDelta(pi, scratch);
return {pi, convergenceDelta};
}
iteration++;
nIterations++;
sparseMarkovChainActionInto(chain, pi, scratch);
const delta = computeDelta(pi, scratch);
[scratch, pi] = [pi, scratch];
// We compute the convergenceDelta between 'scratch' (the newest
// distribution) and 'pi' (the distribution from the previous step). If the
// delta is below threshold, then the distribution from the last step was
// already converged and we return it (not scratch). Otherwise, we assign
// `scratch` to `distribution` and try again.
const convergenceDelta = computeDelta(pi, scratch);
if (options.verbose) {
console.log(`[${iteration}] delta = ${delta}`);
console.log(`[${nIterations}] delta = ${convergenceDelta}`);
}
if (delta < options.convergenceThreshold) {
if (convergenceDelta < options.convergenceThreshold) {
if (options.verbose) {
console.log(`[${iteration}] CONVERGED`);
console.log(`[${nIterations}] CONVERGED`);
}
return pi;
return {pi, convergenceDelta};
}
[scratch, pi] = [pi, scratch];
yield;
}
// ESLint knows that this next line is unreachable, but Flow doesn't. :-)
@ -154,7 +193,7 @@ export function findStationaryDistribution(
+maxIterations: number,
+yieldAfterMs: number,
|}
): Promise<Distribution> {
): Promise<StationaryDistributionResult> {
let gen = findStationaryDistributionGenerator(chain, {
verbose: options.verbose,
convergenceThreshold: options.convergenceThreshold,

View File

@ -6,6 +6,8 @@ import {
sparseMarkovChainAction,
sparseMarkovChainFromTransitionMatrix,
uniformDistribution,
computeDelta,
type StationaryDistributionResult,
} from "./markovChain";
describe("core/attribution/markovChain", () => {
@ -145,21 +147,29 @@ describe("core/attribution/markovChain", () => {
}
describe("findStationaryDistribution", () => {
function validateConvegenceDelta(chain, d: StationaryDistributionResult) {
const nextPi = sparseMarkovChainAction(chain, d.pi);
expect(d.convergenceDelta).toEqual(computeDelta(d.pi, nextPi));
}
it("finds an all-accumulating stationary distribution", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[1, 0, 0],
[0.25, 0, 0.75],
[0.25, 0.75, 0],
]);
const pi = await findStationaryDistribution(chain, {
const result = await findStationaryDistribution(chain, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
expectStationary(chain, pi);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvegenceDelta(chain, result);
expectStationary(chain, result.pi);
const expected = new Float64Array([1, 0, 0]);
expectAllClose(pi, expected);
expectAllClose(result.pi, expected);
});
it("finds a non-degenerate stationary distribution", async () => {
@ -174,40 +184,49 @@ describe("core/attribution/markovChain", () => {
[0.5, 0, 0.25, 0, 0.25],
[0.5, 0.25, 0, 0.25, 0],
]);
const pi = await findStationaryDistribution(chain, {
const result = await findStationaryDistribution(chain, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
expectStationary(chain, pi);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvegenceDelta(chain, result);
expectStationary(chain, result.pi);
const expected = new Float64Array([1 / 3, 1 / 6, 1 / 6, 1 / 6, 1 / 6]);
expectAllClose(pi, expected);
expectAllClose(result.pi, expected);
});
it("finds the stationary distribution of a periodic chain", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]);
const pi = await findStationaryDistribution(chain, {
const result = await findStationaryDistribution(chain, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
expectStationary(chain, pi);
expect(result.convergenceDelta).toEqual(0);
validateConvegenceDelta(chain, result);
expectStationary(chain, result.pi);
const expected = new Float64Array([0.5, 0.5]);
expectAllClose(pi, expected);
expectAllClose(result.pi, expected);
});
it("returns initial distribution if maxIterations===0", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]);
const pi = await findStationaryDistribution(chain, {
const result = await findStationaryDistribution(chain, {
verbose: false,
convergenceThreshold: 1e-7,
maxIterations: 0,
yieldAfterMs: 1,
});
const expected = new Float64Array([0.5, 0.5]);
expect(pi).toEqual(expected);
expect(result.pi).toEqual(expected);
validateConvegenceDelta(chain, result);
});
});
});