mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-17 06:56:36 +00:00
Report Markov Chain convergence statistics (#1053)
This commit modifies `markovChain.findStationaryDistribution` so that in addition to returning the final distribution, it also reports the final convergence delta. This is motivated by the proposed API for the new PagerankGraph (see [#1020]). Also, I think it makes a nice addition to the test code. Note that this slightly changes the output from `findStationaryDistribution`, because we now return the first distribution that is sufficiently converged, rather than that distribution with one additional Markov action. Test plan: Unit tests are updated, and `yarn test` passes. [#1020]: https://github.com/sourcecred/sourcecred/issues/1020 Thanks to @BrianLitwin for semi-pair-programming it Thanks to @wchargin for extensive review feedback.
This commit is contained in:
parent
c428ee01a3
commit
dcda8bde1d
@ -3,7 +3,7 @@
|
||||
exports[`analysis/pagerankNodeDecomposition decompose has the expected output on a simple asymmetric chain 1`] = `
|
||||
Map {
|
||||
"NodeAddress[\\"n1\\"]" => Object {
|
||||
"score": 0.19117656878499834,
|
||||
"score": 0.19117611352146152,
|
||||
"scoredConnections": Array [
|
||||
Object {
|
||||
"connection": Object {
|
||||
@ -17,7 +17,7 @@ Map {
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"connectionScore": 0.1102941261444197,
|
||||
"connectionScore": 0.11029408674138254,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -32,7 +32,7 @@ Map {
|
||||
},
|
||||
"weight": 0.3,
|
||||
},
|
||||
"connectionScore": 0.066176427533429,
|
||||
"connectionScore": 0.06617662715734954,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -42,13 +42,13 @@ Map {
|
||||
},
|
||||
"weight": 0.07692307692307693,
|
||||
},
|
||||
"connectionScore": 0.014705889906538334,
|
||||
"connectionScore": 0.014705854886266271,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
],
|
||||
},
|
||||
"NodeAddress[\\"n2\\"]" => Object {
|
||||
"score": 0.22058809177809668,
|
||||
"score": 0.22058875719116514,
|
||||
"scoredConnections": Array [
|
||||
Object {
|
||||
"connection": Object {
|
||||
@ -62,7 +62,7 @@ Map {
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"connectionScore": 0.1102941261444197,
|
||||
"connectionScore": 0.11029408674138254,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -77,7 +77,7 @@ Map {
|
||||
},
|
||||
"weight": 0.46153846153846156,
|
||||
},
|
||||
"connectionScore": 0.08823533943923001,
|
||||
"connectionScore": 0.08823512931759762,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -87,13 +87,13 @@ Map {
|
||||
},
|
||||
"weight": 0.1,
|
||||
},
|
||||
"connectionScore": 0.02205880917780967,
|
||||
"connectionScore": 0.022058875719116515,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
},
|
||||
],
|
||||
},
|
||||
"NodeAddress[\\"sink\\"]" => Object {
|
||||
"score": 0.5882353394369051,
|
||||
"score": 0.5882351292873735,
|
||||
"scoredConnections": Array [
|
||||
Object {
|
||||
"connection": Object {
|
||||
@ -107,7 +107,7 @@ Map {
|
||||
},
|
||||
"weight": 0.375,
|
||||
},
|
||||
"connectionScore": 0.2205882522888394,
|
||||
"connectionScore": 0.22058817348276508,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -122,7 +122,7 @@ Map {
|
||||
},
|
||||
"weight": 0.6,
|
||||
},
|
||||
"connectionScore": 0.132352855066858,
|
||||
"connectionScore": 0.13235325431469908,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -137,7 +137,7 @@ Map {
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"connectionScore": 0.1102941261444197,
|
||||
"connectionScore": 0.11029408674138254,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -152,7 +152,7 @@ Map {
|
||||
},
|
||||
"weight": 0.46153846153846156,
|
||||
},
|
||||
"connectionScore": 0.08823533943923001,
|
||||
"connectionScore": 0.08823512931759762,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
Object {
|
||||
@ -162,7 +162,7 @@ Map {
|
||||
},
|
||||
"weight": 0.0625,
|
||||
},
|
||||
"connectionScore": 0.03676470871480657,
|
||||
"connectionScore": 0.036764695580460846,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
],
|
||||
|
@ -58,13 +58,16 @@ export async function pagerank(
|
||||
fullOptions.selfLoopWeight
|
||||
);
|
||||
const osmc = createOrderedSparseMarkovChain(connections);
|
||||
const distribution = await findStationaryDistribution(osmc.chain, {
|
||||
const distributionResult = await findStationaryDistribution(osmc.chain, {
|
||||
verbose: fullOptions.verbose,
|
||||
convergenceThreshold: fullOptions.convergenceThreshold,
|
||||
maxIterations: fullOptions.maxIterations,
|
||||
yieldAfterMs: 30,
|
||||
});
|
||||
const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
|
||||
const pi = distributionToNodeDistribution(
|
||||
osmc.nodeOrder,
|
||||
distributionResult.pi
|
||||
);
|
||||
const scores = scoreByConstantTotal(
|
||||
pi,
|
||||
fullOptions.totalScore,
|
||||
|
@ -7,7 +7,10 @@ import {
|
||||
createOrderedSparseMarkovChain,
|
||||
} from "../core/attribution/graphToMarkovChain";
|
||||
import {findStationaryDistribution} from "../core/attribution/markovChain";
|
||||
import {decompose} from "./pagerankNodeDecomposition";
|
||||
import {
|
||||
decompose,
|
||||
type PagerankNodeDecomposition,
|
||||
} from "./pagerankNodeDecomposition";
|
||||
import * as MapUtil from "../util/map";
|
||||
|
||||
import {advancedGraph} from "../core/graphTestUtil";
|
||||
@ -16,7 +19,7 @@ import {advancedGraph} from "../core/graphTestUtil";
|
||||
* Format a decomposition to be shown in a snapshot. This converts
|
||||
* addresses and edges to strings to avoid NUL characters.
|
||||
*/
|
||||
function formatDecomposition(d) {
|
||||
function formatDecomposition(d: PagerankNodeDecomposition) {
|
||||
return MapUtil.mapEntries(d, (key, {score, scoredConnections}) => [
|
||||
NodeAddress.toString(key),
|
||||
{
|
||||
@ -128,16 +131,19 @@ describe("analysis/pagerankNodeDecomposition", () => {
|
||||
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
|
||||
const connections = createConnections(g, edgeWeight, 1.0);
|
||||
const osmc = createOrderedSparseMarkovChain(connections);
|
||||
const pi = await findStationaryDistribution(osmc.chain, {
|
||||
const distributionResult = await findStationaryDistribution(osmc.chain, {
|
||||
verbose: false,
|
||||
convergenceThreshold: 1e-6,
|
||||
maxIterations: 255,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
|
||||
const result = decompose(pr, connections);
|
||||
expect(formatDecomposition(result)).toMatchSnapshot();
|
||||
validateDecomposition(result);
|
||||
const pr = distributionToNodeDistribution(
|
||||
osmc.nodeOrder,
|
||||
distributionResult.pi
|
||||
);
|
||||
const decomposition = decompose(pr, connections);
|
||||
expect(formatDecomposition(decomposition)).toMatchSnapshot();
|
||||
validateDecomposition(decomposition);
|
||||
});
|
||||
|
||||
it("is valid on the example graph", async () => {
|
||||
@ -145,15 +151,18 @@ describe("analysis/pagerankNodeDecomposition", () => {
|
||||
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
|
||||
const connections = createConnections(g, edgeWeight, 1.0);
|
||||
const osmc = createOrderedSparseMarkovChain(connections);
|
||||
const pi = await findStationaryDistribution(osmc.chain, {
|
||||
const distributionResult = await findStationaryDistribution(osmc.chain, {
|
||||
verbose: false,
|
||||
convergenceThreshold: 1e-6,
|
||||
maxIterations: 255,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
const pr = distributionToNodeDistribution(osmc.nodeOrder, pi);
|
||||
const result = decompose(pr, connections);
|
||||
validateDecomposition(result);
|
||||
const pr = distributionToNodeDistribution(
|
||||
osmc.nodeOrder,
|
||||
distributionResult.pi
|
||||
);
|
||||
const decomposition = decompose(pr, connections);
|
||||
validateDecomposition(decomposition);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
@ -7,6 +7,24 @@
|
||||
*/
|
||||
export type Distribution = Float64Array;
|
||||
|
||||
export type StationaryDistributionResult = {|
|
||||
// The final distribution after attempting to find the stationary distribution
|
||||
// of the Markov chain.
|
||||
+pi: Distribution,
|
||||
|
||||
// Reports how close the returned distribution is to being converged.
|
||||
//
|
||||
// If the convergenceDelta is near zero, then the distribution is well-converged
|
||||
// (stationary).
|
||||
//
|
||||
// Specifically: let `x` be the distribution being returned, and let `x'` be the
|
||||
// distribution after one additional Markov action, i.e.
|
||||
// `x' = sparseMarkovChainAction(chain, x)`
|
||||
// Then the convergence delta is the maximum difference in absolute value between components
|
||||
// in `x` and `x'`.
|
||||
+convergenceDelta: number,
|
||||
|};
|
||||
|
||||
/**
|
||||
* A representation of a sparse transition matrix that is convenient for
|
||||
* computations on Markov chains.
|
||||
@ -98,47 +116,68 @@ export function sparseMarkovChainAction(
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the maximum difference (in absolute value) between components in two
|
||||
* distributions.
|
||||
*
|
||||
* Equivalent to $\norm{pi0 - pi1}_\infty$.
|
||||
*/
|
||||
export function computeDelta(pi0: Distribution, pi1: Distribution) {
|
||||
let maxDelta = -Infinity;
|
||||
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
|
||||
// same (i.e., there has been no permutation).
|
||||
pi0.forEach((x, i) => {
|
||||
const delta = Math.abs(x - pi1[i]);
|
||||
maxDelta = Math.max(delta, maxDelta);
|
||||
});
|
||||
return maxDelta;
|
||||
}
|
||||
|
||||
function* findStationaryDistributionGenerator(
|
||||
chain: SparseMarkovChain,
|
||||
options: {|
|
||||
+verbose: boolean,
|
||||
// A distribution is considered stationary if the action of the Markov
|
||||
// chain on the distribution does not change any component by more than
|
||||
// `convergenceThreshold` in absolute value.
|
||||
+convergenceThreshold: number,
|
||||
// We will run maxIterations markov chain steps at most.
|
||||
+maxIterations: number,
|
||||
|}
|
||||
): Generator<void, Distribution, void> {
|
||||
): Generator<void, StationaryDistributionResult, void> {
|
||||
let pi = uniformDistribution(chain.length);
|
||||
let scratch = new Float64Array(pi.length);
|
||||
function computeDelta(pi0, pi1) {
|
||||
let maxDelta = -Infinity;
|
||||
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
|
||||
// same (i.e., there has been no permutation).
|
||||
pi0.forEach((x, i) => {
|
||||
const delta = Math.abs(x - pi1[i]);
|
||||
maxDelta = Math.max(delta, maxDelta);
|
||||
});
|
||||
return maxDelta;
|
||||
}
|
||||
let iteration = 0;
|
||||
|
||||
let nIterations = 0;
|
||||
while (true) {
|
||||
if (iteration >= options.maxIterations) {
|
||||
if (nIterations >= options.maxIterations) {
|
||||
if (options.verbose) {
|
||||
console.log(`[${iteration}] FAILED to converge`);
|
||||
console.log(`[${nIterations}] FAILED to converge`);
|
||||
}
|
||||
return pi;
|
||||
// We need to do one more step so that we can compute the empirical convergence
|
||||
// delta for the returned distribution.
|
||||
sparseMarkovChainActionInto(chain, pi, scratch);
|
||||
const convergenceDelta = computeDelta(pi, scratch);
|
||||
return {pi, convergenceDelta};
|
||||
}
|
||||
iteration++;
|
||||
nIterations++;
|
||||
sparseMarkovChainActionInto(chain, pi, scratch);
|
||||
const delta = computeDelta(pi, scratch);
|
||||
[scratch, pi] = [pi, scratch];
|
||||
// We compute the convergenceDelta between 'scratch' (the newest
|
||||
// distribution) and 'pi' (the distribution from the previous step). If the
|
||||
// delta is below threshold, then the distribution from the last step was
|
||||
// already converged and we return it (not scratch). Otherwise, we assign
|
||||
// `scratch` to `distribution` and try again.
|
||||
const convergenceDelta = computeDelta(pi, scratch);
|
||||
if (options.verbose) {
|
||||
console.log(`[${iteration}] delta = ${delta}`);
|
||||
console.log(`[${nIterations}] delta = ${convergenceDelta}`);
|
||||
}
|
||||
if (delta < options.convergenceThreshold) {
|
||||
if (convergenceDelta < options.convergenceThreshold) {
|
||||
if (options.verbose) {
|
||||
console.log(`[${iteration}] CONVERGED`);
|
||||
console.log(`[${nIterations}] CONVERGED`);
|
||||
}
|
||||
return pi;
|
||||
return {pi, convergenceDelta};
|
||||
}
|
||||
[scratch, pi] = [pi, scratch];
|
||||
yield;
|
||||
}
|
||||
// ESLint knows that this next line is unreachable, but Flow doesn't. :-)
|
||||
@ -154,7 +193,7 @@ export function findStationaryDistribution(
|
||||
+maxIterations: number,
|
||||
+yieldAfterMs: number,
|
||||
|}
|
||||
): Promise<Distribution> {
|
||||
): Promise<StationaryDistributionResult> {
|
||||
let gen = findStationaryDistributionGenerator(chain, {
|
||||
verbose: options.verbose,
|
||||
convergenceThreshold: options.convergenceThreshold,
|
||||
|
@ -6,6 +6,8 @@ import {
|
||||
sparseMarkovChainAction,
|
||||
sparseMarkovChainFromTransitionMatrix,
|
||||
uniformDistribution,
|
||||
computeDelta,
|
||||
type StationaryDistributionResult,
|
||||
} from "./markovChain";
|
||||
|
||||
describe("core/attribution/markovChain", () => {
|
||||
@ -145,21 +147,29 @@ describe("core/attribution/markovChain", () => {
|
||||
}
|
||||
|
||||
describe("findStationaryDistribution", () => {
|
||||
function validateConvegenceDelta(chain, d: StationaryDistributionResult) {
|
||||
const nextPi = sparseMarkovChainAction(chain, d.pi);
|
||||
expect(d.convergenceDelta).toEqual(computeDelta(d.pi, nextPi));
|
||||
}
|
||||
|
||||
it("finds an all-accumulating stationary distribution", async () => {
|
||||
const chain = sparseMarkovChainFromTransitionMatrix([
|
||||
[1, 0, 0],
|
||||
[0.25, 0, 0.75],
|
||||
[0.25, 0.75, 0],
|
||||
]);
|
||||
const pi = await findStationaryDistribution(chain, {
|
||||
const result = await findStationaryDistribution(chain, {
|
||||
maxIterations: 255,
|
||||
convergenceThreshold: 1e-7,
|
||||
verbose: false,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
expectStationary(chain, pi);
|
||||
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
|
||||
validateConvegenceDelta(chain, result);
|
||||
|
||||
expectStationary(chain, result.pi);
|
||||
const expected = new Float64Array([1, 0, 0]);
|
||||
expectAllClose(pi, expected);
|
||||
expectAllClose(result.pi, expected);
|
||||
});
|
||||
|
||||
it("finds a non-degenerate stationary distribution", async () => {
|
||||
@ -174,40 +184,49 @@ describe("core/attribution/markovChain", () => {
|
||||
[0.5, 0, 0.25, 0, 0.25],
|
||||
[0.5, 0.25, 0, 0.25, 0],
|
||||
]);
|
||||
const pi = await findStationaryDistribution(chain, {
|
||||
const result = await findStationaryDistribution(chain, {
|
||||
maxIterations: 255,
|
||||
convergenceThreshold: 1e-7,
|
||||
verbose: false,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
expectStationary(chain, pi);
|
||||
|
||||
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
|
||||
validateConvegenceDelta(chain, result);
|
||||
|
||||
expectStationary(chain, result.pi);
|
||||
const expected = new Float64Array([1 / 3, 1 / 6, 1 / 6, 1 / 6, 1 / 6]);
|
||||
expectAllClose(pi, expected);
|
||||
expectAllClose(result.pi, expected);
|
||||
});
|
||||
|
||||
it("finds the stationary distribution of a periodic chain", async () => {
|
||||
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]);
|
||||
const pi = await findStationaryDistribution(chain, {
|
||||
const result = await findStationaryDistribution(chain, {
|
||||
maxIterations: 255,
|
||||
convergenceThreshold: 1e-7,
|
||||
verbose: false,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
expectStationary(chain, pi);
|
||||
|
||||
expect(result.convergenceDelta).toEqual(0);
|
||||
validateConvegenceDelta(chain, result);
|
||||
|
||||
expectStationary(chain, result.pi);
|
||||
const expected = new Float64Array([0.5, 0.5]);
|
||||
expectAllClose(pi, expected);
|
||||
expectAllClose(result.pi, expected);
|
||||
});
|
||||
|
||||
it("returns initial distribution if maxIterations===0", async () => {
|
||||
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]);
|
||||
const pi = await findStationaryDistribution(chain, {
|
||||
const result = await findStationaryDistribution(chain, {
|
||||
verbose: false,
|
||||
convergenceThreshold: 1e-7,
|
||||
maxIterations: 0,
|
||||
yieldAfterMs: 1,
|
||||
});
|
||||
const expected = new Float64Array([0.5, 0.5]);
|
||||
expect(pi).toEqual(expected);
|
||||
expect(result.pi).toEqual(expected);
|
||||
validateConvegenceDelta(chain, result);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user