Add support for PageRank Seed Vectors (#1128)

Summary:
The cred calculation is defined by a Markov mixing process. By
introducing the seed vector and teleportation parameter alpha, the
Markov mixing process is augmented with a source of cred originating
from the seed vector. The resulting algorithm is the generalized
variation of PageRank, allowing computation of both canonical PageRank
where the seed vector is the uniform distribution and personalized
PageRank where the seed vector is an indicator distribution. It is still
possible to get the simple Markov chain solution by setting alpha = 0.

Note that this changes the Markov process state update, but does not
provide updates to the APIs. All existing behavior is unchanged because
alpha is always set to 0.

This is a port of
https://github.com/sourcecred/odyssey-hackathon/pull/3,
which was created during the Odyssey Hackathon.

Test Plan:

Existing tests have been extended to include passing alpha = 0 to
reproduce existing test cases for the simple Markov process. Additional
test cases include:
 - Verifying that resulting stationary distribution is unaffected by seed when alpha = 0
 - Verifying that resulting stationary distribution is precisely equal to seed when alpha = 1
 - Verifying that the resulting stationary distribution is linear in the seed vector
 - Verifying that the correct stationary distribution is computed for non-zero alpha
 - Verifying that the algorithm converges immediately when the initialDistribution is the stationary distribution
 - Verifying that changing the initialDistribution does not change the stationary distribution

Paired with @mzargham
This commit is contained in:
Dandelion Mané 2019-04-24 16:37:16 +03:00 committed by GitHub
parent ee1d2fb996
commit e7bc025379
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 365 additions and 54 deletions

View File

@ -23,6 +23,7 @@ import {
findStationaryDistribution,
type PagerankParams,
type PagerankOptions as CorePagerankOptions,
uniformDistribution,
} from "../core/attribution/markovChain";
export type {NodeDistribution} from "../core/attribution/graphToMarkovChain";
@ -67,7 +68,12 @@ export async function pagerank(
fullOptions.selfLoopWeight
);
const osmc = createOrderedSparseMarkovChain(connections);
const params: PagerankParams = {chain: osmc.chain};
const params: PagerankParams = {
chain: osmc.chain,
alpha: 0,
pi0: uniformDistribution(osmc.chain.length),
seed: uniformDistribution(osmc.chain.length),
};
const coreOptions: CorePagerankOptions = {
verbose: fullOptions.verbose,
convergenceThreshold: fullOptions.convergenceThreshold,

View File

@ -9,6 +9,7 @@ import {
import {
findStationaryDistribution,
type PagerankParams,
uniformDistribution,
} from "../core/attribution/markovChain";
import {
decompose,
@ -134,7 +135,12 @@ describe("analysis/pagerankNodeDecomposition", () => {
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
const connections = createConnections(g, edgeWeight, 1.0);
const osmc = createOrderedSparseMarkovChain(connections);
const params: PagerankParams = {chain: osmc.chain};
const params: PagerankParams = {
chain: osmc.chain,
alpha: 0,
seed: uniformDistribution(osmc.chain.length),
pi0: uniformDistribution(osmc.chain.length),
};
const distributionResult = await findStationaryDistribution(params, {
verbose: false,
convergenceThreshold: 1e-6,
@ -155,7 +161,12 @@ describe("analysis/pagerankNodeDecomposition", () => {
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
const connections = createConnections(g, edgeWeight, 1.0);
const osmc = createOrderedSparseMarkovChain(connections);
const params: PagerankParams = {chain: osmc.chain};
const params: PagerankParams = {
chain: osmc.chain,
alpha: 0,
seed: uniformDistribution(osmc.chain.length),
pi0: uniformDistribution(osmc.chain.length),
};
const distributionResult = await findStationaryDistribution(params, {
verbose: false,
convergenceThreshold: 1e-6,

View File

@ -16,7 +16,16 @@ export type Distribution = Float64Array;
* have different PagerankParams, but often have the same PagerankOptions.
*/
export type PagerankParams = {|
// The Markov chain to run PageRank on.
+chain: SparseMarkovChain,
// The initial distribution to start iterating from. It is copied into a
// fresh array before iteration begins, so the caller's array is not
// modified.
// NOTE(review): presumably must have one entry per node of `chain` — confirm.
+pi0: Distribution,
// The seed vector that PageRank 'teleports' back to.
// NOTE(review): presumably must have one entry per node of `chain` — confirm.
+seed: Distribution,
// The probability of teleporting back to the seed vector. A single step
// maps a distribution `pi` to `(1 - alpha) * (pi * chain) + alpha * seed`.
// If alpha=0, then the seed vector is irrelevant.
// If alpha=1, then it trivially converges to the seed vector.
+alpha: number,
|};
/**
@ -123,15 +132,17 @@ export function uniformDistribution(n: number): Distribution {
function sparseMarkovChainActionInto(
chain: SparseMarkovChain,
seed: Distribution,
alpha: number,
input: Distribution,
output: Distribution
): void {
chain.forEach(({neighbor, weight}, dst) => {
const inDegree = neighbor.length; // (also `weight.length`)
let probability = 0;
let probability = alpha * seed[dst];
for (let i = 0; i < inDegree; i++) {
const src = neighbor[i];
probability += input[src] * weight[i];
probability += (1 - alpha) * input[src] * weight[i];
}
output[dst] = probability;
});
@ -139,10 +150,12 @@ function sparseMarkovChainActionInto(
export function sparseMarkovChainAction(
chain: SparseMarkovChain,
seed: Distribution,
alpha: number,
pi: Distribution
): Distribution {
const result = new Float64Array(pi.length);
sparseMarkovChainActionInto(chain, pi, result);
sparseMarkovChainActionInto(chain, seed, alpha, pi, result);
return result;
}
@ -175,8 +188,8 @@ function* findStationaryDistributionGenerator(
+maxIterations: number,
|}
): Generator<void, StationaryDistributionResult, void> {
const {chain} = params;
let pi = uniformDistribution(chain.length);
const {chain, pi0, seed, alpha} = params;
let pi = new Float64Array(pi0);
let scratch = new Float64Array(pi.length);
let nIterations = 0;
@ -187,12 +200,12 @@ function* findStationaryDistributionGenerator(
}
// We need to do one more step so that we can compute the empirical convergence
// delta for the returned distribution.
sparseMarkovChainActionInto(chain, pi, scratch);
sparseMarkovChainActionInto(chain, seed, alpha, pi, scratch);
const convergenceDelta = computeDelta(pi, scratch);
return {pi, convergenceDelta};
}
nIterations++;
sparseMarkovChainActionInto(chain, pi, scratch);
sparseMarkovChainActionInto(chain, seed, alpha, pi, scratch);
// We compute the convergenceDelta between 'scratch' (the newest
// distribution) and 'pi' (the distribution from the previous step). If the
// delta is below threshold, then the distribution from the last step was

View File

@ -12,6 +12,23 @@ import {
} from "./markovChain";
describe("core/attribution/markovChain", () => {
/** A distribution that is 1 at the chosen index, and 0 elsewhere.*/
function singleIndexDistribution(size: number, index: number): Distribution {
if (!isFinite(size) || size !== Math.floor(size) || size <= 0) {
throw new Error("size: expected positive integer, but got: " + size);
}
if (!isFinite(index) || index !== Math.floor(index) || index < 0) {
throw new Error("index: expected nonnegative integer, got: " + index);
}
if (index >= size) {
throw new Error("index out of range");
}
const distribution = new Float64Array(size);
distribution[index] = 1;
return distribution;
}
describe("sparseMarkovChainFromTransitionMatrix", () => {
it("works for a simple matrix", () => {
const matrix = [[1, 0, 0], [0.25, 0, 0.75], [0.25, 0.75, 0]];
@ -115,8 +132,11 @@ describe("core/attribution/markovChain", () => {
[0.25, 0, 0.75],
[0.25, 0.75, 0],
]);
const alpha = 0;
const seed = uniformDistribution(chain.length);
const pi0 = new Float64Array([0.125, 0.375, 0.625]);
const pi1 = sparseMarkovChainAction(chain, pi0);
const pi1 = sparseMarkovChainAction(chain, seed, alpha, pi0);
// The expected value is given by `pi0 * A`, where `A` is the
// transition matrix. In Octave:
// >> A = [ 1 0 0; 0.25 0 0.75 ; 0.25 0.75 0 ];
@ -127,6 +147,23 @@ describe("core/attribution/markovChain", () => {
const expected = new Float64Array([0.375, 0.46875, 0.28125]);
expect(pi1).toEqual(expected);
});
it("acts properly on a nontrivial chain with seed and non-zero alpha", () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[1, 0, 0],
[0.25, 0, 0.75],
[0.25, 0.75, 0],
]);
const alpha = 0.5;
const seed = singleIndexDistribution(chain.length, 0);
const pi0 = new Float64Array([0.6, 0.2, 0.2]);
const pi1 = sparseMarkovChainAction(chain, seed, alpha, pi0);
// The result is `(1-alpha) * pi0 * A + alpha * seed`,
// where `A` is the transition matrix.
const expected = new Float64Array([0.85, 0.075, 0.075]);
expectAllClose(pi1, expected);
});
});
function expectAllClose(
@ -143,95 +180,333 @@ describe("core/attribution/markovChain", () => {
}
}
function expectStationary(chain: SparseMarkovChain, pi: Distribution): void {
expectAllClose(sparseMarkovChainAction(chain, pi), pi);
function expectStationary(
chain: SparseMarkovChain,
seed: Distribution,
alpha: number,
pi: Distribution
): void {
expectAllClose(sparseMarkovChainAction(chain, seed, alpha, pi), pi);
}
describe("findStationaryDistribution", () => {
function validateConvegenceDelta(chain, d: StationaryDistributionResult) {
const nextPi = sparseMarkovChainAction(chain, d.pi);
function validateConvergenceDelta(
chain: SparseMarkovChain,
seed: Distribution,
alpha: number,
d: StationaryDistributionResult
) {
const nextPi = sparseMarkovChainAction(chain, seed, alpha, d.pi);
expect(d.convergenceDelta).toEqual(computeDelta(d.pi, nextPi));
}
// Produces a fresh copy of the options passed to findStationaryDistribution
// by the tests in this suite; individual tests may spread and override it.
const standardOptions = () => {
  const options = {
    maxIterations: 255,
    convergenceThreshold: 1e-7,
    verbose: false,
    yieldAfterMs: 1,
  };
  return options;
};
it("finds an all-accumulating stationary distribution", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[1, 0, 0],
[0.25, 0, 0.75],
[0.25, 0.75, 0],
]);
const params: PagerankParams = {chain};
const result = await findStationaryDistribution(params, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
const params: PagerankParams = {
chain,
alpha: 0,
seed: uniformDistribution(chain.length),
pi0: uniformDistribution(chain.length),
};
const result = await findStationaryDistribution(
params,
standardOptions()
);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvegenceDelta(chain, result);
validateConvergenceDelta(params.chain, params.seed, params.alpha, result);
expectStationary(chain, result.pi);
expectStationary(params.chain, params.seed, params.alpha, result.pi);
const expected = new Float64Array([1, 0, 0]);
expectAllClose(result.pi, expected);
});
it("finds a non-degenerate stationary distribution", async () => {
// Node 0 is the "center"; nodes 1 through 4 are "satellites". A
// satellite transitions to the center with probability 0.5, or to a
// cyclically adjacent satellite with probability 0.25 each. The
// center transitions to a uniformly random satellite.
const chain = sparseMarkovChainFromTransitionMatrix([
// Node 0 is the "center"; nodes 1 through 4 are "satellites". A
// satellite transitions to the center with probability 0.5, or to a
// cyclically adjacent satellite with probability 0.25 each. The
// center transitions to a uniformly random satellite.
const satelliteChain = () =>
sparseMarkovChainFromTransitionMatrix([
[0, 0.25, 0.25, 0.25, 0.25],
[0.5, 0, 0.25, 0, 0.25],
[0.5, 0.25, 0, 0.25, 0],
[0.5, 0, 0.25, 0, 0.25],
[0.5, 0.25, 0, 0.25, 0],
]);
const params: PagerankParams = {chain};
const result = await findStationaryDistribution(params, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
it("finds a stationary distribution", async () => {
const chain = satelliteChain();
const params: PagerankParams = {
chain,
alpha: 0,
seed: uniformDistribution(chain.length),
pi0: uniformDistribution(chain.length),
};
const result = await findStationaryDistribution(
params,
standardOptions()
);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvegenceDelta(chain, result);
validateConvergenceDelta(params.chain, params.seed, params.alpha, result);
expectStationary(chain, result.pi);
expectStationary(params.chain, params.seed, params.alpha, result.pi);
const expected = new Float64Array([1 / 3, 1 / 6, 1 / 6, 1 / 6, 1 / 6]);
expectAllClose(result.pi, expected);
});
it("finds the same stationary distribution regardless of initialDistribution", async () => {
const chain = satelliteChain();
const alpha = 0.1;
const seed = uniformDistribution(chain.length);
const initialDistribution1 = singleIndexDistribution(chain.length, 0);
const params1 = {chain, alpha, seed, pi0: initialDistribution1};
const initialDistribution2 = singleIndexDistribution(chain.length, 1);
const params2 = {chain, alpha, seed, pi0: initialDistribution2};
const result1 = await findStationaryDistribution(
params1,
standardOptions()
);
const result2 = await findStationaryDistribution(
params2,
standardOptions()
);
expectAllClose(result1.pi, result2.pi);
});
it("finds a non-degenerate stationary distribution with seed and non-zero alpha", async () => {
const chain = satelliteChain();
const alpha = 0.1;
const seed = singleIndexDistribution(chain.length, 0);
const pi0 = uniformDistribution(chain.length);
const result = await findStationaryDistribution(
{chain, alpha, seed, pi0},
standardOptions()
);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvergenceDelta(chain, seed, alpha, result);
expectStationary(chain, seed, alpha, result.pi);
const expected = new Float64Array([
22 / 58,
9 / 58,
9 / 58,
9 / 58,
9 / 58,
]);
expectAllClose(result.pi, expected);
});
it("converges immediately when initialDistribution equals the stationary distribution", async () => {
const chain = satelliteChain();
const alpha = 0.1;
const seed = singleIndexDistribution(chain.length, 0);
// determine the expected stationary distribution via linear algebra
// from python3:
// >>A = np.matrix([[0, 0.25, 0.25, 0.25, 0.25],
// [0.5, 0, 0.25, 0, 0.25],
// [0.5, 0.25, 0, 0.25, 0],
// [0.5, 0, 0.25, 0, 0.25],
// [0.5, 0.25, 0, 0.25, 0]])
// >>seed = np.array([1, 0, 0, 0, 0])
// >>n = len(seed)
// >>alpha = .1
// >>piStar = alpha * seed * np.linalg.inv(np.eye(n) -(1-alpha)*A)
// >>print(piStar)
const expected = new Float64Array([
0.37931034,
0.15517241,
0.15517241,
0.15517241,
0.15517241,
]);
const result = await findStationaryDistribution(
{
chain,
seed,
alpha,
pi0: expected,
},
standardOptions()
);
expect(result.convergenceDelta).toBeLessThanOrEqual(1e-7);
validateConvergenceDelta(chain, seed, alpha, result);
expectStationary(chain, seed, alpha, result.pi);
expectAllClose(result.pi, expected);
});
it("finds the stationary distribution of a periodic chain", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]);
const params: PagerankParams = {chain};
const result = await findStationaryDistribution(params, {
maxIterations: 255,
convergenceThreshold: 1e-7,
verbose: false,
yieldAfterMs: 1,
});
const params: PagerankParams = {
chain,
alpha: 0,
seed: uniformDistribution(chain.length),
pi0: uniformDistribution(chain.length),
};
const result = await findStationaryDistribution(
params,
standardOptions()
);
expect(result.convergenceDelta).toEqual(0);
validateConvegenceDelta(chain, result);
validateConvergenceDelta(params.chain, params.seed, params.alpha, result);
expectStationary(chain, result.pi);
expectStationary(params.chain, params.seed, params.alpha, result.pi);
const expected = new Float64Array([0.5, 0.5]);
expectAllClose(result.pi, expected);
});
it("returns initial distribution if maxIterations===0", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]);
const params: PagerankParams = {chain};
const params: PagerankParams = {
chain,
alpha: 0,
seed: uniformDistribution(chain.length),
pi0: uniformDistribution(chain.length),
};
const result = await findStationaryDistribution(params, {
verbose: false,
convergenceThreshold: 1e-7,
...standardOptions(),
maxIterations: 0,
yieldAfterMs: 1,
});
const expected = new Float64Array([0.5, 0.5]);
expect(result.pi).toEqual(expected);
validateConvegenceDelta(chain, result);
validateConvergenceDelta(params.chain, params.seed, params.alpha, result);
});
it("is linear in choice of seed vector", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[0.75, 0.25],
[0.5, 0.5],
]);
const alpha = 0.1;
const seed1 = singleIndexDistribution(chain.length, 0);
const seed2 = singleIndexDistribution(chain.length, 1);
const seedUniform = uniformDistribution(chain.length);
const pi0 = uniformDistribution(chain.length);
const result1 = await findStationaryDistribution(
{chain, seed: seed1, alpha, pi0},
standardOptions()
);
const result2 = await findStationaryDistribution(
{chain, seed: seed2, alpha, pi0},
standardOptions()
);
const resultUniform = await findStationaryDistribution(
{chain, seed: seedUniform, alpha, pi0},
standardOptions()
);
function addDistributions(
d1: Distribution,
d2: Distribution
): Distribution {
if (d1.length !== d2.length) {
throw new Error("Can't add distributions of different sizes.");
}
const newDistribution = new Float64Array(d1.length);
for (let i = 0; i < newDistribution.length; i++) {
newDistribution[i] = d1[i] + d2[i];
}
return newDistribution;
}
function scaleDistribution(
scalar: number,
d: Distribution
): Distribution {
const newDistribution = new Float64Array(d.length);
for (let i = 0; i < newDistribution.length; i++) {
newDistribution[i] = scalar * d[i];
}
return newDistribution;
}
const combined = addDistributions(result1.pi, result2.pi);
expectAllClose(scaleDistribution(2, resultUniform.pi), combined);
});
it("ignores seed when alpha is zero", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[0.75, 0.25],
[0.5, 0.5],
]);
const alpha = 0;
const seed1 = singleIndexDistribution(chain.length, 0);
const seed2 = singleIndexDistribution(chain.length, 1);
const pi0 = uniformDistribution(chain.length);
const result1 = await findStationaryDistribution(
{
chain,
seed: seed1,
alpha,
pi0,
},
standardOptions()
);
const result2 = await findStationaryDistribution(
{
chain,
seed: seed2,
alpha,
pi0,
},
standardOptions()
);
expectAllClose(result1.pi, result2.pi);
});
it("returns seed when alpha is one", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[0.75, 0.25],
[0.5, 0.5],
]);
const alpha = 1;
const seed = singleIndexDistribution(chain.length, 0);
const pi0 = uniformDistribution(chain.length);
const result = await findStationaryDistribution(
{chain, seed, alpha, pi0},
standardOptions()
);
expectAllClose(result.pi, seed);
});
it("does not mutate seed or pi0", async () => {
const chain = sparseMarkovChainFromTransitionMatrix([
[0.75, 0.25],
[0.5, 0.5],
]);
const alpha = 0.2;
const seed = singleIndexDistribution(chain.length, 0);
const pi0 = uniformDistribution(chain.length);
const result = await findStationaryDistribution(
{chain, seed, alpha, pi0},
standardOptions()
);
expect(pi0).toEqual(uniformDistribution(chain.length));
expect(seed).toEqual(singleIndexDistribution(chain.length, 0));
expect(result).not.toEqual(pi0);
});
});
});

View File

@ -25,6 +25,7 @@ import {
findStationaryDistribution,
type PagerankParams,
type PagerankOptions as CorePagerankOptions,
uniformDistribution,
} from "../core/attribution/markovChain";
import * as NullUtil from "../util/null";
@ -438,7 +439,12 @@ export class PagerankGraph {
this._syntheticLoopWeight
);
const osmc = createOrderedSparseMarkovChain(connections);
const params: PagerankParams = {chain: osmc.chain};
const params: PagerankParams = {
chain: osmc.chain,
alpha: 0,
seed: uniformDistribution(osmc.chain.length),
pi0: uniformDistribution(osmc.chain.length),
};
const coreOptions: CorePagerankOptions = {
verbose: false,
convergenceThreshold: fullOptions.convergenceThreshold,