From a8a3f4fc3a787e08e8d1a7362816f4064f4bba35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Sun, 21 Apr 2019 14:00:30 +0300 Subject: [PATCH] refactor args to findStationaryDistribution (#1130) In [#1128: Add support for seed vectors][#1128], we significantly increase the number of arguments to markovChain.findStationaryDistribution. To clean up the invocations, I added a follow-on PR (#1129) which converts findStationaryDistribution to use a `PagerankParams` object instead. However, I think it will be cleaner to land the PagerankParams refactor before adding new features in #1128, so I'm making this PR as a pre-cleanup. Test plan: This is a trivial refactor. `yarn test` passes. [#1128]: https://github.com/sourcecred/sourcecred/pull/1128 --- src/analysis/pagerank.js | 15 +++++-- .../pagerankNodeDecomposition.test.js | 11 +++-- src/core/attribution/markovChain.js | 44 +++++++++++++++---- src/core/attribution/markovChain.test.js | 13 ++++-- src/core/pagerankGraph.js | 15 +++++-- 5 files changed, 76 insertions(+), 22 deletions(-) diff --git a/src/analysis/pagerank.js b/src/analysis/pagerank.js index 7aec9dc..bf21f90 100644 --- a/src/analysis/pagerank.js +++ b/src/analysis/pagerank.js @@ -19,7 +19,11 @@ import { import {scoreByConstantTotal} from "./nodeScore"; -import {findStationaryDistribution} from "../core/attribution/markovChain"; +import { + findStationaryDistribution, + type PagerankParams, + type PagerankOptions as CorePagerankOptions, +} from "../core/attribution/markovChain"; export type {NodeDistribution} from "../core/attribution/graphToMarkovChain"; export type {PagerankNodeDecomposition} from "./pagerankNodeDecomposition"; @@ -63,12 +67,17 @@ export async function pagerank( fullOptions.selfLoopWeight ); const osmc = createOrderedSparseMarkovChain(connections); - const distributionResult = await findStationaryDistribution(osmc.chain, { + const params: PagerankParams = {chain: osmc.chain}; + const coreOptions: CorePagerankOptions = {
verbose: fullOptions.verbose, convergenceThreshold: fullOptions.convergenceThreshold, maxIterations: fullOptions.maxIterations, yieldAfterMs: 30, - }); + }; + const distributionResult = await findStationaryDistribution( + params, + coreOptions + ); const pi = distributionToNodeDistribution( osmc.nodeOrder, distributionResult.pi diff --git a/src/analysis/pagerankNodeDecomposition.test.js b/src/analysis/pagerankNodeDecomposition.test.js index f6fcb5c..3e62dd7 100644 --- a/src/analysis/pagerankNodeDecomposition.test.js +++ b/src/analysis/pagerankNodeDecomposition.test.js @@ -6,7 +6,10 @@ import { createConnections, createOrderedSparseMarkovChain, } from "../core/attribution/graphToMarkovChain"; -import {findStationaryDistribution} from "../core/attribution/markovChain"; +import { + findStationaryDistribution, + type PagerankParams, +} from "../core/attribution/markovChain"; import { decompose, type PagerankNodeDecomposition, @@ -131,7 +134,8 @@ describe("analysis/pagerankNodeDecomposition", () => { const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); const connections = createConnections(g, edgeWeight, 1.0); const osmc = createOrderedSparseMarkovChain(connections); - const distributionResult = await findStationaryDistribution(osmc.chain, { + const params: PagerankParams = {chain: osmc.chain}; + const distributionResult = await findStationaryDistribution(params, { verbose: false, convergenceThreshold: 1e-6, maxIterations: 255, @@ -151,7 +155,8 @@ describe("analysis/pagerankNodeDecomposition", () => { const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); const connections = createConnections(g, edgeWeight, 1.0); const osmc = createOrderedSparseMarkovChain(connections); - const distributionResult = await findStationaryDistribution(osmc.chain, { + const params: PagerankParams = {chain: osmc.chain}; + const distributionResult = await findStationaryDistribution(params, { verbose: false, convergenceThreshold: 1e-6, maxIterations: 255, diff --git 
a/src/core/attribution/markovChain.js b/src/core/attribution/markovChain.js index 7d615fa..c79ee9a 100644 --- a/src/core/attribution/markovChain.js +++ b/src/core/attribution/markovChain.js @@ -7,6 +7,36 @@ */ export type Distribution = Float64Array; +/** + * The data inputs to running PageRank. + * + * We keep these separate from the PagerankOptions below, + * because we expect that within a given context, every call to + * findStationaryDistribution (or other Pagerank functions) will + * have different PagerankParams, but often have the same PagerankOptions. + */ +export type PagerankParams = {| + +chain: SparseMarkovChain, +|}; + +/** + * PagerankOptions allows the user to tweak PageRank's behavior, especially around + * convergence. + */ +export type PagerankOptions = {| + // Causes runtime information to get logged to console. + +verbose: boolean, + // A distribution is considered stationary if the action of the Markov + // chain on the distribution does not change any component by more than + // `convergenceThreshold` in absolute value. + +convergenceThreshold: number, + // We will run maxIterations Markov chain steps at most. + +maxIterations: number, + // To prevent locking the rest of the application, PageRank will yield control + // after this many milliseconds, allowing UI updates, etc. + +yieldAfterMs: number, +|}; + export type StationaryDistributionResult = {| // The final distribution after attempting to find the stationary distribution // of the Markov chain.
@@ -134,7 +164,7 @@ export function computeDelta(pi0: Distribution, pi1: Distribution) { } function* findStationaryDistributionGenerator( - chain: SparseMarkovChain, + params: PagerankParams, options: {| +verbose: boolean, // A distribution is considered stationary if the action of the Markov @@ -145,6 +175,7 @@ function* findStationaryDistributionGenerator( +maxIterations: number, |} ): Generator { + const {chain} = params; let pi = uniformDistribution(chain.length); let scratch = new Float64Array(pi.length); @@ -186,15 +217,10 @@ function* findStationaryDistributionGenerator( } export function findStationaryDistribution( - chain: SparseMarkovChain, - options: {| - +verbose: boolean, - +convergenceThreshold: number, - +maxIterations: number, - +yieldAfterMs: number, - |} + params: PagerankParams, + options: PagerankOptions ): Promise { - let gen = findStationaryDistributionGenerator(chain, { + let gen = findStationaryDistributionGenerator(params, { verbose: options.verbose, convergenceThreshold: options.convergenceThreshold, maxIterations: options.maxIterations, diff --git a/src/core/attribution/markovChain.test.js b/src/core/attribution/markovChain.test.js index 205ecc9..b26457e 100644 --- a/src/core/attribution/markovChain.test.js +++ b/src/core/attribution/markovChain.test.js @@ -8,6 +8,7 @@ import { uniformDistribution, computeDelta, type StationaryDistributionResult, + type PagerankParams, } from "./markovChain"; describe("core/attribution/markovChain", () => { @@ -158,7 +159,8 @@ describe("core/attribution/markovChain", () => { [0.25, 0, 0.75], [0.25, 0.75, 0], ]); - const result = await findStationaryDistribution(chain, { + const params: PagerankParams = {chain}; + const result = await findStationaryDistribution(params, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, @@ -184,7 +186,8 @@ describe("core/attribution/markovChain", () => { [0.5, 0, 0.25, 0, 0.25], [0.5, 0.25, 0, 0.25, 0], ]); - const result = await 
findStationaryDistribution(chain, { + const params: PagerankParams = {chain}; + const result = await findStationaryDistribution(params, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, @@ -201,7 +204,8 @@ describe("core/attribution/markovChain", () => { it("finds the stationary distribution of a periodic chain", async () => { const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [1, 0]]); - const result = await findStationaryDistribution(chain, { + const params: PagerankParams = {chain}; + const result = await findStationaryDistribution(params, { maxIterations: 255, convergenceThreshold: 1e-7, verbose: false, @@ -218,7 +222,8 @@ describe("core/attribution/markovChain", () => { it("returns initial distribution if maxIterations===0", async () => { const chain = sparseMarkovChainFromTransitionMatrix([[0, 1], [0, 1]]); - const result = await findStationaryDistribution(chain, { + const params: PagerankParams = {chain}; + const result = await findStationaryDistribution(params, { verbose: false, convergenceThreshold: 1e-7, maxIterations: 0, diff --git a/src/core/pagerankGraph.js b/src/core/pagerankGraph.js index f9776a8..c50815c 100644 --- a/src/core/pagerankGraph.js +++ b/src/core/pagerankGraph.js @@ -21,7 +21,11 @@ import { createOrderedSparseMarkovChain, type EdgeWeight, } from "./attribution/graphToMarkovChain"; -import {findStationaryDistribution} from "../core/attribution/markovChain"; +import { + findStationaryDistribution, + type PagerankParams, + type PagerankOptions, +} from "../core/attribution/markovChain"; import * as NullUtil from "../util/null"; export {Direction} from "./graph"; @@ -421,12 +425,17 @@ export class PagerankGraph { this._syntheticLoopWeight ); const osmc = createOrderedSparseMarkovChain(connections); - const distributionResult = await findStationaryDistribution(osmc.chain, { + const params: PagerankParams = {chain: osmc.chain}; + const coreOptions: PagerankOptions = { verbose: false, convergenceThreshold: 
options.convergenceThreshold, maxIterations: options.maxIterations, yieldAfterMs: 30, - }); + }; + const distributionResult = await findStationaryDistribution( + params, + coreOptions + ); this._scores = distributionToNodeDistribution( osmc.nodeOrder, distributionResult.pi