From 017fbd774ad3e39906f7ba10aed21a70dc570f27 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Fri, 11 May 2018 21:28:58 -0700 Subject: [PATCH] Use `SparseMarkovChain` in `basicPagerank` (#273) Summary: This commit slightly reorganizes the internals of `basicPagerank` to use the `SparseMarkovChain` type from the `markovChain` module. Test Plan: Behavior of `yarn start` is unchanged. wchargin-branch: use-sparsemarkovchain --- .../__snapshots__/basicPagerank.test.js.snap | 2 +- src/app/credExplorer/basicPagerank.js | 89 +++++++++---------- src/app/credExplorer/basicPagerank.test.js | 4 +- 3 files changed, 45 insertions(+), 50 deletions(-) diff --git a/src/app/credExplorer/__snapshots__/basicPagerank.test.js.snap b/src/app/credExplorer/__snapshots__/basicPagerank.test.js.snap index 6b61863..cf972ed 100644 --- a/src/app/credExplorer/__snapshots__/basicPagerank.test.js.snap +++ b/src/app/credExplorer/__snapshots__/basicPagerank.test.js.snap @@ -2,7 +2,7 @@ exports[`graphToMarkovChain is correct for a trivial one-node chain 1`] = ` Object { - "inNeighbors": Array [ + "chain": Array [ Object { "neighbor": Uint32Array [ 0, diff --git a/src/app/credExplorer/basicPagerank.js b/src/app/credExplorer/basicPagerank.js index 48fcae1..519553e 100644 --- a/src/app/credExplorer/basicPagerank.js +++ b/src/app/credExplorer/basicPagerank.js @@ -5,10 +5,11 @@ import type {Edge} from "../../core/graph"; import {AddressMap} from "../../core/address"; import {Graph} from "../../core/graph"; -export type Distribution = {| - +nodeOrder: $ReadOnlyArray<Address>
, - +data: Float64Array, -|}; +import type { + Distribution, + SparseMarkovChain, +} from "../../core/attribution/markovChain"; + export type PagerankResult = AddressMap<{| +address: Address, +probability: number, @@ -22,18 +23,15 @@ type AddressMapMarkovChain = AddressMap<{| |}>, |}>; -type TypedArrayMarkovChain = {| +type OrderedSparseMarkovChain = {| +nodeOrder: $ReadOnlyArray<Address>
, - +inNeighbors: $ReadOnlyArray<{| - +neighbor: Uint32Array, - +weight: Float64Array, - |}>, + +chain: SparseMarkovChain, |}; export default function basicPagerank(graph: Graph): PagerankResult { - return distributionToPagerankResult( - findStationaryDistribution(graphToTypedArrayMarkovChain(graph)) - ); + const {nodeOrder, chain} = graphToOrderedSparseMarkovChain(graph); + const pi = findStationaryDistribution(chain); + return distributionToPagerankResult(nodeOrder, pi); } function edgeWeight( @@ -85,20 +83,20 @@ function graphToAddressMapMarkovChain( return result; } -function addressMapMarkovChainToTypedArrayMarkovChain( - mc: AddressMapMarkovChain -): TypedArrayMarkovChain { +function addressMapMarkovChainToOrderedSparseMarkovChain( + chain: AddressMapMarkovChain +): OrderedSparseMarkovChain { // The node ordering is arbitrary, but must be made canonical: calls // to `graph.nodes()` are not guaranteed to be stable. - const nodeOrder = mc.getAll().map(({address}) => address); + const nodeOrder = chain.getAll().map(({address}) => address); const addressToIndex = new AddressMap(); nodeOrder.forEach((address, index) => { addressToIndex.add({address, index}); }); return { nodeOrder, - inNeighbors: nodeOrder.map((address) => { - const theseNeighbors = mc.get(address).inNeighbors.getAll(); + chain: nodeOrder.map((address) => { + const theseNeighbors = chain.get(address).inNeighbors.getAll(); return { neighbor: new Uint32Array( theseNeighbors.map(({address}) => addressToIndex.get(address).index) @@ -109,52 +107,46 @@ function addressMapMarkovChainToTypedArrayMarkovChain( }; } -export function graphToTypedArrayMarkovChain( +export function graphToOrderedSparseMarkovChain( graph: Graph -): TypedArrayMarkovChain { - return addressMapMarkovChainToTypedArrayMarkovChain( +): OrderedSparseMarkovChain { + return addressMapMarkovChainToOrderedSparseMarkovChain( graphToAddressMapMarkovChain(graph) ); } -function markovChainAction( - mc: TypedArrayMarkovChain, +function 
sparseMarkovChainAction( + chain: SparseMarkovChain, pi: Distribution ): Distribution { - const data = new Float64Array(pi.data.length); - for (let dst = 0; dst < mc.nodeOrder.length; dst++) { - const theseNeighbors = mc.inNeighbors[dst]; - const inDegree = theseNeighbors.neighbor.length; + const result = new Float64Array(pi.length); + chain.forEach(({neighbor, weight}, dst) => { + const inDegree = neighbor.length; // (also `weight.length`) let probability = 0; - for (let srcIndex = 0; srcIndex < inDegree; srcIndex++) { - const src = theseNeighbors.neighbor[srcIndex]; - probability += pi.data[src] * theseNeighbors.weight[srcIndex]; + for (let i = 0; i < inDegree; i++) { + const src = neighbor[i]; + probability += pi[src] * weight[i]; } - data[dst] = probability; - } - return {nodeOrder: pi.nodeOrder, data}; + result[dst] = probability; + }); + return result; } -function uniformDistribution(nodeOrder: $ReadOnlyArray<Address>
): Distribution { - return { - nodeOrder, - data: new Float64Array( - Array(nodeOrder.length).fill(1.0 / nodeOrder.length) - ), - }; +function uniformDistribution(n: number): Distribution { + return new Float64Array(n).fill(1 / n); } -function findStationaryDistribution(mc: TypedArrayMarkovChain): Distribution { - let r0 = uniformDistribution(mc.nodeOrder); +function findStationaryDistribution(chain: SparseMarkovChain): Distribution { + let r0 = uniformDistribution(chain.length); function computeDelta(pi0, pi1) { // Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the // same (i.e., there has been no permutation). - return Math.max(...pi0.data.map((x, i) => Math.abs(x - pi1.data[i]))); + return Math.max(...pi0.map((x, i) => Math.abs(x - pi1[i]))); } let iteration = 0; while (true) { iteration++; - const r1 = markovChainAction(mc, r0); + const r1 = sparseMarkovChainAction(chain, r0); const delta = computeDelta(r0, r1); r0 = r1; console.log(`[${iteration}] delta = ${delta}`); @@ -172,10 +164,13 @@ function findStationaryDistribution(mc: TypedArrayMarkovChain): Distribution { throw new Error("Unreachable."); } -function distributionToPagerankResult(pi: Distribution): PagerankResult { +function distributionToPagerankResult( + nodeOrder: $ReadOnlyArray<Address>
, + pi: Distribution +): PagerankResult { const result = new AddressMap(); - pi.nodeOrder.forEach((address, i) => { - const probability = pi.data[i]; + nodeOrder.forEach((address, i) => { + const probability = pi[i]; result.add({address, probability}); }); return result; diff --git a/src/app/credExplorer/basicPagerank.test.js b/src/app/credExplorer/basicPagerank.test.js index b7dd93a..7f7f882 100644 --- a/src/app/credExplorer/basicPagerank.test.js +++ b/src/app/credExplorer/basicPagerank.test.js @@ -1,7 +1,7 @@ // @flow import {Graph} from "../../core/graph"; -import {graphToTypedArrayMarkovChain} from "./basicPagerank"; +import {graphToOrderedSparseMarkovChain} from "./basicPagerank"; describe("graphToMarkovChain", () => { it("is correct for a trivial one-node chain", () => { @@ -14,6 +14,6 @@ describe("graphToMarkovChain", () => { }, payload: "yes", }); - expect(graphToTypedArrayMarkovChain(g)).toMatchSnapshot(); + expect(graphToOrderedSparseMarkovChain(g)).toMatchSnapshot(); }); });