From 61d3cb3f5292af3e2ed55abcab81cb259e6c3ab1 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Thu, 10 May 2018 11:21:18 -0700 Subject: [PATCH] Implement basic PageRank analysis (#252) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: We don’t expect the results to be of good quality right now. Rather, this gives us a starting point from which to iterate the algorithm. The convergence criterion also needs to be adjusted. (In particular, it should almost certainly not be a constant.) Test Plan: Run `yarn start`. Select a graph, like `sourcecred/example-github`. Open the JS console and click “Run basic PageRank”. Watch the console. wchargin-branch: basic-pagerank --- src/app/credExplorer/App.js | 44 ++++++++++++ src/app/credExplorer/basicPagerank.js | 96 +++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 src/app/credExplorer/basicPagerank.js diff --git a/src/app/credExplorer/App.js b/src/app/credExplorer/App.js index 8bbe0ca..bb46e3c 100644 --- a/src/app/credExplorer/App.js +++ b/src/app/credExplorer/App.js @@ -1,9 +1,12 @@ // @flow +import stringify from "json-stable-stringify"; import React from "react"; import {StyleSheet, css} from "aphrodite/no-important"; +import type {PagerankResult} from "./basicPagerank"; import {Graph} from "../../core/graph"; +import basicPagerank from "./basicPagerank"; type Props = {}; type State = { @@ -62,6 +65,19 @@ export default class App extends React.Component { ) : (

Graph not loaded.

)} + ); @@ -88,6 +104,34 @@ export default class App extends React.Component { console.error("Error while fetching:", e); }); } + + /** Groups every address in pagerankResult by its (pluginName, type) pair and logs, for each group, a bold header followed by up to five addresses with the highest probability, each shown with its score and address.id. */ analyzePagerankResult(pagerankResult: PagerankResult) { + /* stable JSON key: equal (pluginName, type) pairs share one bucket, and the key is parsed back into its fields further down */ const addressKey = ({pluginName, type}) => stringify({pluginName, type}); + const addressesByKey = {}; + pagerankResult.getAll().forEach(({address}) => { + if (addressesByKey[addressKey(address)] === undefined) { + addressesByKey[addressKey(address)] = []; + } + addressesByKey[addressKey(address)].push(address); + }); + Object.keys(addressesByKey).forEach((key) => { + addressesByKey[key] = addressesByKey[key] + /* slice() copies the bucket before sorting; an ascending sort followed by reverse() leaves it in descending probability order */ .slice() + .sort((x, y) => { + const px = pagerankResult.get(x).probability; + const py = pagerankResult.get(y).probability; + return px - py; + }) + .reverse(); + const {pluginName, type} = JSON.parse(key); + console.log(`%c${type} (${pluginName})`, "font-weight: bold"); + addressesByKey[key].slice(0, 5).forEach((address) => { + const score = pagerankResult.get(address).probability; + const name = address.id; + console.log(` - [${score.toString()}] ${name}`); + }); + }); + } } const styles = StyleSheet.create({ diff --git a/src/app/credExplorer/basicPagerank.js b/src/app/credExplorer/basicPagerank.js new file mode 100644 index 0000000..76119fa --- /dev/null +++ b/src/app/credExplorer/basicPagerank.js @@ -0,0 +1,96 @@ +// @flow + +import * as tf from "@tensorflow/tfjs-core"; + +import type {Address} from "../../core/address"; +import {AddressMap} from "../../core/address"; +import {Graph} from "../../core/graph"; + +export type PagerankResult = AddressMap<{| + +address: Address, + +probability: number, +|}>; + +/** Computes a probability for every node of graph: converts the graph to a Markov chain, iterates to an approximate stationary distribution, and returns an AddressMap holding one {address, probability} entry per node. All tensor work runs inside tf.tidy. */ export default function basicPagerank(graph: Graph): PagerankResult { + return tf.tidy(() => { + const {nodes, markovChain} = graphToMarkovChain(graph); + const stationaryDistribution = findStationaryDistribution(markovChain); + /* dataSync() reads the tensor values synchronously; entry i is paired with nodes[i] in the loop below */ const stationaryDistributionRaw = stationaryDistribution.dataSync(); + const result = new AddressMap(); + nodes.forEach((node, i) => { + 
result.add({ + address: node.address, + probability: stationaryDistributionRaw[i], + }); + }); + return result; + }); +} + +/** Builds a dense nodes.length by nodes.length transition matrix from graph and returns it together with the node list that fixes the row and column order. */ function graphToMarkovChain(graph: Graph) { + const nodes = graph.nodes(); // for canonical ordering + const addressToIndex = new AddressMap(); + nodes.forEach(({address}, index) => { + addressToIndex.add({address, index}); + }); + const buffer = tf.buffer([nodes.length, nodes.length]); + /* every edge whose endpoints both exist sets entries (u, v) and (v, u) to 1, so direction is ignored and repeated edges between the same pair do not accumulate; an edge with a missing endpoint is warned about and skipped */ graph.edges().forEach(({src, dst, address}) => { + if (graph.node(src) == null) { + console.warn("Edge has dangling src:", address, src); + return; + } + if (graph.node(dst) == null) { + console.warn("Edge has dangling dst:", address, dst); + return; + } + const u = addressToIndex.get(src).index; + const v = addressToIndex.get(dst).index; + buffer.set(1, u, v); + buffer.set(1, v, u); + }); + return { + nodes, + markovChain: tf.tidy(() => { + const dampingFactor = 1e-4; + const raw = buffer.toTensor(); + /* adding 1e-9 to every entry keeps each sum nonzero even for a node with no edges */ const nonsingular = raw.add(tf.scalar(1e-9)); + /* NOTE(review): sum(1) reduces over axis 1 and the division relies on broadcasting; the matrix is symmetric at this point, so row sums and column sums coincide, but confirm the normalization axis before introducing asymmetric weights */ const normalized = nonsingular.div(nonsingular.sum(1)); + /* convention used here: the link matrix gets weight 1 - dampingFactor and every entry of the all-ones matrix gets dampingFactor / nodes.length */ const damped = tf.add( + normalized.mul(tf.scalar(1 - dampingFactor)), + tf.onesLike(normalized).mul(tf.scalar(dampingFactor / nodes.length)) + ); + return damped; + }), + }; +} + +/** Power iteration: starts from the uniform n by 1 vector and repeatedly multiplies it by markovChain until the largest absolute change of any entry is below 1e-7 or 255 iterations have run. Returns the last vector in either case and logs the delta on every iteration. Throws an Error when markovChain is not a square rank-2 tensor. */ function findStationaryDistribution(markovChain: $Call) { + const n = markovChain.shape[0]; + if (markovChain.shape.length !== 2 || markovChain.shape[1] !== n) { + throw new Error(`Expected square matrix; got: ${markovChain.shape}`); + } + let r0 = tf.tidy(() => tf.ones([n, 1]).div(tf.scalar(n))); + function computeDelta(pi0, pi1) { + return tf.tidy(() => tf.max(tf.abs(pi0.sub(pi1))).dataSync()[0]); + } + let iteration = 0; + while (true) { + iteration++; + const r1 = tf.matMul(markovChain, r0); + const delta = computeDelta(r0, r1); + /* release the previous iterate before r1 takes its place */ r0.dispose(); + r0 = r1; + console.log(`[${iteration}] delta = ${delta}`); + if (delta < 1e-7) { + console.log(`[${iteration}] CONVERGED`); + return r0; + } + if (iteration >= 255) { + console.log(`[${iteration}] FAILED to converge`); + return r0; 
+ } + } + // ESLint knows that this next line is unreachable, but Flow doesn't. :-) + // eslint-disable-next-line no-unreachable + throw new Error("Unreachable."); +}