Use typed arrays for PageRank (#267)

Summary:
This takes `AddressMap` access, and therefore JSON stringification, off
the critical path, resulting in a significant performance increase. The
resulting code is much faster than the original TFJS implementation. On
my laptop, we can run about 300 iterations of PageRank per second on a
graph with 10 000 nodes and 18 000 edges (namely, the SourceCred graph).

Paired with @decentralion.

Test Plan:
Run `yarn start` and note that the cred attribution for SourceCred is
roughly the same as before… but is computed faster.

wchargin-branch: pagerank-typed-arrays
This commit is contained in:
William Chargin 2018-05-11 13:22:36 -07:00 committed by GitHub
parent 7e97ba6bf3
commit 3e70edb3be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 38 deletions

View File

@ -2,12 +2,22 @@
exports[`graphToMarkovChain is correct for a trivial one-node chain 1`] = ` exports[`graphToMarkovChain is correct for a trivial one-node chain 1`] = `
Object { Object {
"{\\"id\\":\\"who are you blah blah\\",\\"pluginName\\":\\"the magnificent foo plugin\\",\\"type\\":\\"irrelevant!\\"}": Object { "inNeighbors": Array [
"inNeighbors": Object { Object {
"{\\"id\\":\\"who are you blah blah\\",\\"pluginName\\":\\"the magnificent foo plugin\\",\\"type\\":\\"irrelevant!\\"}": Object { "neighbor": Uint32Array [
"weight": 1, 0,
}, ],
"weight": Float64Array [
1,
],
}, },
],
"nodeOrder": Array [
Object {
"id": "who are you blah blah",
"pluginName": "the magnificent foo plugin",
"type": "irrelevant!",
}, },
],
} }
`; `;

View File

@ -5,13 +5,16 @@ import type {Edge} from "../../core/graph";
import {AddressMap} from "../../core/address"; import {AddressMap} from "../../core/address";
import {Graph} from "../../core/graph"; import {Graph} from "../../core/graph";
export type Distribution = AddressMap<{| export type Distribution = {|
+nodeOrder: $ReadOnlyArray<Address>,
+data: Float64Array,
|};
export type PagerankResult = AddressMap<{|
+address: Address, +address: Address,
+probability: number, +probability: number,
|}>; |}>;
export type PagerankResult = Distribution;
type MarkovChain = AddressMap<{| type AddressMapMarkovChain = AddressMap<{|
+address: Address, +address: Address,
+inNeighbors: AddressMap<{| +inNeighbors: AddressMap<{|
+address: Address, +address: Address,
@ -19,8 +22,18 @@ type MarkovChain = AddressMap<{|
|}>, |}>,
|}>; |}>;
// A Markov chain stored in typed arrays, with each node identified by
// its position in `nodeOrder` rather than by its address.
type TypedArrayMarkovChain = {|
// Addresses of the chain's nodes; a node's index in this array is the
// index used for it everywhere else in this structure.
+nodeOrder: $ReadOnlyArray<Address>,
// One entry per node, in the same order as `nodeOrder`, describing the
// transitions into that node.
+inNeighbors: $ReadOnlyArray<{|
// Indices (into `nodeOrder`) of the nodes with a transition into this node.
+neighbor: Uint32Array,
// Parallel to `neighbor`: `weight[k]` is the factor applied to the
// probability of node `neighbor[k]` when computing this node's probability.
+weight: Float64Array,
|}>,
|};
export default function basicPagerank(graph: Graph<any, any>): PagerankResult { export default function basicPagerank(graph: Graph<any, any>): PagerankResult {
return findStationaryDistribution(graphToMarkovChain(graph)); return distributionToPagerankResult(
findStationaryDistribution(graphToTypedArrayMarkovChain(graph))
);
} }
function edgeWeight( function edgeWeight(
@ -29,7 +42,9 @@ function edgeWeight(
return {toWeight: 1, froWeight: 1}; return {toWeight: 1, froWeight: 1};
} }
export function graphToMarkovChain(graph: Graph<any, any>): MarkovChain { function graphToAddressMapMarkovChain(
graph: Graph<any, any>
): AddressMapMarkovChain {
const result = new AddressMap(); const result = new AddressMap();
const unnormalizedTotalOutWeights = new AddressMap(); const unnormalizedTotalOutWeights = new AddressMap();
@ -70,37 +85,71 @@ export function graphToMarkovChain(graph: Graph<any, any>): MarkovChain {
return result; return result;
} }
function markovChainAction(mc: MarkovChain, pi: Distribution): Distribution { function addressMapMarkovChainToTypedArrayMarkovChain(
const result = new AddressMap(); mc: AddressMapMarkovChain
mc.getAll().forEach(({address, inNeighbors}) => { ): TypedArrayMarkovChain {
let probability = 0; // The node ordering is arbitrary, but must be made canonical: calls
inNeighbors.getAll().forEach(({address: neighbor, weight}) => { // to `graph.nodes()` are not guaranteed to be stable.
probability += pi.get(neighbor).probability * weight; const nodeOrder = mc.getAll().map(({address}) => address);
const addressToIndex = new AddressMap();
nodeOrder.forEach((address, index) => {
addressToIndex.add({address, index});
}); });
result.add({address, probability}); return {
}); nodeOrder,
return result; inNeighbors: nodeOrder.map((address) => {
const theseNeighbors = mc.get(address).inNeighbors.getAll();
return {
neighbor: new Uint32Array(
theseNeighbors.map(({address}) => addressToIndex.get(address).index)
),
weight: new Float64Array(theseNeighbors.map(({weight}) => weight)),
};
}),
};
} }
function uniformDistribution(addresses: $ReadOnlyArray<Address>) { export function graphToTypedArrayMarkovChain(
const result = new AddressMap(); graph: Graph<any, any>
const probability = 1.0 / addresses.length; ): TypedArrayMarkovChain {
addresses.forEach((address) => { return addressMapMarkovChainToTypedArrayMarkovChain(
result.add({address, probability}); graphToAddressMapMarkovChain(graph)
});
return result;
}
function findStationaryDistribution(mc: MarkovChain): Distribution {
let r0 = uniformDistribution(mc.getAll().map(({address}) => address));
function computeDelta(pi0, pi1) {
return Math.max(
...pi0
.getAll()
.map(({address}) =>
Math.abs(pi0.get(address).probability - pi1.get(address).probability)
)
); );
}
function markovChainAction(
mc: TypedArrayMarkovChain,
pi: Distribution
): Distribution {
const data = new Float64Array(pi.data.length);
for (let dst = 0; dst < mc.nodeOrder.length; dst++) {
const theseNeighbors = mc.inNeighbors[dst];
const inDegree = theseNeighbors.neighbor.length;
let probability = 0;
for (let srcIndex = 0; srcIndex < inDegree; srcIndex++) {
const src = theseNeighbors.neighbor[srcIndex];
probability += pi.data[src] * theseNeighbors.weight[srcIndex];
}
data[dst] = probability;
}
return {nodeOrder: pi.nodeOrder, data};
}
function uniformDistribution(nodeOrder: $ReadOnlyArray<Address>): Distribution {
return {
nodeOrder,
data: new Float64Array(
Array(nodeOrder.length).fill(1.0 / nodeOrder.length)
),
};
}
function findStationaryDistribution(mc: TypedArrayMarkovChain): Distribution {
let r0 = uniformDistribution(mc.nodeOrder);
function computeDelta(pi0, pi1) {
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
// same (i.e., there has been no permutation).
return Math.max(...pi0.data.map((x, i) => Math.abs(x - pi1.data[i])));
} }
let iteration = 0; let iteration = 0;
while (true) { while (true) {
@ -122,3 +171,12 @@ function findStationaryDistribution(mc: MarkovChain): Distribution {
// eslint-disable-next-line no-unreachable // eslint-disable-next-line no-unreachable
throw new Error("Unreachable."); throw new Error("Unreachable.");
} }
function distributionToPagerankResult(pi: Distribution): PagerankResult {
const result = new AddressMap();
pi.nodeOrder.forEach((address, i) => {
const probability = pi.data[i];
result.add({address, probability});
});
return result;
}

View File

@ -1,7 +1,7 @@
// @flow // @flow
import {Graph} from "../../core/graph"; import {Graph} from "../../core/graph";
import {graphToMarkovChain} from "./basicPagerank"; import {graphToTypedArrayMarkovChain} from "./basicPagerank";
describe("graphToMarkovChain", () => { describe("graphToMarkovChain", () => {
it("is correct for a trivial one-node chain", () => { it("is correct for a trivial one-node chain", () => {
@ -14,6 +14,6 @@ describe("graphToMarkovChain", () => {
}, },
payload: "yes", payload: "yes",
}); });
expect(graphToMarkovChain(g)).toMatchSnapshot(); expect(graphToTypedArrayMarkovChain(g)).toMatchSnapshot();
}); });
}); });