mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-25 10:45:27 +00:00
Use typed arrays for PageRank (#267)
Summary:
This takes `AddressMap` access, and therefore JSON stringification, off the critical path, resulting in a significant performance increase.

The resulting code is much faster than the original TFJS implementation. On my laptop, we can run about 300 iterations of PageRank per second on a graph with 10 000 nodes and 18 000 edges (namely, the SourceCred graph).

Paired with @decentralion.

Test Plan:
Run `yarn start` and note that the cred attribution for SourceCred is roughly the same as before… but is created faster.

wchargin-branch: pagerank-typed-arrays
This commit is contained in:
parent
7e97ba6bf3
commit
3e70edb3be
@ -2,12 +2,22 @@
|
|||||||
|
|
||||||
exports[`graphToMarkovChain is correct for a trivial one-node chain 1`] = `
|
exports[`graphToMarkovChain is correct for a trivial one-node chain 1`] = `
|
||||||
Object {
|
Object {
|
||||||
"{\\"id\\":\\"who are you blah blah\\",\\"pluginName\\":\\"the magnificent foo plugin\\",\\"type\\":\\"irrelevant!\\"}": Object {
|
"inNeighbors": Array [
|
||||||
"inNeighbors": Object {
|
Object {
|
||||||
"{\\"id\\":\\"who are you blah blah\\",\\"pluginName\\":\\"the magnificent foo plugin\\",\\"type\\":\\"irrelevant!\\"}": Object {
|
"neighbor": Uint32Array [
|
||||||
"weight": 1,
|
0,
|
||||||
},
|
],
|
||||||
|
"weight": Float64Array [
|
||||||
|
1,
|
||||||
|
],
|
||||||
},
|
},
|
||||||
},
|
],
|
||||||
|
"nodeOrder": Array [
|
||||||
|
Object {
|
||||||
|
"id": "who are you blah blah",
|
||||||
|
"pluginName": "the magnificent foo plugin",
|
||||||
|
"type": "irrelevant!",
|
||||||
|
},
|
||||||
|
],
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
@ -5,13 +5,16 @@ import type {Edge} from "../../core/graph";
|
|||||||
import {AddressMap} from "../../core/address";
|
import {AddressMap} from "../../core/address";
|
||||||
import {Graph} from "../../core/graph";
|
import {Graph} from "../../core/graph";
|
||||||
|
|
||||||
export type Distribution = AddressMap<{|
|
export type Distribution = {|
|
||||||
|
+nodeOrder: $ReadOnlyArray<Address>,
|
||||||
|
+data: Float64Array,
|
||||||
|
|};
|
||||||
|
export type PagerankResult = AddressMap<{|
|
||||||
+address: Address,
|
+address: Address,
|
||||||
+probability: number,
|
+probability: number,
|
||||||
|}>;
|
|}>;
|
||||||
export type PagerankResult = Distribution;
|
|
||||||
|
|
||||||
type MarkovChain = AddressMap<{|
|
type AddressMapMarkovChain = AddressMap<{|
|
||||||
+address: Address,
|
+address: Address,
|
||||||
+inNeighbors: AddressMap<{|
|
+inNeighbors: AddressMap<{|
|
||||||
+address: Address,
|
+address: Address,
|
||||||
@ -19,8 +22,18 @@ type MarkovChain = AddressMap<{|
|
|||||||
|}>,
|
|}>,
|
||||||
|}>;
|
|}>;
|
||||||
|
|
||||||
|
type TypedArrayMarkovChain = {|
|
||||||
|
+nodeOrder: $ReadOnlyArray<Address>,
|
||||||
|
+inNeighbors: $ReadOnlyArray<{|
|
||||||
|
+neighbor: Uint32Array,
|
||||||
|
+weight: Float64Array,
|
||||||
|
|}>,
|
||||||
|
|};
|
||||||
|
|
||||||
export default function basicPagerank(graph: Graph<any, any>): PagerankResult {
|
export default function basicPagerank(graph: Graph<any, any>): PagerankResult {
|
||||||
return findStationaryDistribution(graphToMarkovChain(graph));
|
return distributionToPagerankResult(
|
||||||
|
findStationaryDistribution(graphToTypedArrayMarkovChain(graph))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function edgeWeight(
|
function edgeWeight(
|
||||||
@ -29,7 +42,9 @@ function edgeWeight(
|
|||||||
return {toWeight: 1, froWeight: 1};
|
return {toWeight: 1, froWeight: 1};
|
||||||
}
|
}
|
||||||
|
|
||||||
export function graphToMarkovChain(graph: Graph<any, any>): MarkovChain {
|
function graphToAddressMapMarkovChain(
|
||||||
|
graph: Graph<any, any>
|
||||||
|
): AddressMapMarkovChain {
|
||||||
const result = new AddressMap();
|
const result = new AddressMap();
|
||||||
const unnormalizedTotalOutWeights = new AddressMap();
|
const unnormalizedTotalOutWeights = new AddressMap();
|
||||||
|
|
||||||
@ -70,37 +85,71 @@ export function graphToMarkovChain(graph: Graph<any, any>): MarkovChain {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function markovChainAction(mc: MarkovChain, pi: Distribution): Distribution {
|
function addressMapMarkovChainToTypedArrayMarkovChain(
|
||||||
const result = new AddressMap();
|
mc: AddressMapMarkovChain
|
||||||
mc.getAll().forEach(({address, inNeighbors}) => {
|
): TypedArrayMarkovChain {
|
||||||
|
// The node ordering is arbitrary, but must be made canonical: calls
|
||||||
|
// to `graph.nodes()` are not guaranteed to be stable.
|
||||||
|
const nodeOrder = mc.getAll().map(({address}) => address);
|
||||||
|
const addressToIndex = new AddressMap();
|
||||||
|
nodeOrder.forEach((address, index) => {
|
||||||
|
addressToIndex.add({address, index});
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
nodeOrder,
|
||||||
|
inNeighbors: nodeOrder.map((address) => {
|
||||||
|
const theseNeighbors = mc.get(address).inNeighbors.getAll();
|
||||||
|
return {
|
||||||
|
neighbor: new Uint32Array(
|
||||||
|
theseNeighbors.map(({address}) => addressToIndex.get(address).index)
|
||||||
|
),
|
||||||
|
weight: new Float64Array(theseNeighbors.map(({weight}) => weight)),
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function graphToTypedArrayMarkovChain(
|
||||||
|
graph: Graph<any, any>
|
||||||
|
): TypedArrayMarkovChain {
|
||||||
|
return addressMapMarkovChainToTypedArrayMarkovChain(
|
||||||
|
graphToAddressMapMarkovChain(graph)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function markovChainAction(
|
||||||
|
mc: TypedArrayMarkovChain,
|
||||||
|
pi: Distribution
|
||||||
|
): Distribution {
|
||||||
|
const data = new Float64Array(pi.data.length);
|
||||||
|
for (let dst = 0; dst < mc.nodeOrder.length; dst++) {
|
||||||
|
const theseNeighbors = mc.inNeighbors[dst];
|
||||||
|
const inDegree = theseNeighbors.neighbor.length;
|
||||||
let probability = 0;
|
let probability = 0;
|
||||||
inNeighbors.getAll().forEach(({address: neighbor, weight}) => {
|
for (let srcIndex = 0; srcIndex < inDegree; srcIndex++) {
|
||||||
probability += pi.get(neighbor).probability * weight;
|
const src = theseNeighbors.neighbor[srcIndex];
|
||||||
});
|
probability += pi.data[src] * theseNeighbors.weight[srcIndex];
|
||||||
result.add({address, probability});
|
}
|
||||||
});
|
data[dst] = probability;
|
||||||
return result;
|
}
|
||||||
|
return {nodeOrder: pi.nodeOrder, data};
|
||||||
}
|
}
|
||||||
|
|
||||||
function uniformDistribution(addresses: $ReadOnlyArray<Address>) {
|
function uniformDistribution(nodeOrder: $ReadOnlyArray<Address>): Distribution {
|
||||||
const result = new AddressMap();
|
return {
|
||||||
const probability = 1.0 / addresses.length;
|
nodeOrder,
|
||||||
addresses.forEach((address) => {
|
data: new Float64Array(
|
||||||
result.add({address, probability});
|
Array(nodeOrder.length).fill(1.0 / nodeOrder.length)
|
||||||
});
|
),
|
||||||
return result;
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function findStationaryDistribution(mc: MarkovChain): Distribution {
|
function findStationaryDistribution(mc: TypedArrayMarkovChain): Distribution {
|
||||||
let r0 = uniformDistribution(mc.getAll().map(({address}) => address));
|
let r0 = uniformDistribution(mc.nodeOrder);
|
||||||
function computeDelta(pi0, pi1) {
|
function computeDelta(pi0, pi1) {
|
||||||
return Math.max(
|
// Here, we assume that `pi0.nodeOrder` and `pi1.nodeOrder` are the
|
||||||
...pi0
|
// same (i.e., there has been no permutation).
|
||||||
.getAll()
|
return Math.max(...pi0.data.map((x, i) => Math.abs(x - pi1.data[i])));
|
||||||
.map(({address}) =>
|
|
||||||
Math.abs(pi0.get(address).probability - pi1.get(address).probability)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
let iteration = 0;
|
let iteration = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -122,3 +171,12 @@ function findStationaryDistribution(mc: MarkovChain): Distribution {
|
|||||||
// eslint-disable-next-line no-unreachable
|
// eslint-disable-next-line no-unreachable
|
||||||
throw new Error("Unreachable.");
|
throw new Error("Unreachable.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function distributionToPagerankResult(pi: Distribution): PagerankResult {
|
||||||
|
const result = new AddressMap();
|
||||||
|
pi.nodeOrder.forEach((address, i) => {
|
||||||
|
const probability = pi.data[i];
|
||||||
|
result.add({address, probability});
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
// @flow
|
// @flow
|
||||||
|
|
||||||
import {Graph} from "../../core/graph";
|
import {Graph} from "../../core/graph";
|
||||||
import {graphToMarkovChain} from "./basicPagerank";
|
import {graphToTypedArrayMarkovChain} from "./basicPagerank";
|
||||||
|
|
||||||
describe("graphToMarkovChain", () => {
|
describe("graphToMarkovChain", () => {
|
||||||
it("is correct for a trivial one-node chain", () => {
|
it("is correct for a trivial one-node chain", () => {
|
||||||
@ -14,6 +14,6 @@ describe("graphToMarkovChain", () => {
|
|||||||
},
|
},
|
||||||
payload: "yes",
|
payload: "yes",
|
||||||
});
|
});
|
||||||
expect(graphToMarkovChain(g)).toMatchSnapshot();
|
expect(graphToTypedArrayMarkovChain(g)).toMatchSnapshot();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user