mirror of
https://github.com/status-im/sourcecred.git
synced 2025-01-27 12:55:14 +00:00
Expose contributions structure of Markov chains (#490)
Summary: When we convert a graph to a Markov chain, each cell in the transition matrix is a sum of edge weights from the given `src` to the given `dst`, plus the synthetic self-loop needed for stability. Performing this sum loses information: given only the transition matrix, a client cannot determine how much a particular edge contributed to the score of a node without redoing the relevant computations. In this commit, we expose the structure of these contributions (i.e., edges and synthetic loops). This changes the API of `graphToMarkovChain.js`, but it does not change the resulting Markov chains. It also does not change the API of `pagerank.js`. In particular, clients of `pagerank.js` will not have access to the contributions structure that we have just created. Test Plan: Existing unit tests have been updated to use the new API, and otherwise pass without change. An additional test is added for a newly exposed function, even though this function is also tested extensively as part of later downstream tests. In one snapshot, one value changes from `0.25` to `0.25 + 1.7e-16`. The other values in the enclosing distribution do not change, so I think that this is more likely due to floating-point instability than to an actual bug. (I’m not sure exactly where I commuted or reassociated an operation, but it’s quite possible that I did.) To compensate, I added an additional check that the values in the stationary distribution sum to `1.0` within `1e-9` tolerance; this check passes. wchargin-branch: expose-contributions
This commit is contained in:
parent
8921b5b942
commit
761a44c561
@ -47,7 +47,7 @@ Array [
|
||||
"parts": Array [
|
||||
"loop",
|
||||
],
|
||||
"probability": 0.25,
|
||||
"probability": 0.25000000000000017,
|
||||
},
|
||||
Object {
|
||||
"parts": Array [
|
||||
|
@ -1,12 +1,48 @@
|
||||
// @flow
|
||||
|
||||
import {type Edge, type Graph, type NodeAddressT, NodeAddress} from "../graph";
|
||||
import {
|
||||
type Edge,
|
||||
type Graph,
|
||||
type Neighbor,
|
||||
type NodeAddressT,
|
||||
NodeAddress,
|
||||
} from "../graph";
|
||||
import type {Distribution, SparseMarkovChain} from "./markovChain";
|
||||
|
||||
export type Probability = number;
|
||||
export type Contributor =
|
||||
| {|+type: "SYNTHETIC_LOOP"|}
|
||||
| {|+type: "NEIGHBOR", +neighbor: Neighbor|};
|
||||
export type Contribution = {|
|
||||
+contributor: Contributor,
|
||||
// This `weight` is a conditional probability: given that you're at
|
||||
// the source of this contribution's contributor, what's the
|
||||
// probability that you travel along this contribution to the target?
|
||||
+weight: Probability,
|
||||
|};
|
||||
|
||||
export function contributorSource(
|
||||
target: NodeAddressT,
|
||||
contributor: Contributor
|
||||
) {
|
||||
switch (contributor.type) {
|
||||
case "SYNTHETIC_LOOP":
|
||||
return target;
|
||||
case "NEIGHBOR":
|
||||
return contributor.neighbor.node;
|
||||
default:
|
||||
throw new Error((contributor.type: empty));
|
||||
}
|
||||
}
|
||||
|
||||
export type PagerankResult = Map<NodeAddressT, Probability>;
|
||||
|
||||
type AddressMapMarkovChain = Map<
|
||||
export type NodeToContributions = Map<
|
||||
NodeAddressT,
|
||||
$ReadOnlyArray<Contribution>
|
||||
>;
|
||||
|
||||
type NodeAddressMarkovChain = Map<
|
||||
NodeAddressT,
|
||||
/* in-neighbors */ Map<NodeAddressT, Probability>
|
||||
>;
|
||||
@ -21,63 +57,101 @@ export type EdgeWeight = {|
|
||||
+froWeight: number, // weight from dst to src
|
||||
|};
|
||||
|
||||
function graphToAddressMapMarkovChain(
|
||||
export function createContributions(
|
||||
graph: Graph,
|
||||
edgeWeight: (Edge) => EdgeWeight,
|
||||
selfLoopEdgeWeight: number
|
||||
): AddressMapMarkovChain {
|
||||
const inNeighbors: AddressMapMarkovChain = new Map();
|
||||
syntheticLoopWeight: number
|
||||
): NodeToContributions {
|
||||
const result = new Map();
|
||||
const totalOutWeight: Map<NodeAddressT, number> = new Map();
|
||||
for (const node of graph.nodes()) {
|
||||
inNeighbors.set(node, new Map());
|
||||
result.set(node, []);
|
||||
totalOutWeight.set(node, 0);
|
||||
}
|
||||
|
||||
function moreWeight(src, dst, weight) {
|
||||
const neighbors = inNeighbors.get(dst);
|
||||
if (neighbors == null) {
|
||||
function processContribution(
|
||||
target: NodeAddressT,
|
||||
contribution: Contribution
|
||||
) {
|
||||
const contributions = result.get(target);
|
||||
if (contributions == null) {
|
||||
// Should be impossible based on graph invariants.
|
||||
throw new Error("missing dst: " + NodeAddress.toString(dst));
|
||||
throw new Error("missing target: " + NodeAddress.toString(target));
|
||||
}
|
||||
neighbors.set(src, weight + (neighbors.get(src) || 0));
|
||||
(((contributions: $ReadOnlyArray<Contribution>): any): Contribution[]).push(
|
||||
contribution
|
||||
);
|
||||
|
||||
const priorOutWeight = totalOutWeight.get(src);
|
||||
const source = contributorSource(target, contribution.contributor);
|
||||
const priorOutWeight = totalOutWeight.get(source);
|
||||
if (priorOutWeight == null) {
|
||||
// Should be impossible based on graph invariants.
|
||||
throw new Error("missing src: " + NodeAddress.toString(src));
|
||||
throw new Error("missing source: " + NodeAddress.toString(source));
|
||||
}
|
||||
totalOutWeight.set(src, priorOutWeight + weight);
|
||||
totalOutWeight.set(source, priorOutWeight + contribution.weight);
|
||||
}
|
||||
|
||||
// Add self-loops.
|
||||
for (const node of graph.nodes()) {
|
||||
moreWeight(node, node, selfLoopEdgeWeight);
|
||||
processContribution(node, {
|
||||
contributor: {type: "SYNTHETIC_LOOP"},
|
||||
weight: syntheticLoopWeight,
|
||||
});
|
||||
}
|
||||
|
||||
// Process edges.
|
||||
for (const edge of graph.edges()) {
|
||||
const {toWeight, froWeight} = edgeWeight(edge);
|
||||
const {src, dst} = edge;
|
||||
moreWeight(src, dst, toWeight);
|
||||
moreWeight(dst, src, froWeight);
|
||||
processContribution(dst, {
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: src, edge}},
|
||||
weight: toWeight,
|
||||
});
|
||||
processContribution(src, {
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: dst, edge}},
|
||||
weight: froWeight,
|
||||
});
|
||||
}
|
||||
|
||||
// Normalize in-weights.
|
||||
for (const neighbors of inNeighbors.values()) {
|
||||
for (const [neighbor, weight] of neighbors.entries()) {
|
||||
const normalization = totalOutWeight.get(neighbor);
|
||||
for (const [target, contributions] of result.entries()) {
|
||||
for (const contribution of contributions) {
|
||||
const source = contributorSource(target, contribution.contributor);
|
||||
const normalization = totalOutWeight.get(source);
|
||||
if (normalization == null) {
|
||||
// Should be impossible.
|
||||
throw new Error("missing node: " + NodeAddress.toString(neighbor));
|
||||
throw new Error("missing node: " + NodeAddress.toString(source));
|
||||
}
|
||||
neighbors.set(neighbor, weight / normalization);
|
||||
const newWeight: typeof contribution.weight =
|
||||
contribution.weight / normalization;
|
||||
// (any-cast because property is not writable)
|
||||
(contribution: any).weight = newWeight;
|
||||
}
|
||||
}
|
||||
return inNeighbors;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
function addressMapMarkovChainToOrderedSparseMarkovChain(
|
||||
chain: AddressMapMarkovChain
|
||||
function createNodeAddressMarkovChain(
|
||||
ntc: NodeToContributions
|
||||
): NodeAddressMarkovChain {
|
||||
const result: NodeAddressMarkovChain = new Map();
|
||||
for (const [target, contributions] of ntc.entries()) {
|
||||
const inNeighbors = new Map();
|
||||
result.set(target, inNeighbors);
|
||||
for (const contribution of contributions) {
|
||||
const source = contributorSource(target, contribution.contributor);
|
||||
inNeighbors.set(
|
||||
source,
|
||||
contribution.weight + (inNeighbors.get(source) || 0)
|
||||
);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function nodeAddressMarkovChainToOrderedSparseMarkovChain(
|
||||
chain: NodeAddressMarkovChain
|
||||
): OrderedSparseMarkovChain {
|
||||
const nodeOrder = Array.from(chain.keys());
|
||||
const addressToIndex: Map<NodeAddressT, number> = new Map();
|
||||
@ -112,14 +186,11 @@ function addressMapMarkovChainToOrderedSparseMarkovChain(
|
||||
};
|
||||
}
|
||||
|
||||
export function graphToOrderedSparseMarkovChain(
|
||||
graph: Graph,
|
||||
edgeWeight: (Edge) => EdgeWeight,
|
||||
selfLoopEdgeWeight: number
|
||||
export function createOrderedSparseMarkovChain(
|
||||
contributions: NodeToContributions
|
||||
): OrderedSparseMarkovChain {
|
||||
return addressMapMarkovChainToOrderedSparseMarkovChain(
|
||||
graphToAddressMapMarkovChain(graph, edgeWeight, selfLoopEdgeWeight)
|
||||
);
|
||||
const chain = createNodeAddressMarkovChain(contributions);
|
||||
return nodeAddressMarkovChainToOrderedSparseMarkovChain(chain);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -147,7 +218,10 @@ export function permute(
|
||||
);
|
||||
newChain.push({neighbor: newNeighbors, weight});
|
||||
}
|
||||
return {nodeOrder: newOrder, chain: newChain};
|
||||
return {
|
||||
nodeOrder: newOrder,
|
||||
chain: newChain,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,9 +1,12 @@
|
||||
// @flow
|
||||
|
||||
import sortBy from "lodash.sortby";
|
||||
|
||||
import {EdgeAddress, Graph, NodeAddress} from "../graph";
|
||||
import {
|
||||
distributionToPagerankResult,
|
||||
graphToOrderedSparseMarkovChain,
|
||||
createContributions,
|
||||
createOrderedSparseMarkovChain,
|
||||
normalize,
|
||||
normalizeNeighbors,
|
||||
permute,
|
||||
@ -77,14 +80,96 @@ describe("core/attribution/graphToMarkovChain", () => {
|
||||
expect(actual).toEqual(expected);
|
||||
});
|
||||
|
||||
describe("graphToOrderedSparseMarkovChain", () => {
|
||||
describe("createContributions", () => {
|
||||
// The tests for `createOrderedSparseMarkovChain` also must invoke
|
||||
// `createContributions`, so we add only light testing separately.
|
||||
it("works on a simple asymmetric chain", () => {
|
||||
const n1 = NodeAddress.fromParts(["n1"]);
|
||||
const n2 = NodeAddress.fromParts(["n2"]);
|
||||
const n3 = NodeAddress.fromParts(["sink"]);
|
||||
const e1 = {src: n1, dst: n2, address: EdgeAddress.fromParts(["e1"])};
|
||||
const e2 = {src: n2, dst: n3, address: EdgeAddress.fromParts(["e2"])};
|
||||
const e3 = {src: n1, dst: n3, address: EdgeAddress.fromParts(["e3"])};
|
||||
const e4 = {src: n3, dst: n3, address: EdgeAddress.fromParts(["e4"])};
|
||||
const g = new Graph()
|
||||
.addNode(n1)
|
||||
.addNode(n2)
|
||||
.addNode(n3)
|
||||
.addEdge(e1)
|
||||
.addEdge(e2)
|
||||
.addEdge(e3)
|
||||
.addEdge(e4);
|
||||
const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
|
||||
const actual = createContributions(g, edgeWeight, 1.0);
|
||||
// Total out-weights (for normalization factors):
|
||||
// - for `n1`: 2 out, 0 in, 1 synthetic: 12 + 0 + 1 = 13
|
||||
// - for `n2`: 1 out, 1 in, 1 synthetic: 6 + 3 + 1 = 10
|
||||
// - for `n3`: 1 out, 3 in, 1 synthetic: 6 + 9 + 1 = 16
|
||||
const expected = new Map()
|
||||
.set(n1, [
|
||||
{contributor: {type: "SYNTHETIC_LOOP"}, weight: 1 / 13},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n2, edge: e1}},
|
||||
weight: 3 / 10,
|
||||
},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n3, edge: e3}},
|
||||
weight: 3 / 16,
|
||||
},
|
||||
])
|
||||
.set(n2, [
|
||||
{contributor: {type: "SYNTHETIC_LOOP"}, weight: 1 / 10},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n1, edge: e1}},
|
||||
weight: 6 / 13,
|
||||
},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n3, edge: e2}},
|
||||
weight: 3 / 16,
|
||||
},
|
||||
])
|
||||
.set(n3, [
|
||||
{contributor: {type: "SYNTHETIC_LOOP"}, weight: 1 / 16},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n2, edge: e2}},
|
||||
weight: 6 / 10,
|
||||
},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n1, edge: e3}},
|
||||
weight: 6 / 13,
|
||||
},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n3, edge: e4}},
|
||||
// this loop, as an out-edge
|
||||
weight: 3 / 16,
|
||||
},
|
||||
{
|
||||
contributor: {type: "NEIGHBOR", neighbor: {node: n3, edge: e4}},
|
||||
// this loop, as an in-edge
|
||||
weight: 6 / 16,
|
||||
},
|
||||
]);
|
||||
const canonicalize = (map) =>
|
||||
new Map(
|
||||
Array.from(map.entries()).map(([k, v]) => [
|
||||
k,
|
||||
sortBy(v, (x) => JSON.stringify(x)),
|
||||
])
|
||||
);
|
||||
expect(canonicalize(actual)).toEqual(canonicalize(expected));
|
||||
});
|
||||
});
|
||||
|
||||
describe("createOrderedSparseMarkovChain", () => {
|
||||
it("works on a trivial one-node chain with no edge", () => {
|
||||
const n = NodeAddress.fromParts(["foo"]);
|
||||
const g = new Graph().addNode(n);
|
||||
const edgeWeight = (_unused_edge) => {
|
||||
throw new Error("Don't even look at me");
|
||||
};
|
||||
const osmc = graphToOrderedSparseMarkovChain(g, edgeWeight, 1e-3);
|
||||
const osmc = createOrderedSparseMarkovChain(
|
||||
createContributions(g, edgeWeight, 1e-3)
|
||||
);
|
||||
const expected = {
|
||||
nodeOrder: [n],
|
||||
chain: [
|
||||
@ -94,7 +179,7 @@ describe("core/attribution/graphToMarkovChain", () => {
|
||||
expect(normalize(osmc)).toEqual(normalize(expected));
|
||||
});
|
||||
|
||||
it("works on a simple asymmetric two-node chain", () => {
|
||||
it("works on a simple asymmetric chain", () => {
|
||||
const n1 = NodeAddress.fromParts(["n1"]);
|
||||
const n2 = NodeAddress.fromParts(["n2"]);
|
||||
const n3 = NodeAddress.fromParts(["sink"]);
|
||||
@ -111,7 +196,9 @@ describe("core/attribution/graphToMarkovChain", () => {
|
||||
.addEdge(e3)
|
||||
.addEdge(e4);
|
||||
const edgeWeight = () => ({toWeight: 1, froWeight: 0});
|
||||
const osmc = graphToOrderedSparseMarkovChain(g, edgeWeight, 0.0);
|
||||
const osmc = createOrderedSparseMarkovChain(
|
||||
createContributions(g, edgeWeight, 0.0)
|
||||
);
|
||||
const expected = {
|
||||
nodeOrder: [n1, n2, n3],
|
||||
chain: [
|
||||
@ -147,7 +234,9 @@ describe("core/attribution/graphToMarkovChain", () => {
|
||||
.addEdge(e2)
|
||||
.addEdge(e3);
|
||||
const edgeWeight = () => ({toWeight: 1, froWeight: 1});
|
||||
const osmc = graphToOrderedSparseMarkovChain(g, edgeWeight, 0.0);
|
||||
const osmc = createOrderedSparseMarkovChain(
|
||||
createContributions(g, edgeWeight, 0.0)
|
||||
);
|
||||
const expected = {
|
||||
nodeOrder: [n1, n2, n3],
|
||||
chain: [
|
||||
@ -177,7 +266,9 @@ describe("core/attribution/graphToMarkovChain", () => {
|
||||
// arithmetic simple.
|
||||
return {toWeight: 4 - epsilon / 2, froWeight: 1 - epsilon / 2};
|
||||
}
|
||||
const osmc = graphToOrderedSparseMarkovChain(g, edgeWeight, epsilon);
|
||||
const osmc = createOrderedSparseMarkovChain(
|
||||
createContributions(g, edgeWeight, epsilon)
|
||||
);
|
||||
// Edges from `src`:
|
||||
// - to `src` with weight `epsilon`
|
||||
// - to `dst` with weight `4 - epsilon / 2`
|
||||
|
@ -4,7 +4,8 @@ import {type Edge, Graph} from "../graph";
|
||||
import {
|
||||
type PagerankResult,
|
||||
distributionToPagerankResult,
|
||||
graphToOrderedSparseMarkovChain,
|
||||
createContributions,
|
||||
createOrderedSparseMarkovChain,
|
||||
type EdgeWeight,
|
||||
} from "./graphToMarkovChain";
|
||||
|
||||
@ -39,11 +40,12 @@ export function pagerank(
|
||||
...defaultOptions(),
|
||||
...(options || {}),
|
||||
};
|
||||
const osmc = graphToOrderedSparseMarkovChain(
|
||||
const contributions = createContributions(
|
||||
graph,
|
||||
edgeWeight,
|
||||
fullOptions.selfLoopWeight
|
||||
);
|
||||
const osmc = createOrderedSparseMarkovChain(contributions);
|
||||
const distribution = findStationaryDistribution(osmc.chain, {
|
||||
verbose: fullOptions.verbose,
|
||||
convergenceThreshold: fullOptions.convergenceThreshold,
|
||||
|
@ -5,6 +5,8 @@ import {NodeAddress} from "../graph";
|
||||
import {advancedGraph} from "../graphTestUtil";
|
||||
|
||||
function snapshotPagerankResult(result) {
|
||||
const prTotal = Array.from(result.values()).reduce((a, b) => a + b, 0);
|
||||
expect(prTotal).toBeCloseTo(1.0, 1e-9);
|
||||
const partsToProbability = [];
|
||||
const sortedKeys = Array.from(result.keys()).sort();
|
||||
for (const key of sortedKeys) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user