mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-10 03:34:15 +00:00
Add pagerankNodeDecomposition
to ease rendering (#501)
Summary: When updating `PagerankTable` to work with contributions, we found it difficult to keep track of everything when we tried to do two things simultaneously: compute the values to be displayed, and render them hierarchically. @decentralion suggested computing the relevant data ahead of time, and then having a straightforward React component to render this structure. This would incidentally make `PagerankTable` easier to test. This commit implements that data structure and the function to create it from a `PagerankResult`. A subsequent commit will update `PagerankTable` accordingly. As evidence that this structure is well-designed, note that the main contents of a contribution row can be rendered entirely from a `ScoredContribution` datum (though the component will still of course require the full `PagerankNodeDecomposition` to pass down to its children). (At least, I think that it can be!) Designed with @decentralion. Test Plan: Unit tests added. I have checked that the snapshot is structurally correct: each node has contributions with the correct contributors. I did not manually compute the stationary distribution and check the snapshot for correctness. The snapshot is complemented by automated tests. wchargin-branch: pagerank-node-decomposition
This commit is contained in:
parent
09958e1ee2
commit
b2a70f605c
@ -0,0 +1,182 @@
|
|||||||
|
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||||
|
|
||||||
|
exports[`core/attribution/contributions decompose has the expected output on a simple asymmetric chain 1`] = `
|
||||||
|
Map {
|
||||||
|
"NodeAddress[\\"n1\\"]" => Object {
|
||||||
|
"score": 0.19117656878499834,
|
||||||
|
"scoredContributions": Array [
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e3\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"n1\\"]",
|
||||||
|
},
|
||||||
|
"type": "OUT_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.1875,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.1102941261444197,
|
||||||
|
"source": "NodeAddress[\\"sink\\"]",
|
||||||
|
"sourceScore": 0.5882353394369051,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e1\\"]",
|
||||||
|
"dst": "NodeAddress[\\"n2\\"]",
|
||||||
|
"src": "NodeAddress[\\"n1\\"]",
|
||||||
|
},
|
||||||
|
"type": "OUT_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.3,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.066176427533429,
|
||||||
|
"source": "NodeAddress[\\"n2\\"]",
|
||||||
|
"sourceScore": 0.22058809177809668,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"type": "SYNTHETIC_LOOP",
|
||||||
|
},
|
||||||
|
"weight": 0.07692307692307693,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.014705889906538334,
|
||||||
|
"source": "NodeAddress[\\"n1\\"]",
|
||||||
|
"sourceScore": 0.19117656878499834,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"NodeAddress[\\"n2\\"]" => Object {
|
||||||
|
"score": 0.22058809177809668,
|
||||||
|
"scoredContributions": Array [
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e2\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"n2\\"]",
|
||||||
|
},
|
||||||
|
"type": "OUT_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.1875,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.1102941261444197,
|
||||||
|
"source": "NodeAddress[\\"sink\\"]",
|
||||||
|
"sourceScore": 0.5882353394369051,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e1\\"]",
|
||||||
|
"dst": "NodeAddress[\\"n2\\"]",
|
||||||
|
"src": "NodeAddress[\\"n1\\"]",
|
||||||
|
},
|
||||||
|
"type": "IN_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.46153846153846156,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.08823533943923001,
|
||||||
|
"source": "NodeAddress[\\"n1\\"]",
|
||||||
|
"sourceScore": 0.19117656878499834,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"type": "SYNTHETIC_LOOP",
|
||||||
|
},
|
||||||
|
"weight": 0.1,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.02205880917780967,
|
||||||
|
"source": "NodeAddress[\\"n2\\"]",
|
||||||
|
"sourceScore": 0.22058809177809668,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"NodeAddress[\\"sink\\"]" => Object {
|
||||||
|
"score": 0.5882353394369051,
|
||||||
|
"scoredContributions": Array [
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e4\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"sink\\"]",
|
||||||
|
},
|
||||||
|
"type": "IN_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.375,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.2205882522888394,
|
||||||
|
"source": "NodeAddress[\\"sink\\"]",
|
||||||
|
"sourceScore": 0.5882353394369051,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e2\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"n2\\"]",
|
||||||
|
},
|
||||||
|
"type": "IN_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.6,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.132352855066858,
|
||||||
|
"source": "NodeAddress[\\"n2\\"]",
|
||||||
|
"sourceScore": 0.22058809177809668,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e4\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"sink\\"]",
|
||||||
|
},
|
||||||
|
"type": "OUT_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.1875,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.1102941261444197,
|
||||||
|
"source": "NodeAddress[\\"sink\\"]",
|
||||||
|
"sourceScore": 0.5882353394369051,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"edge": Object {
|
||||||
|
"address": "EdgeAddress[\\"e3\\"]",
|
||||||
|
"dst": "NodeAddress[\\"sink\\"]",
|
||||||
|
"src": "NodeAddress[\\"n1\\"]",
|
||||||
|
},
|
||||||
|
"type": "IN_EDGE",
|
||||||
|
},
|
||||||
|
"weight": 0.46153846153846156,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.08823533943923001,
|
||||||
|
"source": "NodeAddress[\\"n1\\"]",
|
||||||
|
"sourceScore": 0.19117656878499834,
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"contribution": Object {
|
||||||
|
"contributor": Object {
|
||||||
|
"type": "SYNTHETIC_LOOP",
|
||||||
|
},
|
||||||
|
"weight": 0.0625,
|
||||||
|
},
|
||||||
|
"contributionScore": 0.03676470871480657,
|
||||||
|
"source": "NodeAddress[\\"sink\\"]",
|
||||||
|
"sourceScore": 0.5882353394369051,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
`;
|
58
src/core/attribution/pagerankNodeDecomposition.js
Normal file
58
src/core/attribution/pagerankNodeDecomposition.js
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import sortBy from "lodash.sortby";
|
||||||
|
|
||||||
|
import {type NodeAddressT, NodeAddress} from "../graph";
|
||||||
|
import {
|
||||||
|
type Contribution,
|
||||||
|
type NodeToContributions,
|
||||||
|
contributorSource,
|
||||||
|
} from "./graphToMarkovChain";
|
||||||
|
import type {PagerankResult} from "./pagerank";
|
||||||
|
import * as MapUtil from "../../util/map";
|
||||||
|
|
||||||
|
export type ScoredContribution = {|
|
||||||
|
+contribution: Contribution,
|
||||||
|
+source: NodeAddressT,
|
||||||
|
+sourceScore: number,
|
||||||
|
+contributionScore: number,
|
||||||
|
|};
|
||||||
|
|
||||||
|
export type PagerankNodeDecomposition = Map<
|
||||||
|
NodeAddressT,
|
||||||
|
{|
|
||||||
|
+score: number,
|
||||||
|
// Contributions are sorted by `contributionScore` descending,
|
||||||
|
// breaking ties in a deterministic (but unspecified) order.
|
||||||
|
+scoredContributions: $ReadOnlyArray<ScoredContribution>,
|
||||||
|
|}
|
||||||
|
>;
|
||||||
|
|
||||||
|
export function decompose(
|
||||||
|
pr: PagerankResult,
|
||||||
|
contributions: NodeToContributions
|
||||||
|
): PagerankNodeDecomposition {
|
||||||
|
return MapUtil.mapValues(contributions, (target, contributions) => {
|
||||||
|
const score = pr.get(target);
|
||||||
|
if (score == null) {
|
||||||
|
throw new Error("missing target: " + NodeAddress.toString(target));
|
||||||
|
}
|
||||||
|
const scoredContributions = sortBy(
|
||||||
|
contributions.map(
|
||||||
|
(contribution): ScoredContribution => {
|
||||||
|
const source = contributorSource(target, contribution.contributor);
|
||||||
|
const sourceScore = pr.get(source);
|
||||||
|
if (sourceScore == null) {
|
||||||
|
throw new Error("missing source: " + NodeAddress.toString(source));
|
||||||
|
}
|
||||||
|
const contributionScore = contribution.weight * sourceScore;
|
||||||
|
return {contribution, source, sourceScore, contributionScore};
|
||||||
|
}
|
||||||
|
),
|
||||||
|
(x) => -x.contributionScore,
|
||||||
|
// The following should be called rarely and on small objects.
|
||||||
|
(x) => JSON.stringify(x.contribution.contributor)
|
||||||
|
);
|
||||||
|
return {score, scoredContributions};
|
||||||
|
});
|
||||||
|
}
|
158
src/core/attribution/pagerankNodeDecomposition.test.js
Normal file
158
src/core/attribution/pagerankNodeDecomposition.test.js
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import {EdgeAddress, Graph, NodeAddress, edgeToStrings} from "../graph";
|
||||||
|
import {
|
||||||
|
distributionToPagerankResult,
|
||||||
|
createContributions,
|
||||||
|
createOrderedSparseMarkovChain,
|
||||||
|
} from "./graphToMarkovChain";
|
||||||
|
import {findStationaryDistribution} from "./markovChain";
|
||||||
|
import {decompose} from "./pagerankNodeDecomposition";
|
||||||
|
import * as MapUtil from "../../util/map";
|
||||||
|
|
||||||
|
import {advancedGraph} from "../graphTestUtil";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a decomposition to be shown in a snapshot. This converts
|
||||||
|
* addresses and edges to strings to avoid NUL characters.
|
||||||
|
*/
|
||||||
|
function formatDecomposition(d) {
|
||||||
|
return MapUtil.mapEntries(d, (key, {score, scoredContributions}) => [
|
||||||
|
NodeAddress.toString(key),
|
||||||
|
{
|
||||||
|
score,
|
||||||
|
scoredContributions: scoredContributions.map(
|
||||||
|
({contribution, source, sourceScore, contributionScore}) => ({
|
||||||
|
contribution: {
|
||||||
|
contributor: formatContributor(contribution.contributor),
|
||||||
|
weight: contribution.weight,
|
||||||
|
},
|
||||||
|
source: NodeAddress.toString(source),
|
||||||
|
sourceScore,
|
||||||
|
contributionScore,
|
||||||
|
})
|
||||||
|
),
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
function formatContributor(contributor) {
|
||||||
|
switch (contributor.type) {
|
||||||
|
case "SYNTHETIC_LOOP":
|
||||||
|
return {type: "SYNTHETIC_LOOP"};
|
||||||
|
case "IN_EDGE":
|
||||||
|
return {type: "IN_EDGE", edge: edgeToStrings(contributor.edge)};
|
||||||
|
case "OUT_EDGE":
|
||||||
|
return {type: "OUT_EDGE", edge: edgeToStrings(contributor.edge)};
|
||||||
|
default:
|
||||||
|
throw new Error((contributor.type: empty));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Perform basic sanity checks on a decomposition. This ensures that
 * every node's score is the sum of its contributions' scores, that the
 * scores of the decomposition sum to 1, and that each node's
 * contributions are listed in non-increasing order of score.
 *
 * The two sum checks use an absolute tolerance of 1e-6. A difference
 * that is NaN (for instance, because a score is NaN or infinite) is
 * treated as out of tolerance, so such values fail validation instead
 * of silently passing.
 *
 * Throws an `Error` describing the first violated property; returns
 * nothing if the decomposition passes all checks.
 */
function validateDecomposition(decomposition) {
  const epsilon = 1e-6;
  const sum = (xs) => xs.reduce((a, b) => a + b, 0);
  // Deliberately written as a negated `<=` rather than `>`: every
  // comparison with NaN is false, so `Math.abs(NaN) > epsilon` would
  // wrongly report a NaN difference as being within tolerance.
  const isOutOfTolerance = (delta) => !(Math.abs(delta) <= epsilon);

  // Check that each node's score is the sum of its subscores.
  for (const [key, {score, scoredContributions}] of decomposition.entries()) {
    const totalSubscore = sum(
      scoredContributions.map((sc) => sc.contributionScore)
    );
    const delta = totalSubscore - score;
    if (isOutOfTolerance(delta)) {
      const message = [
        `for node ${NodeAddress.toString(key)}: `,
        `expected total score (${score}) to equal `,
        `sum of contribution scores (${totalSubscore}) `,
        `within ${epsilon}, but the difference is ${delta}`,
      ].join("");
      throw new Error(message);
    }
  }

  // Check that the total score is 1.
  {
    const totalScore = sum(
      Array.from(decomposition.values()).map((node) => node.score)
    );
    const delta = totalScore - 1;
    if (isOutOfTolerance(delta)) {
      const message = [
        `expected total score of all nodes (${totalScore}) to equal 1.0 `,
        `within ${epsilon}, but the difference is ${delta}`,
      ].join("");
      throw new Error(message);
    }
  }

  // Check that each node's contributions are in score-descending order.
  for (const {scoredContributions} of decomposition.values()) {
    scoredContributions.forEach((current, index) => {
      if (index === 0) {
        // The first element has no predecessor to compare against.
        return;
      }
      const previous = scoredContributions[index - 1];
      if (current.contributionScore > previous.contributionScore) {
        const message = [
          `expected contribution score to be non-increasing, but `,
          `element at index ${index} has score ${current.contributionScore}, `,
          `higher than that of its predecessor (${previous.contributionScore})`,
        ].join("");
        throw new Error(message);
      }
    });
  }
}
|
||||||
|
|
||||||
|
describe("core/attribution/contributions", () => {
  // Run the whole pipeline on a graph: build contributions with fixed
  // edge weights, convert them to a Markov chain, find its stationary
  // distribution, and decompose the resulting PageRank scores.
  function decomposeGraph(graph) {
    const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
    const contributions = createContributions(graph, edgeWeight, 1.0);
    const osmc = createOrderedSparseMarkovChain(contributions);
    const stationaryOptions = {
      verbose: false,
      convergenceThreshold: 1e-6,
      maxIterations: 255,
    };
    const pi = findStationaryDistribution(osmc.chain, stationaryOptions);
    const pr = distributionToPagerankResult(osmc.nodeOrder, pi);
    return decompose(pr, contributions);
  }

  describe("decompose", () => {
    it("has the expected output on a simple asymmetric chain", () => {
      const n1 = NodeAddress.fromParts(["n1"]);
      const n2 = NodeAddress.fromParts(["n2"]);
      const sink = NodeAddress.fromParts(["sink"]);
      // n1 -> n2 -> sink, plus a shortcut n1 -> sink and a self-loop
      // on the sink.
      const edges = [
        {src: n1, dst: n2, address: EdgeAddress.fromParts(["e1"])},
        {src: n2, dst: sink, address: EdgeAddress.fromParts(["e2"])},
        {src: n1, dst: sink, address: EdgeAddress.fromParts(["e3"])},
        {src: sink, dst: sink, address: EdgeAddress.fromParts(["e4"])},
      ];
      const g = new Graph();
      g.addNode(n1);
      g.addNode(n2);
      g.addNode(sink);
      for (const edge of edges) {
        g.addEdge(edge);
      }
      const result = decomposeGraph(g);
      expect(formatDecomposition(result)).toMatchSnapshot();
      validateDecomposition(result);
    });

    it("is valid on the example graph", () => {
      const result = decomposeGraph(advancedGraph().graph1());
      validateDecomposition(result);
    });
  });
});
|
Loading…
x
Reference in New Issue
Block a user