mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-05 01:04:53 +00:00
Add pagerankNodeDecomposition
to ease rendering (#501)
Summary: When updating `PagerankTable` to work with contributions, we found it difficult to keep track of everything when we tried to do two things simultaneously: compute the values to be displayed, and render them hierarchically. @decentralion suggested computing the relevant data ahead of time, and then having a straightforward React component to render this structure. This would incidentally make `PagerankTable` easier to test. This commit implements that data structure and the function to create it from a `PagerankResult`. A subsequent commit will update `PagerankTable` accordingly. As evidence that this structure is well-designed, note that the main contents of a contribution row can be rendered entirely from a `ScoredContribution` datum (though the component will still of course require the full `PagerankNodeDecomposition` to pass down to its children). (At least, I think that it can be!) Designed with @decentralion. Test Plan: Unit tests added. I have checked that the snapshot is structurally correct: each node has contributions with the correct contributors. I did not manually compute the stationary distribution and check the snapshot for correctness. The snapshot is complemented by automated tests. wchargin-branch: pagerank-node-decomposition
This commit is contained in:
parent
09958e1ee2
commit
b2a70f605c
@ -0,0 +1,182 @@
|
||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`core/attribution/contributions decompose has the expected output on a simple asymmetric chain 1`] = `
|
||||
Map {
|
||||
"NodeAddress[\\"n1\\"]" => Object {
|
||||
"score": 0.19117656878499834,
|
||||
"scoredContributions": Array [
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e3\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
"type": "OUT_EDGE",
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"contributionScore": 0.1102941261444197,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
"sourceScore": 0.5882353394369051,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e1\\"]",
|
||||
"dst": "NodeAddress[\\"n2\\"]",
|
||||
"src": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
"type": "OUT_EDGE",
|
||||
},
|
||||
"weight": 0.3,
|
||||
},
|
||||
"contributionScore": 0.066176427533429,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
"sourceScore": 0.22058809177809668,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"type": "SYNTHETIC_LOOP",
|
||||
},
|
||||
"weight": 0.07692307692307693,
|
||||
},
|
||||
"contributionScore": 0.014705889906538334,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
"sourceScore": 0.19117656878499834,
|
||||
},
|
||||
],
|
||||
},
|
||||
"NodeAddress[\\"n2\\"]" => Object {
|
||||
"score": 0.22058809177809668,
|
||||
"scoredContributions": Array [
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e2\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"n2\\"]",
|
||||
},
|
||||
"type": "OUT_EDGE",
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"contributionScore": 0.1102941261444197,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
"sourceScore": 0.5882353394369051,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e1\\"]",
|
||||
"dst": "NodeAddress[\\"n2\\"]",
|
||||
"src": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
"type": "IN_EDGE",
|
||||
},
|
||||
"weight": 0.46153846153846156,
|
||||
},
|
||||
"contributionScore": 0.08823533943923001,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
"sourceScore": 0.19117656878499834,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"type": "SYNTHETIC_LOOP",
|
||||
},
|
||||
"weight": 0.1,
|
||||
},
|
||||
"contributionScore": 0.02205880917780967,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
"sourceScore": 0.22058809177809668,
|
||||
},
|
||||
],
|
||||
},
|
||||
"NodeAddress[\\"sink\\"]" => Object {
|
||||
"score": 0.5882353394369051,
|
||||
"scoredContributions": Array [
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e4\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
"type": "IN_EDGE",
|
||||
},
|
||||
"weight": 0.375,
|
||||
},
|
||||
"contributionScore": 0.2205882522888394,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
"sourceScore": 0.5882353394369051,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e2\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"n2\\"]",
|
||||
},
|
||||
"type": "IN_EDGE",
|
||||
},
|
||||
"weight": 0.6,
|
||||
},
|
||||
"contributionScore": 0.132352855066858,
|
||||
"source": "NodeAddress[\\"n2\\"]",
|
||||
"sourceScore": 0.22058809177809668,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e4\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"sink\\"]",
|
||||
},
|
||||
"type": "OUT_EDGE",
|
||||
},
|
||||
"weight": 0.1875,
|
||||
},
|
||||
"contributionScore": 0.1102941261444197,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
"sourceScore": 0.5882353394369051,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"edge": Object {
|
||||
"address": "EdgeAddress[\\"e3\\"]",
|
||||
"dst": "NodeAddress[\\"sink\\"]",
|
||||
"src": "NodeAddress[\\"n1\\"]",
|
||||
},
|
||||
"type": "IN_EDGE",
|
||||
},
|
||||
"weight": 0.46153846153846156,
|
||||
},
|
||||
"contributionScore": 0.08823533943923001,
|
||||
"source": "NodeAddress[\\"n1\\"]",
|
||||
"sourceScore": 0.19117656878499834,
|
||||
},
|
||||
Object {
|
||||
"contribution": Object {
|
||||
"contributor": Object {
|
||||
"type": "SYNTHETIC_LOOP",
|
||||
},
|
||||
"weight": 0.0625,
|
||||
},
|
||||
"contributionScore": 0.03676470871480657,
|
||||
"source": "NodeAddress[\\"sink\\"]",
|
||||
"sourceScore": 0.5882353394369051,
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
`;
|
58
src/core/attribution/pagerankNodeDecomposition.js
Normal file
58
src/core/attribution/pagerankNodeDecomposition.js
Normal file
@ -0,0 +1,58 @@
|
||||
// @flow
|
||||
|
||||
import sortBy from "lodash.sortby";
|
||||
|
||||
import {type NodeAddressT, NodeAddress} from "../graph";
|
||||
import {
|
||||
type Contribution,
|
||||
type NodeToContributions,
|
||||
contributorSource,
|
||||
} from "./graphToMarkovChain";
|
||||
import type {PagerankResult} from "./pagerank";
|
||||
import * as MapUtil from "../../util/map";
|
||||
|
||||
// A single contribution to a target node's score, annotated with the
// scores needed to display it. As populated by `decompose`:
//   - `contribution`: the underlying contribution (contributor and weight);
//   - `source`: the node returned by `contributorSource` for the target
//     and this contribution's contributor;
//   - `sourceScore`: the PageRank score of `source`;
//   - `contributionScore`: `contribution.weight * sourceScore`.
export type ScoredContribution = {|
|
||||
+contribution: Contribution,
|
||||
+source: NodeAddressT,
|
||||
+sourceScore: number,
|
||||
+contributionScore: number,
|
||||
|};
|
||||
|
||||
// The result of `decompose`: for each node address, that node's score
// together with the scored contributions that make it up.
export type PagerankNodeDecomposition = Map<
|
||||
NodeAddressT,
|
||||
{|
|
||||
+score: number,
|
||||
// Contributions are sorted by `contributionScore` descending,
|
||||
// breaking ties in a deterministic (but unspecified) order.
|
||||
+scoredContributions: $ReadOnlyArray<ScoredContribution>,
|
||||
|}
|
||||
>;
|
||||
|
||||
export function decompose(
|
||||
pr: PagerankResult,
|
||||
contributions: NodeToContributions
|
||||
): PagerankNodeDecomposition {
|
||||
return MapUtil.mapValues(contributions, (target, contributions) => {
|
||||
const score = pr.get(target);
|
||||
if (score == null) {
|
||||
throw new Error("missing target: " + NodeAddress.toString(target));
|
||||
}
|
||||
const scoredContributions = sortBy(
|
||||
contributions.map(
|
||||
(contribution): ScoredContribution => {
|
||||
const source = contributorSource(target, contribution.contributor);
|
||||
const sourceScore = pr.get(source);
|
||||
if (sourceScore == null) {
|
||||
throw new Error("missing source: " + NodeAddress.toString(source));
|
||||
}
|
||||
const contributionScore = contribution.weight * sourceScore;
|
||||
return {contribution, source, sourceScore, contributionScore};
|
||||
}
|
||||
),
|
||||
(x) => -x.contributionScore,
|
||||
// The following should be called rarely and on small objects.
|
||||
(x) => JSON.stringify(x.contribution.contributor)
|
||||
);
|
||||
return {score, scoredContributions};
|
||||
});
|
||||
}
|
158
src/core/attribution/pagerankNodeDecomposition.test.js
Normal file
158
src/core/attribution/pagerankNodeDecomposition.test.js
Normal file
@ -0,0 +1,158 @@
|
||||
// @flow
|
||||
|
||||
import {EdgeAddress, Graph, NodeAddress, edgeToStrings} from "../graph";
|
||||
import {
|
||||
distributionToPagerankResult,
|
||||
createContributions,
|
||||
createOrderedSparseMarkovChain,
|
||||
} from "./graphToMarkovChain";
|
||||
import {findStationaryDistribution} from "./markovChain";
|
||||
import {decompose} from "./pagerankNodeDecomposition";
|
||||
import * as MapUtil from "../../util/map";
|
||||
|
||||
import {advancedGraph} from "../graphTestUtil";
|
||||
|
||||
/**
|
||||
* Format a decomposition to be shown in a snapshot. This converts
|
||||
* addresses and edges to strings to avoid NUL characters.
|
||||
*/
|
||||
function formatDecomposition(d) {
|
||||
return MapUtil.mapEntries(d, (key, {score, scoredContributions}) => [
|
||||
NodeAddress.toString(key),
|
||||
{
|
||||
score,
|
||||
scoredContributions: scoredContributions.map(
|
||||
({contribution, source, sourceScore, contributionScore}) => ({
|
||||
contribution: {
|
||||
contributor: formatContributor(contribution.contributor),
|
||||
weight: contribution.weight,
|
||||
},
|
||||
source: NodeAddress.toString(source),
|
||||
sourceScore,
|
||||
contributionScore,
|
||||
})
|
||||
),
|
||||
},
|
||||
]);
|
||||
function formatContributor(contributor) {
|
||||
switch (contributor.type) {
|
||||
case "SYNTHETIC_LOOP":
|
||||
return {type: "SYNTHETIC_LOOP"};
|
||||
case "IN_EDGE":
|
||||
return {type: "IN_EDGE", edge: edgeToStrings(contributor.edge)};
|
||||
case "OUT_EDGE":
|
||||
return {type: "OUT_EDGE", edge: edgeToStrings(contributor.edge)};
|
||||
default:
|
||||
throw new Error((contributor.type: empty));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform basic sanity checks on a decomposition. This ensures that
|
||||
* every node's score is the sum of its contributions' scores, that the
|
||||
* scores of the decomposition sum to 1, and that each node's
|
||||
* contributions are listed in non-increasing order of score.
|
||||
*/
|
||||
function validateDecomposition(decomposition) {
|
||||
const epsilon = 1e-6;
|
||||
|
||||
// Check that each node's score is the sum of its subscores.
|
||||
for (const [key, {score, scoredContributions}] of decomposition.entries()) {
|
||||
const totalSubscore = scoredContributions
|
||||
.map((sc) => sc.contributionScore)
|
||||
.reduce((a, b) => a + b, 0);
|
||||
const delta = totalSubscore - score;
|
||||
if (Math.abs(delta) > epsilon) {
|
||||
const message = [
|
||||
`for node ${NodeAddress.toString(key)}: `,
|
||||
`expected total score (${score}) to equal `,
|
||||
`sum of contribution scores (${totalSubscore}) `,
|
||||
`within ${epsilon}, but the difference is ${delta}`,
|
||||
].join("");
|
||||
throw new Error(message);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the total score is 1.
|
||||
{
|
||||
const totalScore = Array.from(decomposition.values())
|
||||
.map((node) => node.score)
|
||||
.reduce((a, b) => a + b, 0);
|
||||
const delta = totalScore - 1;
|
||||
if (Math.abs(delta) > epsilon) {
|
||||
const message = [
|
||||
`expected total score of all nodes (${totalScore}) to equal 1.0 `,
|
||||
`within ${epsilon}, but the difference is ${delta}`,
|
||||
].join("");
|
||||
throw new Error(message);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that each node's contributions are in score-descending order.
|
||||
for (const {scoredContributions} of decomposition.values()) {
|
||||
scoredContributions.forEach((current, index) => {
|
||||
if (index === 0) {
|
||||
return;
|
||||
}
|
||||
const previous = scoredContributions[index - 1];
|
||||
if (current.contributionScore > previous.contributionScore) {
|
||||
const message = [
|
||||
`expected contribution score to be non-increasing, but `,
|
||||
`element at index ${index} has score ${current.contributionScore}, `,
|
||||
`higher than that of its predecessor (${previous.contributionScore})`,
|
||||
].join("");
|
||||
throw new Error(message);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
describe("core/attribution/contributions", () => {
  // Run the whole pipeline on a graph: build contributions with fixed
  // edge weights, find the stationary distribution of the resulting
  // Markov chain, and decompose the PageRank result.
  function decomposeGraph(graph) {
    const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
    const contributions = createContributions(graph, edgeWeight, 1.0);
    const osmc = createOrderedSparseMarkovChain(contributions);
    const pi = findStationaryDistribution(osmc.chain, {
      verbose: false,
      convergenceThreshold: 1e-6,
      maxIterations: 255,
    });
    const pr = distributionToPagerankResult(osmc.nodeOrder, pi);
    return decompose(pr, contributions);
  }

  describe("decompose", () => {
    it("has the expected output on a simple asymmetric chain", () => {
      const node = (name) => NodeAddress.fromParts([name]);
      const edge = (name, src, dst) => ({
        src,
        dst,
        address: EdgeAddress.fromParts([name]),
      });

      const n1 = node("n1");
      const n2 = node("n2");
      const sink = node("sink");
      const edges = [
        edge("e1", n1, n2),
        edge("e2", n2, sink),
        edge("e3", n1, sink),
        edge("e4", sink, sink),
      ];

      const graph = new Graph();
      for (const n of [n1, n2, sink]) {
        graph.addNode(n);
      }
      for (const e of edges) {
        graph.addEdge(e);
      }

      const result = decomposeGraph(graph);
      expect(formatDecomposition(result)).toMatchSnapshot();
      validateDecomposition(result);
    });

    it("is valid on the example graph", () => {
      const result = decomposeGraph(advancedGraph().graph1());
      validateDecomposition(result);
    });
  });
});
|
Loading…
x
Reference in New Issue
Block a user