Add pagerankNodeDecomposition to ease rendering (#501)

Summary:
When updating `PagerankTable` to work with contributions, we found it
difficult to keep track of everything when we tried to do two things
simultaneously: compute the values to be displayed, and render them
hierarchically. @decentralion suggested computing the relevant data
ahead of time, and then having a straightforward React component to
render this structure. This would incidentally make `PagerankTable`
easier to test.

This commit implements that data structure and the function to create
it from a `PagerankResult`. A subsequent commit will update
`PagerankTable` accordingly.

As evidence that this structure is well-designed, note that the main
contents of a contribution row can be rendered entirely from a
`ScoredContribution` datum (though the component will still of course
require the full `PagerankNodeDecomposition` to pass down to its
children). (At least, I think that it can be!)

Designed with @decentralion.

Test Plan:
Unit tests added. I have checked that the snapshot is structurally
correct: each node has contributions with the correct contributors.
I did not manually compute the stationary distribution and check the
snapshot for correctness. The snapshot is complemented by automated
tests.

wchargin-branch: pagerank-node-decomposition
This commit is contained in:
William Chargin 2018-07-06 22:33:12 -07:00 committed by GitHub
parent 09958e1ee2
commit b2a70f605c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 398 additions and 0 deletions

View File

@ -0,0 +1,182 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`core/attribution/contributions decompose has the expected output on a simple asymmetric chain 1`] = `
Map {
"NodeAddress[\\"n1\\"]" => Object {
"score": 0.19117656878499834,
"scoredContributions": Array [
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e3\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"n1\\"]",
},
"type": "OUT_EDGE",
},
"weight": 0.1875,
},
"contributionScore": 0.1102941261444197,
"source": "NodeAddress[\\"sink\\"]",
"sourceScore": 0.5882353394369051,
},
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e1\\"]",
"dst": "NodeAddress[\\"n2\\"]",
"src": "NodeAddress[\\"n1\\"]",
},
"type": "OUT_EDGE",
},
"weight": 0.3,
},
"contributionScore": 0.066176427533429,
"source": "NodeAddress[\\"n2\\"]",
"sourceScore": 0.22058809177809668,
},
Object {
"contribution": Object {
"contributor": Object {
"type": "SYNTHETIC_LOOP",
},
"weight": 0.07692307692307693,
},
"contributionScore": 0.014705889906538334,
"source": "NodeAddress[\\"n1\\"]",
"sourceScore": 0.19117656878499834,
},
],
},
"NodeAddress[\\"n2\\"]" => Object {
"score": 0.22058809177809668,
"scoredContributions": Array [
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e2\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"n2\\"]",
},
"type": "OUT_EDGE",
},
"weight": 0.1875,
},
"contributionScore": 0.1102941261444197,
"source": "NodeAddress[\\"sink\\"]",
"sourceScore": 0.5882353394369051,
},
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e1\\"]",
"dst": "NodeAddress[\\"n2\\"]",
"src": "NodeAddress[\\"n1\\"]",
},
"type": "IN_EDGE",
},
"weight": 0.46153846153846156,
},
"contributionScore": 0.08823533943923001,
"source": "NodeAddress[\\"n1\\"]",
"sourceScore": 0.19117656878499834,
},
Object {
"contribution": Object {
"contributor": Object {
"type": "SYNTHETIC_LOOP",
},
"weight": 0.1,
},
"contributionScore": 0.02205880917780967,
"source": "NodeAddress[\\"n2\\"]",
"sourceScore": 0.22058809177809668,
},
],
},
"NodeAddress[\\"sink\\"]" => Object {
"score": 0.5882353394369051,
"scoredContributions": Array [
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e4\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"sink\\"]",
},
"type": "IN_EDGE",
},
"weight": 0.375,
},
"contributionScore": 0.2205882522888394,
"source": "NodeAddress[\\"sink\\"]",
"sourceScore": 0.5882353394369051,
},
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e2\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"n2\\"]",
},
"type": "IN_EDGE",
},
"weight": 0.6,
},
"contributionScore": 0.132352855066858,
"source": "NodeAddress[\\"n2\\"]",
"sourceScore": 0.22058809177809668,
},
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e4\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"sink\\"]",
},
"type": "OUT_EDGE",
},
"weight": 0.1875,
},
"contributionScore": 0.1102941261444197,
"source": "NodeAddress[\\"sink\\"]",
"sourceScore": 0.5882353394369051,
},
Object {
"contribution": Object {
"contributor": Object {
"edge": Object {
"address": "EdgeAddress[\\"e3\\"]",
"dst": "NodeAddress[\\"sink\\"]",
"src": "NodeAddress[\\"n1\\"]",
},
"type": "IN_EDGE",
},
"weight": 0.46153846153846156,
},
"contributionScore": 0.08823533943923001,
"source": "NodeAddress[\\"n1\\"]",
"sourceScore": 0.19117656878499834,
},
Object {
"contribution": Object {
"contributor": Object {
"type": "SYNTHETIC_LOOP",
},
"weight": 0.0625,
},
"contributionScore": 0.03676470871480657,
"source": "NodeAddress[\\"sink\\"]",
"sourceScore": 0.5882353394369051,
},
],
},
}
`;

View File

@ -0,0 +1,58 @@
// @flow
import sortBy from "lodash.sortby";
import {type NodeAddressT, NodeAddress} from "../graph";
import {
type Contribution,
type NodeToContributions,
contributorSource,
} from "./graphToMarkovChain";
import type {PagerankResult} from "./pagerank";
import * as MapUtil from "../../util/map";
/**
 * A single contribution to a target node's score, annotated with the
 * scores that `decompose` computes for it.
 */
export type ScoredContribution = {|
// The underlying contribution (its contributor and its weight).
+contribution: Contribution,
// The node that this contribution's score comes from, as determined by
// `contributorSource` for the target node and this contributor.
+source: NodeAddressT,
// The score of `source` in the `PagerankResult`.
+sourceScore: number,
// The product `contribution.weight * sourceScore`.
+contributionScore: number,
|};
/**
 * For each node address: that node's score, together with the scored
 * contributions that `decompose` computed for it.
 */
export type PagerankNodeDecomposition = Map<
NodeAddressT,
{|
// The node's own score, as looked up in the `PagerankResult`.
+score: number,
// Contributions are sorted by `contributionScore` descending,
// breaking ties in a deterministic (but unspecified) order.
+scoredContributions: $ReadOnlyArray<ScoredContribution>,
|}
>;
export function decompose(
pr: PagerankResult,
contributions: NodeToContributions
): PagerankNodeDecomposition {
return MapUtil.mapValues(contributions, (target, contributions) => {
const score = pr.get(target);
if (score == null) {
throw new Error("missing target: " + NodeAddress.toString(target));
}
const scoredContributions = sortBy(
contributions.map(
(contribution): ScoredContribution => {
const source = contributorSource(target, contribution.contributor);
const sourceScore = pr.get(source);
if (sourceScore == null) {
throw new Error("missing source: " + NodeAddress.toString(source));
}
const contributionScore = contribution.weight * sourceScore;
return {contribution, source, sourceScore, contributionScore};
}
),
(x) => -x.contributionScore,
// The following should be called rarely and on small objects.
(x) => JSON.stringify(x.contribution.contributor)
);
return {score, scoredContributions};
});
}

View File

@ -0,0 +1,158 @@
// @flow
import {EdgeAddress, Graph, NodeAddress, edgeToStrings} from "../graph";
import {
distributionToPagerankResult,
createContributions,
createOrderedSparseMarkovChain,
} from "./graphToMarkovChain";
import {findStationaryDistribution} from "./markovChain";
import {decompose} from "./pagerankNodeDecomposition";
import * as MapUtil from "../../util/map";
import {advancedGraph} from "../graphTestUtil";
/**
* Format a decomposition to be shown in a snapshot. This converts
* addresses and edges to strings to avoid NUL characters.
*/
function formatDecomposition(d) {
  // Replace a contributor's edge (if it has one) with its string form.
  function formatContributor(contributor) {
    switch (contributor.type) {
      case "SYNTHETIC_LOOP":
        return {type: "SYNTHETIC_LOOP"};
      case "IN_EDGE":
        return {type: "IN_EDGE", edge: edgeToStrings(contributor.edge)};
      case "OUT_EDGE":
        return {type: "OUT_EDGE", edge: edgeToStrings(contributor.edge)};
      default:
        throw new Error((contributor.type: empty));
    }
  }

  // Stringify the addresses in one scored contribution; numeric fields
  // are passed through untouched.
  function formatScoredContribution(scored) {
    const {contribution, source, sourceScore, contributionScore} = scored;
    return {
      contribution: {
        contributor: formatContributor(contribution.contributor),
        weight: contribution.weight,
      },
      source: NodeAddress.toString(source),
      sourceScore,
      contributionScore,
    };
  }

  return MapUtil.mapEntries(d, (address, entry) => {
    const {score, scoredContributions} = entry;
    const formattedEntry = {
      score,
      scoredContributions: scoredContributions.map((sc) =>
        formatScoredContribution(sc)
      ),
    };
    return [NodeAddress.toString(address), formattedEntry];
  });
}
/**
* Perform basic sanity checks on a decomposition. This ensures that
* every node's score is the sum of its contributions' scores, that the
* scores of the decomposition sum to 1, and that each node's
* contributions are listed in non-increasing order of score.
*/
/**
 * Check the invariants of a decomposition, throwing on the first
 * violation found:
 *
 *   1. each node's score equals the sum of its contributions' scores,
 *      within a tolerance of 1e-6;
 *   2. the scores of all nodes sum to 1, within the same tolerance;
 *   3. each node's contributions appear in non-increasing order of
 *      contribution score.
 *
 * Non-finite values count as violations: a `NaN` score or contribution
 * score fails the check instead of passing silently.
 *
 * @param decomposition  a map from node address to an object with
 *     `score` and `scoredContributions` fields
 * @throws {Error} with a message describing the violated invariant
 */
function validateDecomposition(decomposition) {
  const epsilon = 1e-6;
  // Phrased as "not within tolerance" rather than "beyond tolerance":
  // every comparison with NaN is false, so `Math.abs(NaN) > epsilon`
  // would wrongly report that a NaN difference is acceptable.
  const exceedsTolerance = (delta) => !(Math.abs(delta) <= epsilon);

  // Check that each node's score is the sum of its subscores.
  for (const [key, {score, scoredContributions}] of decomposition.entries()) {
    const totalSubscore = scoredContributions
      .map((sc) => sc.contributionScore)
      .reduce((a, b) => a + b, 0);
    const delta = totalSubscore - score;
    if (exceedsTolerance(delta)) {
      const message = [
        `for node ${NodeAddress.toString(key)}: `,
        `expected total score (${score}) to equal `,
        `sum of contribution scores (${totalSubscore}) `,
        `within ${epsilon}, but the difference is ${delta}`,
      ].join("");
      throw new Error(message);
    }
  }

  // Check that the total score is 1.
  {
    const totalScore = Array.from(decomposition.values())
      .map((node) => node.score)
      .reduce((a, b) => a + b, 0);
    const delta = totalScore - 1;
    if (exceedsTolerance(delta)) {
      const message = [
        `expected total score of all nodes (${totalScore}) to equal 1.0 `,
        `within ${epsilon}, but the difference is ${delta}`,
      ].join("");
      throw new Error(message);
    }
  }

  // Check that each node's contributions are in score-descending order.
  for (const {scoredContributions} of decomposition.values()) {
    scoredContributions.forEach((current, index) => {
      if (index === 0) {
        return;
      }
      const previous = scoredContributions[index - 1];
      // Negated `<=` for the same NaN-safety reason as above.
      if (!(current.contributionScore <= previous.contributionScore)) {
        const message = [
          `expected contribution score to be non-increasing, but `,
          `element at index ${index} has score ${current.contributionScore}, `,
          `higher than that of its predecessor (${previous.contributionScore})`,
        ].join("");
        throw new Error(message);
      }
    });
  }
}
describe("core/attribution/contributions", () => {
  // Run the pipeline shared by every test below: build contributions
  // from the graph with fixed edge weights, find the stationary
  // distribution of the resulting Markov chain, and decompose the
  // resulting scores.
  function decomposeGraph(graph) {
    const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0});
    const contributions = createContributions(graph, edgeWeight, 1.0);
    const osmc = createOrderedSparseMarkovChain(contributions);
    const distributionOptions = {
      verbose: false,
      convergenceThreshold: 1e-6,
      maxIterations: 255,
    };
    const pi = findStationaryDistribution(osmc.chain, distributionOptions);
    const pr = distributionToPagerankResult(osmc.nodeOrder, pi);
    return decompose(pr, contributions);
  }

  describe("decompose", () => {
    it("has the expected output on a simple asymmetric chain", () => {
      const n1 = NodeAddress.fromParts(["n1"]);
      const n2 = NodeAddress.fromParts(["n2"]);
      const sink = NodeAddress.fromParts(["sink"]);
      const makeEdge = (name, src, dst) => ({
        src,
        dst,
        address: EdgeAddress.fromParts([name]),
      });
      const edges = [
        makeEdge("e1", n1, n2),
        makeEdge("e2", n2, sink),
        makeEdge("e3", n1, sink),
        // Self-loop on the sink.
        makeEdge("e4", sink, sink),
      ];
      // Add all nodes first, then all edges, threading the graph
      // returned by each call into the next.
      const withNodes = [n1, n2, sink].reduce(
        (graph, node) => graph.addNode(node),
        new Graph()
      );
      const g = edges.reduce((graph, edge) => graph.addEdge(edge), withNodes);

      const result = decomposeGraph(g);
      expect(formatDecomposition(result)).toMatchSnapshot();
      validateDecomposition(result);
    });

    it("is valid on the example graph", () => {
      const result = decomposeGraph(advancedGraph().graph1());
      validateDecomposition(result);
    });
  });
});