diff --git a/src/core/attribution/__snapshots__/pagerankNodeDecomposition.test.js.snap b/src/core/attribution/__snapshots__/pagerankNodeDecomposition.test.js.snap new file mode 100644 index 0000000..8e1b848 --- /dev/null +++ b/src/core/attribution/__snapshots__/pagerankNodeDecomposition.test.js.snap @@ -0,0 +1,182 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`core/attribution/contributions decompose has the expected output on a simple asymmetric chain 1`] = ` +Map { + "NodeAddress[\\"n1\\"]" => Object { + "score": 0.19117656878499834, + "scoredContributions": Array [ + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e3\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"n1\\"]", + }, + "type": "OUT_EDGE", + }, + "weight": 0.1875, + }, + "contributionScore": 0.1102941261444197, + "source": "NodeAddress[\\"sink\\"]", + "sourceScore": 0.5882353394369051, + }, + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e1\\"]", + "dst": "NodeAddress[\\"n2\\"]", + "src": "NodeAddress[\\"n1\\"]", + }, + "type": "OUT_EDGE", + }, + "weight": 0.3, + }, + "contributionScore": 0.066176427533429, + "source": "NodeAddress[\\"n2\\"]", + "sourceScore": 0.22058809177809668, + }, + Object { + "contribution": Object { + "contributor": Object { + "type": "SYNTHETIC_LOOP", + }, + "weight": 0.07692307692307693, + }, + "contributionScore": 0.014705889906538334, + "source": "NodeAddress[\\"n1\\"]", + "sourceScore": 0.19117656878499834, + }, + ], + }, + "NodeAddress[\\"n2\\"]" => Object { + "score": 0.22058809177809668, + "scoredContributions": Array [ + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e2\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"n2\\"]", + }, + "type": "OUT_EDGE", + }, + "weight": 0.1875, + }, + "contributionScore": 0.1102941261444197, + "source": "NodeAddress[\\"sink\\"]", + "sourceScore": 0.5882353394369051, + }, + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e1\\"]", + "dst": "NodeAddress[\\"n2\\"]", + "src": "NodeAddress[\\"n1\\"]", + }, + "type": "IN_EDGE", + }, + "weight": 0.46153846153846156, + }, + "contributionScore": 0.08823533943923001, + "source": "NodeAddress[\\"n1\\"]", + "sourceScore": 0.19117656878499834, + }, + Object { + "contribution": Object { + "contributor": Object { + "type": "SYNTHETIC_LOOP", + }, + "weight": 0.1, + }, + "contributionScore": 0.02205880917780967, + "source": "NodeAddress[\\"n2\\"]", + "sourceScore": 0.22058809177809668, + }, + ], + }, + "NodeAddress[\\"sink\\"]" => Object { + "score": 0.5882353394369051, + "scoredContributions": Array [ + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e4\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"sink\\"]", + }, + "type": "IN_EDGE", + }, + "weight": 0.375, + }, + "contributionScore": 0.2205882522888394, + "source": "NodeAddress[\\"sink\\"]", + "sourceScore": 0.5882353394369051, + }, + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e2\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"n2\\"]", + }, + "type": "IN_EDGE", + }, + "weight": 0.6, + }, + "contributionScore": 0.132352855066858, + "source": "NodeAddress[\\"n2\\"]", + "sourceScore": 0.22058809177809668, + }, + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e4\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"sink\\"]", + }, + "type": "OUT_EDGE", + }, + "weight": 0.1875, + }, + "contributionScore": 0.1102941261444197, + "source": "NodeAddress[\\"sink\\"]", + "sourceScore": 0.5882353394369051, + }, + Object { + "contribution": Object { + "contributor": Object { + "edge": Object { + "address": "EdgeAddress[\\"e3\\"]", + "dst": "NodeAddress[\\"sink\\"]", + "src": "NodeAddress[\\"n1\\"]", + }, + "type": "IN_EDGE", + }, + "weight": 0.46153846153846156, + }, + "contributionScore": 0.08823533943923001, + "source": "NodeAddress[\\"n1\\"]", + "sourceScore": 0.19117656878499834, + }, + Object { + "contribution": Object { + "contributor": Object { + "type": "SYNTHETIC_LOOP", + }, + "weight": 0.0625, + }, + "contributionScore": 0.03676470871480657, + "source": "NodeAddress[\\"sink\\"]", + "sourceScore": 0.5882353394369051, + }, + ], + }, +} +`; diff --git a/src/core/attribution/pagerankNodeDecomposition.js b/src/core/attribution/pagerankNodeDecomposition.js new file mode 100644 index 0000000..1e73863 --- /dev/null +++ b/src/core/attribution/pagerankNodeDecomposition.js @@ -0,0 +1,58 @@ +// @flow + +import sortBy from "lodash.sortby"; + +import {type NodeAddressT, NodeAddress} from "../graph"; +import { + type Contribution, + type NodeToContributions, + contributorSource, +} from "./graphToMarkovChain"; +import type {PagerankResult} from "./pagerank"; +import * as MapUtil from "../../util/map"; + +export type ScoredContribution = {| + +contribution: Contribution, + +source: NodeAddressT, + +sourceScore: number, + +contributionScore: number, +|}; + +export type PagerankNodeDecomposition = Map< + NodeAddressT, + {| + +score: number, + // Contributions are sorted by `contributorScore` descending, + // breaking ties in a deterministic (but unspecified) order. + +scoredContributions: $ReadOnlyArray, + |} +>; + +export function decompose( + pr: PagerankResult, + contributions: NodeToContributions +): PagerankNodeDecomposition { + return MapUtil.mapValues(contributions, (target, contributions) => { + const score = pr.get(target); + if (score == null) { + throw new Error("missing target: " + NodeAddress.toString(target)); + } + const scoredContributions = sortBy( + contributions.map( + (contribution): ScoredContribution => { + const source = contributorSource(target, contribution.contributor); + const sourceScore = pr.get(source); + if (sourceScore == null) { + throw new Error("missing source: " + NodeAddress.toString(source)); + } + const contributionScore = contribution.weight * sourceScore; + return {contribution, source, sourceScore, contributionScore}; + } + ), + (x) => -x.contributionScore, + // The following should be called rarely and on small objects. + (x) => JSON.stringify(x.contribution.contributor) + ); + return {score, scoredContributions}; + }); +} diff --git a/src/core/attribution/pagerankNodeDecomposition.test.js b/src/core/attribution/pagerankNodeDecomposition.test.js new file mode 100644 index 0000000..df3abda --- /dev/null +++ b/src/core/attribution/pagerankNodeDecomposition.test.js @@ -0,0 +1,158 @@ +// @flow + +import {EdgeAddress, Graph, NodeAddress, edgeToStrings} from "../graph"; +import { + distributionToPagerankResult, + createContributions, + createOrderedSparseMarkovChain, +} from "./graphToMarkovChain"; +import {findStationaryDistribution} from "./markovChain"; +import {decompose} from "./pagerankNodeDecomposition"; +import * as MapUtil from "../../util/map"; + +import {advancedGraph} from "../graphTestUtil"; + +/** + * Format a decomposition to be shown in a snapshot. This converts + * addresses and edges to strings to avoid NUL characters. + */ +function formatDecomposition(d) { + return MapUtil.mapEntries(d, (key, {score, scoredContributions}) => [ + NodeAddress.toString(key), + { + score, + scoredContributions: scoredContributions.map( + ({contribution, source, sourceScore, contributionScore}) => ({ + contribution: { + contributor: formatContributor(contribution.contributor), + weight: contribution.weight, + }, + source: NodeAddress.toString(source), + sourceScore, + contributionScore, + }) + ), + }, + ]); + function formatContributor(contributor) { + switch (contributor.type) { + case "SYNTHETIC_LOOP": + return {type: "SYNTHETIC_LOOP"}; + case "IN_EDGE": + return {type: "IN_EDGE", edge: edgeToStrings(contributor.edge)}; + case "OUT_EDGE": + return {type: "OUT_EDGE", edge: edgeToStrings(contributor.edge)}; + default: + throw new Error((contributor.type: empty)); + } + } +} + +/** + * Perform basic sanity checks on a decomposition. This ensures that + * every node's score is the sum of its contributions' scores, that the + * scores of the decomposition sum to 1, and that each node's + * contributions are listed in non-increasing order of score. + */ +function validateDecomposition(decomposition) { + const epsilon = 1e-6; + + // Check that each node's score is the sum of its subscores. + for (const [key, {score, scoredContributions}] of decomposition.entries()) { + const totalSubscore = scoredContributions + .map((sc) => sc.contributionScore) + .reduce((a, b) => a + b, 0); + const delta = totalSubscore - score; + if (Math.abs(delta) > epsilon) { + const message = [ + `for node ${NodeAddress.toString(key)}: `, + `expected total score (${score}) to equal `, + `sum of contribution scores (${totalSubscore}) `, + `within ${epsilon}, but the difference is ${delta}`, + ].join(""); + throw new Error(message); + } + } + + // Check that the total score is 1. + { + const totalScore = Array.from(decomposition.values()) + .map((node) => node.score) + .reduce((a, b) => a + b, 0); + const delta = totalScore - 1; + if (Math.abs(delta) > epsilon) { + const message = [ + `expected total score of all nodes (${totalScore}) to equal 1.0 `, + `within ${epsilon}, but the difference is ${delta}`, + ].join(""); + throw new Error(message); + } + } + + // Check that each node's contributions are in score-descending order. + for (const {scoredContributions} of decomposition.values()) { + scoredContributions.forEach((current, index) => { + if (index === 0) { + return; + } + const previous = scoredContributions[index - 1]; + if (current.contributionScore > previous.contributionScore) { + const message = [ + `expected contribution score to be non-increasing, but `, + `element at index ${index} has score ${current.contributionScore}, `, + `higher than that of its predecessor (${previous.contributionScore})`, + ].join(""); + throw new Error(message); + } + }); + } +} + +describe("core/attribution/contributions", () => { + describe("decompose", () => { + it("has the expected output on a simple asymmetric chain", () => { + const n1 = NodeAddress.fromParts(["n1"]); + const n2 = NodeAddress.fromParts(["n2"]); + const n3 = NodeAddress.fromParts(["sink"]); + const e1 = {src: n1, dst: n2, address: EdgeAddress.fromParts(["e1"])}; + const e2 = {src: n2, dst: n3, address: EdgeAddress.fromParts(["e2"])}; + const e3 = {src: n1, dst: n3, address: EdgeAddress.fromParts(["e3"])}; + const e4 = {src: n3, dst: n3, address: EdgeAddress.fromParts(["e4"])}; + const g = new Graph() + .addNode(n1) + .addNode(n2) + .addNode(n3) + .addEdge(e1) + .addEdge(e2) + .addEdge(e3) + .addEdge(e4); + const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); + const contributions = createContributions(g, edgeWeight, 1.0); + const osmc = createOrderedSparseMarkovChain(contributions); + const pi = findStationaryDistribution(osmc.chain, { + verbose: false, + convergenceThreshold: 1e-6, + maxIterations: 255, + }); + const pr = distributionToPagerankResult(osmc.nodeOrder, pi); + const result = decompose(pr, contributions); + expect(formatDecomposition(result)).toMatchSnapshot(); + validateDecomposition(result); + }); + + it("is valid on the example graph", () => { + const g = advancedGraph().graph1(); + const edgeWeight = () => ({toWeight: 6.0, froWeight: 3.0}); + const contributions = createContributions(g, edgeWeight, 1.0); + const osmc = createOrderedSparseMarkovChain(contributions); + const pi = findStationaryDistribution(osmc.chain, { + verbose: false, + convergenceThreshold: 1e-6, + maxIterations: 255, + }); + const pr = distributionToPagerankResult(osmc.nodeOrder, pi); + const result = decompose(pr, contributions); + validateDecomposition(result); + }); + }); +});