From 8f6a3f30bd64903047436090c6a8c7aae29a5f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Fri, 22 Feb 2019 15:14:38 -0700 Subject: [PATCH] PagerankGraph: Add `totalOutWeight` (#1092) This commit adds a `totalOutWeight` method to `PagerankGraph`. For any given node, `totalOutWeight` reports the total weight traveling away from the node on edges (including the synthetic loop edge). Using `totalOutWeight` makes it possible to normalize the weights to get the actual Markov transition probabilities. Test plan: Unit tests verify the following properties: - An error is thrown if the requested node does not exist. - An error is thrown if the graph has been modified. - The out weights are computed correctly in the standard case. - The out weights are computed correctly in the case where there are no weights (except the synthetic loop weight). - The out weights are still computed correctly after JSON deserialization. --- src/core/pagerankGraph.js | 36 ++++++++++++++++++- src/core/pagerankGraph.test.js | 66 ++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/src/core/pagerankGraph.js b/src/core/pagerankGraph.js index 565998e..78fb409 100644 --- a/src/core/pagerankGraph.js +++ b/src/core/pagerankGraph.js @@ -11,6 +11,7 @@ import { type GraphJSON, sortedEdgeAddressesFromJSON, sortedNodeAddressesFromJSON, + NodeAddress, } from "./graph"; import { distributionToNodeDistribution, @@ -119,6 +120,8 @@ export class PagerankGraph { // when this PageRankGraph is in an invalid state (due to changes // to the graph backing it). _graphModificationCount: number; + // Sum of all outWeights for a node, including the synthetic weight + _totalOutWeight: Map; /** * Constructs a new PagerankGraph. 
@@ -152,14 +155,24 @@ export class PagerankGraph { // Initialize scores to the uniform distribution over every node this._scores = new Map(); + this._totalOutWeight = new Map(); const graphNodes = Array.from(this._graph.nodes()); for (const node of graphNodes) { this._scores.set(node, 1 / graphNodes.length); + this._totalOutWeight.set(node, this._syntheticLoopWeight); } this._edgeWeights = new Map(); + const addOutWeight = (node: NodeAddressT, weight: number) => { + const previousWeight = NullUtil.get(this._totalOutWeight.get(node)); + const newWeight = previousWeight + weight; + this._totalOutWeight.set(node, newWeight); + }; for (const edge of this._graph.edges()) { - this._edgeWeights.set(edge.address, edgeEvaluator(edge)); + const weights = edgeEvaluator(edge); + this._edgeWeights.set(edge.address, weights); + addOutWeight(edge.src, weights.toWeight); + addOutWeight(edge.dst, weights.froWeight); } } @@ -253,6 +266,27 @@ export class PagerankGraph { return null; } + /** + * Provides the total out weight for a node, i.e. every edge weight pointed + * away from the node, plus the syntheticLoopWeight. + * + * The total out weight is needed to interpret the actual significance of any + * particular edge's weight, as edge weights are normalized by the totalOutWeight + * so that the normalized weights going out of a node always sum to 1. + */ + totalOutWeight(node: NodeAddressT): number { + this._verifyGraphNotModified(); + const weight = this._totalOutWeight.get(node); + if (weight == null) { + throw new Error( + `Tried to get outWeight for non-existent node ${NodeAddress.toString( + node + )}` + ); + } + return weight; + } + /** * Asynchronously run PageRank to re-compute scores. 
* diff --git a/src/core/pagerankGraph.test.js b/src/core/pagerankGraph.test.js index eb932c3..65c9d35 100644 --- a/src/core/pagerankGraph.test.js +++ b/src/core/pagerankGraph.test.js @@ -17,9 +17,11 @@ describe("core/pagerankGraph", () => { const nonEmptyGraph = () => new Graph().addNode(NodeAddress.fromParts(["hi"])); - function examplePagerankGraph(): PagerankGraph { + function examplePagerankGraph( + edgeEvaluator = defaultEvaluator + ): PagerankGraph { const g = advancedGraph().graph1(); - return new PagerankGraph(g, defaultEvaluator); + return new PagerankGraph(g, edgeEvaluator); } async function convergedPagerankGraph(): Promise { const pg = examplePagerankGraph(); @@ -168,6 +170,66 @@ describe("core/pagerankGraph", () => { }); }); + describe("totalOutWeight", () => { + it("errors on a modified graph", () => { + const eg = examplePagerankGraph(); + eg.graph().addNode(NodeAddress.fromParts(["bad", "node"])); + expect(() => + eg.totalOutWeight(NodeAddress.fromParts(["bad", "node"])) + ).toThrowError("has been modified"); + }); + it("errors on nonexistent node", () => { + const eg = examplePagerankGraph(); + expect(() => + eg.totalOutWeight(NodeAddress.fromParts(["nonexistent"])) + ).toThrowError("non-existent node"); + }); + function verifyOutWeights(pg: PagerankGraph) { + const outWeight: Map = new Map(); + for (const node of pg.graph().nodes()) { + outWeight.set(node, pg.syntheticLoopWeight()); + } + const addOutWeight = (node: NodeAddressT, weight: number) => { + const previousWeight = NullUtil.get(outWeight.get(node)); + const newWeight = previousWeight + weight; + outWeight.set(node, newWeight); + }; + for (const {edge, weight} of pg.edges()) { + addOutWeight(edge.src, weight.toWeight); + addOutWeight(edge.dst, weight.froWeight); + } + for (const node of pg.graph().nodes()) { + expect(pg.totalOutWeight(node)).toEqual(outWeight.get(node)); + } + } + it("computes outWeight correctly on the example graph", () => { + const edgeEvaluator = (_unused_edge) => 
({toWeight: 1, froWeight: 2}); + const eg = examplePagerankGraph(edgeEvaluator); + verifyOutWeights(eg); + }); + it("outWeight is always the syntheticLoopWeight when edges have no weight", () => { + const zeroEvaluator = (_unused_edge) => ({toWeight: 0, froWeight: 0}); + const syntheticLoopWeight = 0.1337; + const pg = new PagerankGraph( + advancedGraph().graph1(), + zeroEvaluator, + syntheticLoopWeight + ); + for (const {node} of pg.nodes()) { + expect(pg.totalOutWeight(node)).toEqual(syntheticLoopWeight); + } + }); + it("outWeight is computed correctly after JSON deserialization", () => { + // I added this test because the outWeight map is a cache that is computed + // once, in the constructor, and since the JSON deserialization invokes + // the constructor and then hacks variables around a bit, I want to ensure the + // outWeight cache is still generated properly. + const eg = examplePagerankGraph(); + const eg_ = PagerankGraph.fromJSON(eg.toJSON()); + verifyOutWeights(eg_); + }); + }); + describe("runPagerank", () => { // The mathematical semantics of PageRank are thoroughly tested // in the markovChain module. The goal for these tests is just