diff --git a/src/core/pagerankGraph.js b/src/core/pagerankGraph.js
index 565998e..78fb409 100644
--- a/src/core/pagerankGraph.js
+++ b/src/core/pagerankGraph.js
@@ -11,6 +11,7 @@ import {
   type GraphJSON,
   sortedEdgeAddressesFromJSON,
   sortedNodeAddressesFromJSON,
+  NodeAddress,
 } from "./graph";
 import {
   distributionToNodeDistribution,
@@ -119,6 +120,8 @@ export class PagerankGraph {
   // when this PageRankGraph is in an invalid state (due to changes
   // to the graph backing it).
   _graphModificationCount: number;
+  // Sum of all outWeights for a node, including the synthetic weight
+  _totalOutWeight: Map<NodeAddressT, number>;
 
   /**
    * Constructs a new PagerankGraph.
@@ -152,14 +155,24 @@ export class PagerankGraph {
 
     // Initialize scores to the uniform distribution over every node
     this._scores = new Map();
+    this._totalOutWeight = new Map();
     const graphNodes = Array.from(this._graph.nodes());
     for (const node of graphNodes) {
       this._scores.set(node, 1 / graphNodes.length);
+      this._totalOutWeight.set(node, this._syntheticLoopWeight);
     }
 
     this._edgeWeights = new Map();
+    const addOutWeight = (node: NodeAddressT, weight: number) => {
+      const previousWeight = NullUtil.get(this._totalOutWeight.get(node));
+      const newWeight = previousWeight + weight;
+      this._totalOutWeight.set(node, newWeight);
+    };
     for (const edge of this._graph.edges()) {
-      this._edgeWeights.set(edge.address, edgeEvaluator(edge));
+      const weights = edgeEvaluator(edge);
+      this._edgeWeights.set(edge.address, weights);
+      addOutWeight(edge.src, weights.toWeight);
+      addOutWeight(edge.dst, weights.froWeight);
     }
   }
 
@@ -253,6 +266,27 @@ export class PagerankGraph {
     return null;
   }
 
+  /**
+   * Provides the total out weight for a node, i.e. every edge weight pointed
+   * away from the node, plus the syntheticLoopWeight.
+   *
+   * The total out weight is needed to interpret the actual significance of any
+   * particular edge's weight, as edge weights are normalized by the totalOutWeight
+   * so that the normalized weights going out of a node always sum to 1.
+   */
+  totalOutWeight(node: NodeAddressT): number {
+    this._verifyGraphNotModified();
+    const weight = this._totalOutWeight.get(node);
+    if (weight == null) {
+      throw new Error(
+        `Tried to get outWeight for non-existent node ${NodeAddress.toString(
+          node
+        )}`
+      );
+    }
+    return weight;
+  }
+
   /**
    * Asynchronously run PageRank to re-compute scores.
    *
diff --git a/src/core/pagerankGraph.test.js b/src/core/pagerankGraph.test.js
index eb932c3..65c9d35 100644
--- a/src/core/pagerankGraph.test.js
+++ b/src/core/pagerankGraph.test.js
@@ -17,9 +17,11 @@ describe("core/pagerankGraph", () => {
   const nonEmptyGraph = () =>
     new Graph().addNode(NodeAddress.fromParts(["hi"]));
 
-  function examplePagerankGraph(): PagerankGraph {
+  function examplePagerankGraph(
+    edgeEvaluator = defaultEvaluator
+  ): PagerankGraph {
     const g = advancedGraph().graph1();
-    return new PagerankGraph(g, defaultEvaluator);
+    return new PagerankGraph(g, edgeEvaluator);
   }
   async function convergedPagerankGraph(): Promise<PagerankGraph> {
     const pg = examplePagerankGraph();
@@ -168,6 +170,66 @@ describe("core/pagerankGraph", () => {
     });
   });
 
+  describe("totalOutWeight", () => {
+    it("errors on a modified graph", () => {
+      const eg = examplePagerankGraph();
+      eg.graph().addNode(NodeAddress.fromParts(["bad", "node"]));
+      expect(() =>
+        eg.totalOutWeight(NodeAddress.fromParts(["bad", "node"]))
+      ).toThrowError("has been modified");
+    });
+    it("errors on nonexistent node", () => {
+      const eg = examplePagerankGraph();
+      expect(() =>
+        eg.totalOutWeight(NodeAddress.fromParts(["nonexistent"]))
+      ).toThrowError("non-existent node");
+    });
+    function verifyOutWeights(pg: PagerankGraph) {
+      const outWeight: Map<NodeAddressT, number> = new Map();
+      for (const node of pg.graph().nodes()) {
+        outWeight.set(node, pg.syntheticLoopWeight());
+      }
+      const addOutWeight = (node: NodeAddressT, weight: number) => {
+        const previousWeight = NullUtil.get(outWeight.get(node));
+        const newWeight = previousWeight + weight;
+        outWeight.set(node, newWeight);
+      };
+      for (const {edge, weight} of pg.edges()) {
+        addOutWeight(edge.src, weight.toWeight);
+        addOutWeight(edge.dst, weight.froWeight);
+      }
+      for (const node of pg.graph().nodes()) {
+        expect(pg.totalOutWeight(node)).toEqual(outWeight.get(node));
+      }
+    }
+    it("computes outWeight correctly on the example graph", () => {
+      const edgeEvaluator = (_unused_edge) => ({toWeight: 1, froWeight: 2});
+      const eg = examplePagerankGraph(edgeEvaluator);
+      verifyOutWeights(eg);
+    });
+    it("outWeight is always the syntheticLoopWeight when edges have no weight", () => {
+      const zeroEvaluator = (_unused_edge) => ({toWeight: 0, froWeight: 0});
+      const syntheticLoopWeight = 0.1337;
+      const pg = new PagerankGraph(
+        advancedGraph().graph1(),
+        zeroEvaluator,
+        syntheticLoopWeight
+      );
+      for (const {node} of pg.nodes()) {
+        expect(pg.totalOutWeight(node)).toEqual(syntheticLoopWeight);
+      }
+    });
+    it("outWeight is computed correctly after JSON deserialization", () => {
+      // I added this test because the outWeight map is a cache that is computed
+      // once, in the constructor, and since the JSON deserialization invokes
+      // the constructor and then hacks variables around a bit, I want to ensure the
+      // outWeight cache is still generated properly.
+      const eg = examplePagerankGraph();
+      const eg_ = PagerankGraph.fromJSON(eg.toJSON());
+      verifyOutWeights(eg_);
+    });
+  });
+
   describe("runPagerank", () => {
     // The mathematical semantics of PageRank are thoroughly tested
     // in the markovChain module. The goal for these tests is just