PagerankGraph: Add totalOutWeight (#1092)

This commit adds a `totalOutWeight` method to `PagerankGraph`.
For any given node, `totalOutWeight` reports the total weight traveling
away from the node on edges (including the synthetic loop edge). Using
`totalOutWeight` makes it possible to normalize the weights to get the
actual Markov transition probabilities.

Test plan: Unit tests verify the following properties:
- An error is thrown if the requested node does not exist.
- An error is thrown if the graph has been modified.
- The out weights are computed correctly in the standard case.
- The out weights are computed correctly in the case where every edge has
zero weight (so only the synthetic loop weight contributes).
- The out weights are still computed correctly after
JSON-deserialization.
This commit is contained in:
Dandelion Mané 2019-02-22 15:14:38 -07:00 committed by GitHub
parent bd669f292f
commit 8f6a3f30bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 99 additions and 3 deletions

View File

@ -11,6 +11,7 @@ import {
type GraphJSON, type GraphJSON,
sortedEdgeAddressesFromJSON, sortedEdgeAddressesFromJSON,
sortedNodeAddressesFromJSON, sortedNodeAddressesFromJSON,
NodeAddress,
} from "./graph"; } from "./graph";
import { import {
distributionToNodeDistribution, distributionToNodeDistribution,
@ -119,6 +120,8 @@ export class PagerankGraph {
// when this PageRankGraph is in an invalid state (due to changes // when this PageRankGraph is in an invalid state (due to changes
// to the graph backing it). // to the graph backing it).
_graphModificationCount: number; _graphModificationCount: number;
// Maps each node to the sum of its out weights, including the synthetic loop weight
_totalOutWeight: Map<NodeAddressT, number>;
/** /**
* Constructs a new PagerankGraph. * Constructs a new PagerankGraph.
@ -152,14 +155,24 @@ export class PagerankGraph {
// Initialize scores to the uniform distribution over every node // Initialize scores to the uniform distribution over every node
this._scores = new Map(); this._scores = new Map();
this._totalOutWeight = new Map();
const graphNodes = Array.from(this._graph.nodes()); const graphNodes = Array.from(this._graph.nodes());
for (const node of graphNodes) { for (const node of graphNodes) {
this._scores.set(node, 1 / graphNodes.length); this._scores.set(node, 1 / graphNodes.length);
this._totalOutWeight.set(node, this._syntheticLoopWeight);
} }
this._edgeWeights = new Map(); this._edgeWeights = new Map();
const addOutWeight = (node: NodeAddressT, weight: number) => {
const previousWeight = NullUtil.get(this._totalOutWeight.get(node));
const newWeight = previousWeight + weight;
this._totalOutWeight.set(node, newWeight);
};
for (const edge of this._graph.edges()) { for (const edge of this._graph.edges()) {
this._edgeWeights.set(edge.address, edgeEvaluator(edge)); const weights = edgeEvaluator(edge);
this._edgeWeights.set(edge.address, weights);
addOutWeight(edge.src, weights.toWeight);
addOutWeight(edge.dst, weights.froWeight);
} }
} }
@ -253,6 +266,27 @@ export class PagerankGraph {
return null; return null;
} }
/**
* Provides the total out weight for a node, i.e. every edge weight pointed
* away from the node, plus the syntheticLoopWeight.
*
* The total out weight is needed to interpret the actual significance of any
* particular edge's weight, as edge weights are normalized by the totalOutWeight
* so that the normalized weights going out of a node always sum to 1.
*/
totalOutWeight(node: NodeAddressT): number {
this._verifyGraphNotModified();
const weight = this._totalOutWeight.get(node);
if (weight == null) {
throw new Error(
`Tried to get outWeight for non-existent node ${NodeAddress.toString(
node
)}`
);
}
return weight;
}
/** /**
* Asynchronously run PageRank to re-compute scores. * Asynchronously run PageRank to re-compute scores.
* *

View File

@ -17,9 +17,11 @@ describe("core/pagerankGraph", () => {
const nonEmptyGraph = () => const nonEmptyGraph = () =>
new Graph().addNode(NodeAddress.fromParts(["hi"])); new Graph().addNode(NodeAddress.fromParts(["hi"]));
function examplePagerankGraph(): PagerankGraph { function examplePagerankGraph(
edgeEvaluator = defaultEvaluator
): PagerankGraph {
const g = advancedGraph().graph1(); const g = advancedGraph().graph1();
return new PagerankGraph(g, defaultEvaluator); return new PagerankGraph(g, edgeEvaluator);
} }
async function convergedPagerankGraph(): Promise<PagerankGraph> { async function convergedPagerankGraph(): Promise<PagerankGraph> {
const pg = examplePagerankGraph(); const pg = examplePagerankGraph();
@ -168,6 +170,66 @@ describe("core/pagerankGraph", () => {
}); });
}); });
describe("totalOutWeight", () => {
it("errors on a modified graph", () => {
const eg = examplePagerankGraph();
eg.graph().addNode(NodeAddress.fromParts(["bad", "node"]));
expect(() =>
eg.totalOutWeight(NodeAddress.fromParts(["bad", "node"]))
).toThrowError("has been modified");
});
it("errors on nonexistent node", () => {
const eg = examplePagerankGraph();
expect(() =>
eg.totalOutWeight(NodeAddress.fromParts(["nonexistent"]))
).toThrowError("non-existent node");
});
function verifyOutWeights(pg: PagerankGraph) {
const outWeight: Map<NodeAddressT, number> = new Map();
for (const node of pg.graph().nodes()) {
outWeight.set(node, pg.syntheticLoopWeight());
}
const addOutWeight = (node: NodeAddressT, weight: number) => {
const previousWeight = NullUtil.get(outWeight.get(node));
const newWeight = previousWeight + weight;
outWeight.set(node, newWeight);
};
for (const {edge, weight} of pg.edges()) {
addOutWeight(edge.src, weight.toWeight);
addOutWeight(edge.dst, weight.froWeight);
}
for (const node of pg.graph().nodes()) {
expect(pg.totalOutWeight(node)).toEqual(outWeight.get(node));
}
}
it("computes outWeight correctly on the example graph", () => {
const edgeEvaluator = (_unused_edge) => ({toWeight: 1, froWeight: 2});
const eg = examplePagerankGraph(edgeEvaluator);
verifyOutWeights(eg);
});
it("outWeight is always the syntheticLoopWeight when edges have no weight", () => {
const zeroEvaluator = (_unused_edge) => ({toWeight: 0, froWeight: 0});
const syntheticLoopWeight = 0.1337;
const pg = new PagerankGraph(
advancedGraph().graph1(),
zeroEvaluator,
syntheticLoopWeight
);
for (const {node} of pg.nodes()) {
expect(pg.totalOutWeight(node)).toEqual(syntheticLoopWeight);
}
});
it("outWeight is computed correctly after JSON deserialization", () => {
// I added this test because the outWeight map is a cache that is computed
// once, in the constructor, and since the JSON deserialization invokes
// the constructor and then hacks variables around a bit, I want to ensure the
// outWeight cache is still generated properly.
const eg = examplePagerankGraph();
const eg_ = PagerankGraph.fromJSON(eg.toJSON());
verifyOutWeights(eg_);
});
});
describe("runPagerank", () => { describe("runPagerank", () => {
// The mathematical semantics of PageRank are thoroughly tested // The mathematical semantics of PageRank are thoroughly tested
// in the markovChain module. The goal for these tests is just // in the markovChain module. The goal for these tests is just