PagerankGraph: add neighbors + score decomposition (#1094)

This commit adds a `neighbors` method to `PagerankGraph`. This is an augmented version of `Graph.neighbors`. It returns the base data from `Graph.neighbors` as well as the score, the edge weights, and the score contribution. The score contribution is how much score was contributed to the target node by this particular neighbor connection. When the graph is well-converged, a node's score will be the sum of all its neighbors' score contributions, plus the contribution it received from its synthetic loop edge. So, for completeness' sake, I added another method, `syntheticLoopScoreContribution`, which computes how much score a node received from its synthetic loop edge. (This value should usually be close to 0.) You can think of these two methods as providing a replacement for the `PagerankNodeDecomposition` logic. Test plan: I've added tests that verify: - That neighbors returns results consistent with Graph.neighbors - That neighbors' score contributions are computed correctly - That neighbors errors if the graph has been modified - That synthetic score contributions are computed correctly - That a node's score is the sum of all its contributions. Unit tests included; run `yarn test`.
2019-03-08 15:02:00 -07:00 · 2019-03-08 15:02:00 -07:00 · d1936fbf93
parent 441d6df255
commit d1936fbf93
2 changed files with 211 additions and 1 deletions
--- a/src/core/pagerankGraph.js
+++ b/src/core/pagerankGraph.js
@ -13,6 +13,7 @@ import {
  sortedEdgeAddressesFromJSON,
  sortedNodeAddressesFromJSON,
  NodeAddress,
+  type NeighborsOptions,
 } from "./graph";
 import {
  distributionToNodeDistribution,
@ -23,6 +24,8 @@ import {
 import {findStationaryDistribution} from "../core/attribution/markovChain";
 import * as NullUtil from "../util/null";

+export {Direction} from "./graph";
+export type {DirectionT, NeighborsOptions} from "./graph";
 export type {EdgeWeight} from "./attribution/graphToMarkovChain";
 export type EdgeEvaluator = (Edge) => EdgeWeight;

@ -36,6 +39,16 @@ export type WeightedEdge = {|
  +weight: EdgeWeight,
 |};

+export type ScoredNeighbor = {|
+  // The neighbor node, together with its score.
+  +scoredNode: ScoredNode,
+  // The edge connecting the target to its neighbor node, with its weight.
+  +weightedEdge: WeightedEdge,
+  // How much score (in absolute terms) the neighbor node provided to the
+  // target through this weightedEdge.
+  +scoreContribution: number,
+|};
+
 export opaque type PagerankGraphJSON = Compatible<{|
  +graphJSON: GraphJSON,
  // Score for every node, ordered by the sorted node address.
@ -293,6 +306,88 @@ export class PagerankGraph {
    return weight;
  }

+  /**
+   * Provides the Neighbors to a target node, along with how those
+   * neighbors contributed to the node's score.
+   *
+   * See the docs on `Graph.neighbors` for the semantics of what a `Neighbor`
+   * is. This call augments the Neighbors from graph, so that for each neighbor
+   * we also have the neighbor node's score, the EdgeWeight for the edge, and a
+   * scoreContribution: how much score that Neighbor contributed to the target.
+   * Throws if the graph has been modified or if `target` is not in the graph.
+   *
+   * When the PagerankGraph is well-converged, it will be the case that a
+   * node's score is equal to the sum of its neighbors' score contributions
+   * plus the synthetic loop's score contribution.
+   *
+   * When the PagerankGraph is not well-converged, the score contributions are
+   * meaningless.
+   */
+  neighbors(
+    target: NodeAddressT,
+    options: NeighborsOptions
+  ): Iterator<ScoredNeighbor> {
+    this._verifyGraphNotModified();
+    if (!this.graph().hasNode(target)) {
+      throw new Error(
+        `Tried to find neighbors of non-existent node ${NodeAddress.toString(
+          target
+        )}`
+      );
+    }
+    return this._neighborsIterator(target, options);
+  }
+
+  *_neighborsIterator(
+    target: NodeAddressT,
+    options: NeighborsOptions
+  ): Iterator<ScoredNeighbor> {
+    const graphNeighbors = this.graph().neighbors(target, options);
+    for (const {node, edge} of graphNeighbors) {
+      const scoredNode = NullUtil.get(this.node(node));
+      const weightedEdge = NullUtil.get(this.edge(edge.address));
+      // We compute how much of target's score is attributable to the neighbor.
+      // First, we sum the edge weight directed from `node` to `target`; a loop
+      // edge on target satisfies both checks, so both of its weights count.
+      let relevantEdgeWeight = 0;
+      if (edge.src === target) {
+        relevantEdgeWeight += weightedEdge.weight.froWeight;
+      }
+      if (edge.dst === target) {
+        relevantEdgeWeight += weightedEdge.weight.toWeight;
+      }
+      // We normalize this edge weight by the total outWeight for `node`.
+      const normalizedEdgeWeight =
+        relevantEdgeWeight / this.totalOutWeight(node);

+      // Then the contribution is the neighbor's score scaled by that fraction.
+      const scoreContribution = scoredNode.score * normalizedEdgeWeight;
+      yield {scoredNode, weightedEdge, scoreContribution};
+    }
+  }
+
+  /**
+   * Returns how much of a node's score came from its synthetic loop.
+   * For most nodes, this should be near zero. However, if the node has no
+   * outgoing edge weight (e.g. it is isolated), then this value may be
+   * larger. Throws if the graph was modified or the node does not exist.
+   *
+   * The results of syntheticLoopScoreContribution are not meaningful if the
+   * PagerankGraph is not converged.
+   */
+  syntheticLoopScoreContribution(node: NodeAddressT): number {
+    this._verifyGraphNotModified();
+    const scoredNode = this.node(node);
+    if (scoredNode == null) {
+      throw new Error(
+        "Cannot get syntheticLoopScoreContribution for non-existent node"
+      );
+    }
+    return (
+      (scoredNode.score * this._syntheticLoopWeight) / this.totalOutWeight(node)
+    );
+  }
+
  /**
   * Asynchronously run PageRank to re-compute scores.
   *
--- a/src/core/pagerankGraph.test.js
+++ b/src/core/pagerankGraph.test.js
@ -9,7 +9,7 @@ import {
  type Edge,
  type EdgesOptions,
 } from "./graph";
-import {PagerankGraph} from "./pagerankGraph";
+import {PagerankGraph, Direction} from "./pagerankGraph";
 import {advancedGraph} from "./graphTestUtil";
 import * as NullUtil from "../util/null";

@ -348,6 +348,121 @@ describe("core/pagerankGraph", () => {
    });
  });

+  describe("neighbors", () => {
+    const allNeighbors = () => ({ // any direction, empty node/edge prefixes
+      direction: Direction.ANY,
+      nodePrefix: NodeAddress.empty,
+      edgePrefix: EdgeAddress.empty,
+    });
+    it("is an error to call neighbors after modifying the underlying graph", () => {
+      const pg = examplePagerankGraph();
+      pg.graph().addNode(NodeAddress.fromParts(["foomfazzle"]));
+      expect(() =>
+        pg.neighbors(NodeAddress.fromParts(["src"]), allNeighbors())
+      ).toThrowError("has been modified");
+    });
+    it("it is an error to call neighbors on a non-existent node", () => {
+      const pg = examplePagerankGraph();
+      expect(() =>
+        pg.neighbors(NodeAddress.fromParts(["foomfazzle"]), allNeighbors())
+      ).toThrowError("non-existent node");
+    });
+    it("neighbors returns results consistent with Graph.neighbors", () => {
+      const directions = [Direction.IN, Direction.ANY, Direction.OUT];
+      const nodePrefixes = [
+        NodeAddress.empty,
+        NodeAddress.fromParts(["src"]),
+        NodeAddress.fromParts(["nonexistent"]),
+      ];
+      const edgePrefixes = [
+        EdgeAddress.empty,
+        EdgeAddress.fromParts(["hom"]),
+        EdgeAddress.fromParts(["nonexistent"]),
+      ];
+      const targets = [
+        NodeAddress.fromParts(["src"]),
+        NodeAddress.fromParts(["loop"]),
+      ];

+      const graph = advancedGraph().graph1();
+      const pagerankGraph = new PagerankGraph(graph, defaultEvaluator);
+      for (const direction of directions) { // every option/target combination
+        for (const nodePrefix of nodePrefixes) {
+          for (const edgePrefix of edgePrefixes) {
+            for (const target of targets) {
+              const options = {direction, nodePrefix, edgePrefix};
+              const prgNeighbors = Array.from(
+                pagerankGraph.neighbors(target, options)
+              );
+              const gNeighbors = Array.from(graph.neighbors(target, options));
+              const reducedPrgNeighbors = prgNeighbors.map((s) => ({ // drop score and weight
+                node: s.scoredNode.node,
+                edge: s.weightedEdge.edge,
+              }));
+              expect(gNeighbors).toEqual(reducedPrgNeighbors);
+            }
+          }
+        }
+      }
+    });
+  });
+
+  describe("score decomposition", () => {
+    const allNeighbors = () => ({
+      direction: Direction.ANY,
+      nodePrefix: NodeAddress.empty,
+      edgePrefix: EdgeAddress.empty,
+    });
+    it("neighbor's scored contributions are computed correctly", async () => {
+      const pg = await convergedPagerankGraph();
+      for (const {node: target} of pg.nodes()) {
+        for (const {
+          scoredNode,
+          weightedEdge,
+          scoreContribution,
+        } of pg.neighbors(target, allNeighbors())) {
+          let rawWeight = 0;
+          if (weightedEdge.edge.dst === target) {
+            rawWeight += weightedEdge.weight.toWeight;
+          }
+          if (weightedEdge.edge.src === target) {
+            rawWeight += weightedEdge.weight.froWeight;
+          }
+          const normalizedWeight =
+            rawWeight / pg.totalOutWeight(scoredNode.node);
+          expect(scoreContribution).toEqual(
+            scoredNode.score * normalizedWeight
+          );
+        }
+      }
+    });
+    it("synthetic score contributions are computed correctly", async () => {
+      const pg = await convergedPagerankGraph();
+      for (const {node, score} of pg.nodes()) {
+        expect(pg.syntheticLoopScoreContribution(node)).toEqual(
+          (score * pg.syntheticLoopWeight()) / pg.totalOutWeight(node)
+        );
+      }
+    });
+    it("neighbors score contributions + synthetic score contribution == node score", async () => {
+      // Note: I've verified that this test fails if we don't properly handle
+      // loop neighbors (for a loop edge, both the edge's toWeight and its
+      // froWeight need to be added).
+      const pg = await convergedPagerankGraph();
+      for (const {node, score} of pg.nodes()) {
+        // Start the sum with the score that came from the synthetic loop edge
+        // (should be near zero for non-isolated nodes).
+        let summedScoreContributions: number = pg.syntheticLoopScoreContribution(
+          node
+        );
+        for (const scoredNeighbor of pg.neighbors(node, allNeighbors())) {
+          summedScoreContributions += scoredNeighbor.scoreContribution;
+        }
+        expect(summedScoreContributions).toBeCloseTo(score);
+      }
+    });
+  });
+
  describe("runPagerank", () => {
    // The mathematical semantics of PageRank are thoroughly tested
    // in the markovChain module. The goal for these tests is just