Display linear scores, normalized by the maximum (#625)
PageRank outputs scores as components in a probability distribution. This means that most scores are very small numbers, e.g. 0.00003. This doesn't make for a great UI (humans don't like thinking in tiny decimals). Our first attempt to come up with a more readable UI was to use log scores; in #265 we displayed the log score alongside (arbitrarily) `rawScore * 100` in the UI. The log scores were more usable, so we kept them, with subsequent modifications. In the original version, all the log scores were negative. In #466, we arbitrarily added 10 to the scores, which made most scores look nicer, but introduced a meaningless switch where scores counter-intuitively become negative after a certain point. That was bad, so in #535 we started displaying negative log scores. This is also counter-intuitive: it's weird that lower scores are better, and it's not clear that a score of (say) 3 is 20x better than a score of 6. I think we need to do away with the log scores; people just don't think about numbers logarithmically. This commit switches to linear scores, normalized so that the largest score is always 1000. I've tried this out on a few repos and demo'd it to people, and it seems much clearer. Test plan: Some unit tests added; also, I launched the cred explorer and experienced the change on several projects.
This commit is contained in:
parent
fb70152e7a
commit
dc13d460da
|
@ -45,8 +45,8 @@ function edgeVerb(
|
|||
return direction === "FORWARD" ? edgeType.forwardName : edgeType.backwardName;
|
||||
}
|
||||
|
||||
function scoreDisplay(probability: number) {
|
||||
return (-1 * Math.log(probability)).toFixed(2);
|
||||
function scoreDisplay(score: number) {
|
||||
return score.toFixed(2);
|
||||
}
|
||||
|
||||
type SharedProps = {|
|
||||
|
|
|
@ -348,10 +348,10 @@ describe("app/credExplorer/PagerankTable", () => {
|
|||
.text()
|
||||
).toEqual("—");
|
||||
});
|
||||
it("renders a score column with the node's log-score", async () => {
|
||||
it("renders a score column with the node's score", async () => {
|
||||
const {element, sharedProps, node} = await setup();
|
||||
const {score: rawScore} = NullUtil.get(sharedProps.pnd.get(node));
|
||||
const expectedScore = (-Math.log(rawScore)).toFixed(2);
|
||||
const {score} = NullUtil.get(sharedProps.pnd.get(node));
|
||||
const expectedScore = score.toFixed(2);
|
||||
const connectionColumn = COLUMNS().indexOf("Score");
|
||||
expect(connectionColumn).not.toEqual(-1);
|
||||
expect(
|
||||
|
@ -495,9 +495,9 @@ describe("app/credExplorer/PagerankTable", () => {
|
|||
.text()
|
||||
).toEqual(expectedText);
|
||||
});
|
||||
it("renders a score column with the source's log-score", async () => {
|
||||
it("renders a score column with the source's score", async () => {
|
||||
const {element, connection} = await setup();
|
||||
const expectedScore = (-Math.log(connection.sourceScore)).toFixed(2);
|
||||
const expectedScore = connection.sourceScore.toFixed(2);
|
||||
const connectionColumn = COLUMNS().indexOf("Score");
|
||||
expect(connectionColumn).not.toEqual(-1);
|
||||
expect(
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
// @flow
|
||||
|
||||
import type {NodeAddressT} from "../graph";
|
||||
import type {NodeDistribution} from "./graphToMarkovChain";
|
||||
|
||||
export type NodeScore = Map<NodeAddressT, number>;
|
||||
|
||||
export function scoreByMaximumProbability(
|
||||
pi: NodeDistribution,
|
||||
maxScore: number
|
||||
): NodeScore {
|
||||
if (maxScore <= 0) {
|
||||
throw new Error("Invalid argument: maxScore must be >= 0");
|
||||
}
|
||||
let maxProbability = 0;
|
||||
for (const p of pi.values()) {
|
||||
maxProbability = Math.max(p, maxProbability);
|
||||
}
|
||||
if (maxProbability <= 0) {
|
||||
throw new Error("Invariant violation: maxProbability must be >= 0");
|
||||
}
|
||||
const multiFactor = maxScore / maxProbability;
|
||||
const scoreMap = new Map();
|
||||
for (const [addr, prob] of pi) {
|
||||
scoreMap.set(addr, prob * multiFactor);
|
||||
}
|
||||
return scoreMap;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// @flow
|
||||
|
||||
import {NodeAddress} from "../graph";
|
||||
import {scoreByMaximumProbability} from "./nodeScore";
|
||||
describe("core/attribution/nodeScore", () => {
  const foo = NodeAddress.fromParts(["foo"]);
  const bar = NodeAddress.fromParts(["bar"]);
  const zod = NodeAddress.fromParts(["zod"]);

  // Shared fixture: a three-node distribution with masses 0.5 / 0.3 / 0.2.
  const threeNodeDistribution = () => {
    const distribution = new Map();
    distribution.set(foo, 0.5);
    distribution.set(bar, 0.3);
    distribution.set(zod, 0.2);
    return distribution;
  };

  // Shared fixture: a distribution whose single node carries mass `p`.
  const singletonDistribution = (p) => {
    const distribution = new Map();
    distribution.set(foo, p);
    return distribution;
  };

  it("works on a simple case", () => {
    const result = scoreByMaximumProbability(threeNodeDistribution(), 100);
    expect(result.get(foo)).toEqual(100);
    expect(result.get(bar)).toEqual(60);
    expect(result.get(zod)).toEqual(40);
  });
  it("normalizes to the maxScore argument", () => {
    const result = scoreByMaximumProbability(threeNodeDistribution(), 1000);
    expect(result.get(foo)).toEqual(1000);
    expect(result.get(bar)).toEqual(600);
    expect(result.get(zod)).toEqual(400);
  });
  it("handles a case with only a single node", () => {
    const result = scoreByMaximumProbability(singletonDistribution(1.0), 1000);
    expect(result.get(foo)).toEqual(1000);
  });
  it("errors if maxScore <= 0", () => {
    const result = () => scoreByMaximumProbability(singletonDistribution(1.0), 0);
    expect(result).toThrowError("Invalid argument");
  });
  it("throws an error rather than divide by 0", () => {
    const result = () => scoreByMaximumProbability(singletonDistribution(0.0), 1000);
    expect(result).toThrowError("Invariant violation");
  });
});
|
|
@ -12,6 +12,8 @@ import {
|
|||
type PagerankNodeDecomposition,
|
||||
} from "./pagerankNodeDecomposition";
|
||||
|
||||
import {scoreByMaximumProbability} from "./nodeScore";
|
||||
|
||||
import {findStationaryDistribution} from "./markovChain";
|
||||
|
||||
export type {NodeDistribution} from "./graphToMarkovChain";
|
||||
|
@ -21,6 +23,8 @@ export type PagerankOptions = {|
|
|||
+verbose?: boolean,
|
||||
+convergenceThreshold?: number,
|
||||
+maxIterations?: number,
|
||||
// Scores will be normalized so that `maxScore` is the highest score
|
||||
+maxScore?: number,
|
||||
|};
|
||||
|
||||
export type {EdgeWeight} from "./graphToMarkovChain";
|
||||
|
@ -32,6 +36,7 @@ function defaultOptions(): PagerankOptions {
|
|||
selfLoopWeight: 1e-3,
|
||||
convergenceThreshold: 1e-7,
|
||||
maxIterations: 255,
|
||||
maxScore: 1000,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -57,5 +62,6 @@ export async function pagerank(
|
|||
yieldAfterMs: 30,
|
||||
});
|
||||
const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
|
||||
return decompose(pi, connections);
|
||||
const scores = scoreByMaximumProbability(pi, fullOptions.maxScore);
|
||||
return decompose(scores, connections);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import {
|
|||
type NodeToConnections,
|
||||
adjacencySource,
|
||||
} from "./graphToMarkovChain";
|
||||
import type {NodeDistribution} from "./pagerank";
|
||||
import type {NodeScore} from "./nodeScore";
|
||||
import * as MapUtil from "../../util/map";
|
||||
import * as NullUtil from "../../util/null";
|
||||
|
||||
|
@ -30,7 +30,7 @@ export type PagerankNodeDecomposition = Map<
|
|||
>;
|
||||
|
||||
export function decompose(
|
||||
pr: NodeDistribution,
|
||||
pr: NodeScore,
|
||||
connections: NodeToConnections
|
||||
): PagerankNodeDecomposition {
|
||||
return MapUtil.mapValues(connections, (target, connections) => {
|
||||
|
|
Loading…
Reference in New Issue