Display linear scores, normalized by the maximum (#625)

PageRank outputs scores as components in a probability distribution.
This means that most scores are very small numbers, e.g. 0.00003. This
doesn't make for a great UI (humans don't like thinking in tiny
decimals).

Our first attempt to come up with a more readable UI was to use log
scores; in #265 we displayed the log score alongside (arbitrarily)
`rawScore * 100` in the UI. The log scores were more usable, so we kept
them, with subsequent modifications. In the original version, all the
log scores were negative. In #466, we arbitrarily added 10 to the
scores, which made most scores look nicer, but introduced a meaningless
switch where scores counter-intuitively become negative after a certain
point. That was bad, so in #535 we started displaying negative log
scores. This is also counter-intuitive: it's weird that lower scores are
better, and it's not clear that a score of (say) 3 is 20x better than a
score of 6.

I think we need to do away with the log scores; people just don't think
about numbers logarithmically. This commit switches to linear scores,
normalized so that the largest score is always 1000. I've tried this out
on a few repos and demo'd it to people, and it seems much clearer.

Test plan: Some unit tests added; also, I launched the cred explorer and
experienced the change on several projects.
This commit is contained in:
Dandelion Mané 2018-08-09 14:26:08 -07:00 committed by GitHub
parent fb70152e7a
commit dc13d460da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 91 additions and 10 deletions

View File

@ -45,8 +45,8 @@ function edgeVerb(
return direction === "FORWARD" ? edgeType.forwardName : edgeType.backwardName; return direction === "FORWARD" ? edgeType.forwardName : edgeType.backwardName;
} }
function scoreDisplay(probability: number) { function scoreDisplay(score: number) {
return (-1 * Math.log(probability)).toFixed(2); return score.toFixed(2);
} }
type SharedProps = {| type SharedProps = {|

View File

@ -348,10 +348,10 @@ describe("app/credExplorer/PagerankTable", () => {
.text() .text()
).toEqual("—"); ).toEqual("—");
}); });
it("renders a score column with the node's log-score", async () => { it("renders a score column with the node's score", async () => {
const {element, sharedProps, node} = await setup(); const {element, sharedProps, node} = await setup();
const {score: rawScore} = NullUtil.get(sharedProps.pnd.get(node)); const {score} = NullUtil.get(sharedProps.pnd.get(node));
const expectedScore = (-Math.log(rawScore)).toFixed(2); const expectedScore = score.toFixed(2);
const connectionColumn = COLUMNS().indexOf("Score"); const connectionColumn = COLUMNS().indexOf("Score");
expect(connectionColumn).not.toEqual(-1); expect(connectionColumn).not.toEqual(-1);
expect( expect(
@ -495,9 +495,9 @@ describe("app/credExplorer/PagerankTable", () => {
.text() .text()
).toEqual(expectedText); ).toEqual(expectedText);
}); });
it("renders a score column with the source's log-score", async () => { it("renders a score column with the source's score", async () => {
const {element, connection} = await setup(); const {element, connection} = await setup();
const expectedScore = (-Math.log(connection.sourceScore)).toFixed(2); const expectedScore = connection.sourceScore.toFixed(2);
const connectionColumn = COLUMNS().indexOf("Score"); const connectionColumn = COLUMNS().indexOf("Score");
expect(connectionColumn).not.toEqual(-1); expect(connectionColumn).not.toEqual(-1);
expect( expect(

View File

@ -0,0 +1,28 @@
// @flow
import type {NodeAddressT} from "../graph";
import type {NodeDistribution} from "./graphToMarkovChain";
// A score for each node, keyed by node address. This is the return type
// of `scoreByMaximumProbability`.
export type NodeScore = Map<NodeAddressT, number>;
export function scoreByMaximumProbability(
pi: NodeDistribution,
maxScore: number
): NodeScore {
if (maxScore <= 0) {
throw new Error("Invalid argument: maxScore must be >= 0");
}
let maxProbability = 0;
for (const p of pi.values()) {
maxProbability = Math.max(p, maxProbability);
}
if (maxProbability <= 0) {
throw new Error("Invariant violation: maxProbability must be >= 0");
}
const multiFactor = maxScore / maxProbability;
const scoreMap = new Map();
for (const [addr, prob] of pi) {
scoreMap.set(addr, prob * multiFactor);
}
return scoreMap;
}

View File

@ -0,0 +1,47 @@
// @flow
import {NodeAddress} from "../graph";
import {scoreByMaximumProbability} from "./nodeScore";
// Unit tests for `scoreByMaximumProbability`: scaling behaviour on small
// hand-built distributions, plus the two error paths.
describe("core/attribution/nodeScore", () => {
  const foo = NodeAddress.fromParts(["foo"]);
  const bar = NodeAddress.fromParts(["bar"]);
  const zod = NodeAddress.fromParts(["zod"]);

  // Fresh three-node fixture (0.5 / 0.3 / 0.2) for each test that needs it.
  const threeNodes = () =>
    new Map()
      .set(foo, 0.5)
      .set(bar, 0.3)
      .set(zod, 0.2);
  // Fresh single-node fixture holding `foo` with the given probability.
  const onlyFoo = (probability) => new Map().set(foo, probability);

  it("works on a simple case", () => {
    const scores = scoreByMaximumProbability(threeNodes(), 100);
    expect(scores.get(foo)).toEqual(100);
    expect(scores.get(bar)).toEqual(60);
    expect(scores.get(zod)).toEqual(40);
  });

  it("normalizes to the maxScore argument", () => {
    const scores = scoreByMaximumProbability(threeNodes(), 1000);
    expect(scores.get(foo)).toEqual(1000);
    expect(scores.get(bar)).toEqual(600);
    expect(scores.get(zod)).toEqual(400);
  });

  it("handles a case with only a single node", () => {
    const scores = scoreByMaximumProbability(onlyFoo(1.0), 1000);
    expect(scores.get(foo)).toEqual(1000);
  });

  it("errors if maxScore <= 0", () => {
    const attempt = () => scoreByMaximumProbability(onlyFoo(1.0), 0);
    expect(attempt).toThrowError("Invalid argument");
  });

  it("throws an error rather than divide by 0", () => {
    // The only probability is zero, so there is nothing to scale by.
    const attempt = () => scoreByMaximumProbability(onlyFoo(0.0), 1000);
    expect(attempt).toThrowError("Invariant violation");
  });
});

View File

@ -12,6 +12,8 @@ import {
type PagerankNodeDecomposition, type PagerankNodeDecomposition,
} from "./pagerankNodeDecomposition"; } from "./pagerankNodeDecomposition";
import {scoreByMaximumProbability} from "./nodeScore";
import {findStationaryDistribution} from "./markovChain"; import {findStationaryDistribution} from "./markovChain";
export type {NodeDistribution} from "./graphToMarkovChain"; export type {NodeDistribution} from "./graphToMarkovChain";
@ -21,6 +23,8 @@ export type PagerankOptions = {|
+verbose?: boolean, +verbose?: boolean,
+convergenceThreshold?: number, +convergenceThreshold?: number,
+maxIterations?: number, +maxIterations?: number,
// Scores will be normalized so that `maxScore` is the highest score
+maxScore?: number,
|}; |};
export type {EdgeWeight} from "./graphToMarkovChain"; export type {EdgeWeight} from "./graphToMarkovChain";
@ -32,6 +36,7 @@ function defaultOptions(): PagerankOptions {
selfLoopWeight: 1e-3, selfLoopWeight: 1e-3,
convergenceThreshold: 1e-7, convergenceThreshold: 1e-7,
maxIterations: 255, maxIterations: 255,
maxScore: 1000,
}; };
} }
@ -57,5 +62,6 @@ export async function pagerank(
yieldAfterMs: 30, yieldAfterMs: 30,
}); });
const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution); const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
return decompose(pi, connections); const scores = scoreByMaximumProbability(pi, fullOptions.maxScore);
return decompose(scores, connections);
} }

View File

@ -8,7 +8,7 @@ import {
type NodeToConnections, type NodeToConnections,
adjacencySource, adjacencySource,
} from "./graphToMarkovChain"; } from "./graphToMarkovChain";
import type {NodeDistribution} from "./pagerank"; import type {NodeScore} from "./nodeScore";
import * as MapUtil from "../../util/map"; import * as MapUtil from "../../util/map";
import * as NullUtil from "../../util/null"; import * as NullUtil from "../../util/null";
@ -30,7 +30,7 @@ export type PagerankNodeDecomposition = Map<
>; >;
export function decompose( export function decompose(
pr: NodeDistribution, pr: NodeScore,
connections: NodeToConnections connections: NodeToConnections
): PagerankNodeDecomposition { ): PagerankNodeDecomposition {
return MapUtil.mapValues(connections, (target, connections) => { return MapUtil.mapValues(connections, (target, connections) => {