Display linear scores, normalized by the maximum (#625)
PageRank outputs scores as components of a probability distribution. This means that most scores are very small numbers, e.g. 0.00003, which doesn't make for a great UI (humans don't like thinking in tiny decimals).

Our first attempt at a more readable UI was to use log scores: in #265 we displayed the log score alongside (arbitrarily) `rawScore * 100` in the UI. The log scores were more usable, so we kept them, with subsequent modifications. In the original version, all the log scores were negative. In #466, we arbitrarily added 10 to the scores, which made most scores look nicer but introduced a meaningless cutoff past which scores counter-intuitively become negative. That was bad, so in #535 we started displaying negative log scores. This is also counter-intuitive: it's weird that lower scores are better, and it's not obvious that a score of (say) 3 is about 20x better than a score of 6 (the difference of 3 natural-log units corresponds to a factor of e^3 ≈ 20). I think we need to do away with log scores entirely; people just don't think about numbers logarithmically.

This commit switches to linear scores, normalized so that the largest score is always 1000. I've tried this out on a few repos and demo'd it to people, and it seems much clearer.

Test plan: some unit tests added; also, I launched the cred explorer and experienced the change on several projects.
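To make the change concrete, here is a minimal sketch of the new normalization. It reuses the `scoreByMaximumProbability` and `NodeAddress` APIs that appear in the diff below; the import paths assume the sketch sits next to the new nodeScore module, and the probabilities themselves are made up for illustration.

// @flow
// Illustration only: rescale a tiny, made-up distribution so its largest entry becomes 1000.
import {NodeAddress} from "../graph";
import {scoreByMaximumProbability} from "./nodeScore";

const foo = NodeAddress.fromParts(["foo"]);
const bar = NodeAddress.fromParts(["bar"]);

// Hypothetical PageRank output: tiny probabilities that are hard to read directly.
const pi = new Map([[foo, 0.00003], [bar, 0.00001]]);

const scores = scoreByMaximumProbability(pi, 1000);
// scores.get(foo) === 1000
// scores.get(bar) ≈ 333.33
// The UI then renders score.toFixed(2) directly, with no log transform.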
parent fb70152e7a
commit dc13d460da
@@ -45,8 +45,8 @@ function edgeVerb(
   return direction === "FORWARD" ? edgeType.forwardName : edgeType.backwardName;
 }
 
-function scoreDisplay(probability: number) {
-  return (-1 * Math.log(probability)).toFixed(2);
+function scoreDisplay(score: number) {
+  return score.toFixed(2);
 }
 
 type SharedProps = {|
@@ -348,10 +348,10 @@ describe("app/credExplorer/PagerankTable", () => {
           .text()
       ).toEqual("—");
     });
-    it("renders a score column with the node's log-score", async () => {
+    it("renders a score column with the node's score", async () => {
       const {element, sharedProps, node} = await setup();
-      const {score: rawScore} = NullUtil.get(sharedProps.pnd.get(node));
-      const expectedScore = (-Math.log(rawScore)).toFixed(2);
+      const {score} = NullUtil.get(sharedProps.pnd.get(node));
+      const expectedScore = score.toFixed(2);
       const connectionColumn = COLUMNS().indexOf("Score");
       expect(connectionColumn).not.toEqual(-1);
       expect(
@@ -495,9 +495,9 @@ describe("app/credExplorer/PagerankTable", () => {
           .text()
       ).toEqual(expectedText);
     });
-    it("renders a score column with the source's log-score", async () => {
+    it("renders a score column with the source's score", async () => {
       const {element, connection} = await setup();
-      const expectedScore = (-Math.log(connection.sourceScore)).toFixed(2);
+      const expectedScore = connection.sourceScore.toFixed(2);
       const connectionColumn = COLUMNS().indexOf("Score");
       expect(connectionColumn).not.toEqual(-1);
       expect(
@@ -0,0 +1,28 @@
+// @flow
+
+import type {NodeAddressT} from "../graph";
+import type {NodeDistribution} from "./graphToMarkovChain";
+
+export type NodeScore = Map<NodeAddressT, number>;
+
+export function scoreByMaximumProbability(
+  pi: NodeDistribution,
+  maxScore: number
+): NodeScore {
+  if (maxScore <= 0) {
+    throw new Error("Invalid argument: maxScore must be > 0");
+  }
+  let maxProbability = 0;
+  for (const p of pi.values()) {
+    maxProbability = Math.max(p, maxProbability);
+  }
+  if (maxProbability <= 0) {
+    throw new Error("Invariant violation: maxProbability must be > 0");
+  }
+  const multiFactor = maxScore / maxProbability;
+  const scoreMap = new Map();
+  for (const [addr, prob] of pi) {
+    scoreMap.set(addr, prob * multiFactor);
+  }
+  return scoreMap;
+}
@@ -0,0 +1,47 @@
+// @flow
+
+import {NodeAddress} from "../graph";
+import {scoreByMaximumProbability} from "./nodeScore";
+describe("core/attribution/nodeScore", () => {
+  const foo = NodeAddress.fromParts(["foo"]);
+  const bar = NodeAddress.fromParts(["bar"]);
+  const zod = NodeAddress.fromParts(["zod"]);
+  it("works on a simple case", () => {
+    const distribution = new Map();
+    distribution.set(foo, 0.5);
+    distribution.set(bar, 0.3);
+    distribution.set(zod, 0.2);
+    const result = scoreByMaximumProbability(distribution, 100);
+    expect(result.get(foo)).toEqual(100);
+    expect(result.get(bar)).toEqual(60);
+    expect(result.get(zod)).toEqual(40);
+  });
+  it("normalizes to the maxScore argument", () => {
+    const distribution = new Map();
+    distribution.set(foo, 0.5);
+    distribution.set(bar, 0.3);
+    distribution.set(zod, 0.2);
+    const result = scoreByMaximumProbability(distribution, 1000);
+    expect(result.get(foo)).toEqual(1000);
+    expect(result.get(bar)).toEqual(600);
+    expect(result.get(zod)).toEqual(400);
+  });
+  it("handles a case with only a single node", () => {
+    const distribution = new Map();
+    distribution.set(foo, 1.0);
+    const result = scoreByMaximumProbability(distribution, 1000);
+    expect(result.get(foo)).toEqual(1000);
+  });
+  it("errors if maxScore <= 0", () => {
+    const distribution = new Map();
+    distribution.set(foo, 1.0);
+    const result = () => scoreByMaximumProbability(distribution, 0);
+    expect(result).toThrowError("Invalid argument");
+  });
+  it("throws an error rather than divide by 0", () => {
+    const distribution = new Map();
+    distribution.set(foo, 0.0);
+    const result = () => scoreByMaximumProbability(distribution, 1000);
+    expect(result).toThrowError("Invariant violation");
+  });
+});
@@ -12,6 +12,8 @@ import {
   type PagerankNodeDecomposition,
 } from "./pagerankNodeDecomposition";
 
+import {scoreByMaximumProbability} from "./nodeScore";
+
 import {findStationaryDistribution} from "./markovChain";
 
 export type {NodeDistribution} from "./graphToMarkovChain";
@@ -21,6 +23,8 @@ export type PagerankOptions = {|
   +verbose?: boolean,
   +convergenceThreshold?: number,
   +maxIterations?: number,
+  // Scores will be normalized so that `maxScore` is the highest score
+  +maxScore?: number,
 |};
 
 export type {EdgeWeight} from "./graphToMarkovChain";
@@ -32,6 +36,7 @@ function defaultOptions(): PagerankOptions {
     selfLoopWeight: 1e-3,
     convergenceThreshold: 1e-7,
     maxIterations: 255,
+    maxScore: 1000,
   };
 }
 
@@ -57,5 +62,6 @@ export async function pagerank(
     yieldAfterMs: 30,
   });
   const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
-  return decompose(pi, connections);
+  const scores = scoreByMaximumProbability(pi, fullOptions.maxScore);
+  return decompose(scores, connections);
 }
@@ -8,7 +8,7 @@ import {
   type NodeToConnections,
   adjacencySource,
 } from "./graphToMarkovChain";
-import type {NodeDistribution} from "./pagerank";
+import type {NodeScore} from "./nodeScore";
 import * as MapUtil from "../../util/map";
 import * as NullUtil from "../../util/null";
 
@@ -30,7 +30,7 @@ export type PagerankNodeDecomposition = Map<
 >;
 
 export function decompose(
-  pr: NodeDistribution,
+  pr: NodeScore,
   connections: NodeToConnections
 ): PagerankNodeDecomposition {
   return MapUtil.mapValues(connections, (target, connections) => {