Display linear scores, normalized by the maximum (#625)
PageRank outputs scores as components in a probability distribution. This means that most scores are very small numbers, e.g. 0.00003. This doesn't make for a great UI (humans don't like thinking in tiny decimals). Our first attempt to come up with a more readable UI was to use log scores; in #265 we displayed the log score alongside (arbitrarily) `rawScore * 100` in the UI. The log scores were more usable, so we kept them, with subsequent modifications. In the original version, all the log scores were negative. In #466, we arbitrarily added 10 to the scores, which made most scores look nicer, but introduced a meaningless switch where scores counter-intuitively become negative after a certain point. That was bad, so in #535 we started displaying negative log scores. This is also counter-intuitive: it's weird that lower scores are better, and it's not clear that a score of (say) 3 is 20x better than a score of 6. I think we need to do away with the log scores; people just don't think about numbers logarithmically. This commit switches to linear scores, normalized so that the largest score is always 1000. I've tried this out on a few repos and demo'd it to people, and it seems much clearer. Test plan: Some unit tests added; also, I launched the cred explorer and experienced the change on several projects.
This commit is contained in:
parent
fb70152e7a
commit
dc13d460da
|
@ -45,8 +45,8 @@ function edgeVerb(
|
|||
return direction === "FORWARD" ? edgeType.forwardName : edgeType.backwardName;
|
||||
}
|
||||
|
||||
function scoreDisplay(probability: number) {
|
||||
return (-1 * Math.log(probability)).toFixed(2);
|
||||
function scoreDisplay(score: number) {
|
||||
return score.toFixed(2);
|
||||
}
|
||||
|
||||
type SharedProps = {|
|
||||
|
|
|
@ -348,10 +348,10 @@ describe("app/credExplorer/PagerankTable", () => {
|
|||
.text()
|
||||
).toEqual("—");
|
||||
});
|
||||
it("renders a score column with the node's log-score", async () => {
|
||||
it("renders a score column with the node's score", async () => {
|
||||
const {element, sharedProps, node} = await setup();
|
||||
const {score: rawScore} = NullUtil.get(sharedProps.pnd.get(node));
|
||||
const expectedScore = (-Math.log(rawScore)).toFixed(2);
|
||||
const {score} = NullUtil.get(sharedProps.pnd.get(node));
|
||||
const expectedScore = score.toFixed(2);
|
||||
const connectionColumn = COLUMNS().indexOf("Score");
|
||||
expect(connectionColumn).not.toEqual(-1);
|
||||
expect(
|
||||
|
@ -495,9 +495,9 @@ describe("app/credExplorer/PagerankTable", () => {
|
|||
.text()
|
||||
).toEqual(expectedText);
|
||||
});
|
||||
it("renders a score column with the source's log-score", async () => {
|
||||
it("renders a score column with the source's score", async () => {
|
||||
const {element, connection} = await setup();
|
||||
const expectedScore = (-Math.log(connection.sourceScore)).toFixed(2);
|
||||
const expectedScore = connection.sourceScore.toFixed(2);
|
||||
const connectionColumn = COLUMNS().indexOf("Score");
|
||||
expect(connectionColumn).not.toEqual(-1);
|
||||
expect(
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
// @flow
|
||||
|
||||
import type {NodeAddressT} from "../graph";
|
||||
import type {NodeDistribution} from "./graphToMarkovChain";
|
||||
|
||||
export type NodeScore = Map<NodeAddressT, number>;
|
||||
|
||||
export function scoreByMaximumProbability(
|
||||
pi: NodeDistribution,
|
||||
maxScore: number
|
||||
): NodeScore {
|
||||
if (maxScore <= 0) {
|
||||
throw new Error("Invalid argument: maxScore must be >= 0");
|
||||
}
|
||||
let maxProbability = 0;
|
||||
for (const p of pi.values()) {
|
||||
maxProbability = Math.max(p, maxProbability);
|
||||
}
|
||||
if (maxProbability <= 0) {
|
||||
throw new Error("Invariant violation: maxProbability must be >= 0");
|
||||
}
|
||||
const multiFactor = maxScore / maxProbability;
|
||||
const scoreMap = new Map();
|
||||
for (const [addr, prob] of pi) {
|
||||
scoreMap.set(addr, prob * multiFactor);
|
||||
}
|
||||
return scoreMap;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// @flow
|
||||
|
||||
import {NodeAddress} from "../graph";
|
||||
import {scoreByMaximumProbability} from "./nodeScore";
|
||||
describe("core/attribution/nodeScore", () => {
  const foo = NodeAddress.fromParts(["foo"]);
  const bar = NodeAddress.fromParts(["bar"]);
  const zod = NodeAddress.fromParts(["zod"]);

  // Shared fixture: a three-node distribution with masses 0.5 / 0.3 / 0.2.
  const threeNodeDistribution = () => {
    const distribution = new Map();
    distribution.set(foo, 0.5);
    distribution.set(bar, 0.3);
    distribution.set(zod, 0.2);
    return distribution;
  };

  // Shared fixture: a distribution whose single node carries mass `p`.
  const singletonDistribution = (p) => {
    const distribution = new Map();
    distribution.set(foo, p);
    return distribution;
  };

  it("works on a simple case", () => {
    const result = scoreByMaximumProbability(threeNodeDistribution(), 100);
    expect(result.get(foo)).toEqual(100);
    expect(result.get(bar)).toEqual(60);
    expect(result.get(zod)).toEqual(40);
  });
  it("normalizes to the maxScore argument", () => {
    const result = scoreByMaximumProbability(threeNodeDistribution(), 1000);
    expect(result.get(foo)).toEqual(1000);
    expect(result.get(bar)).toEqual(600);
    expect(result.get(zod)).toEqual(400);
  });
  it("handles a case with only a single node", () => {
    const result = scoreByMaximumProbability(singletonDistribution(1.0), 1000);
    expect(result.get(foo)).toEqual(1000);
  });
  it("errors if maxScore <= 0", () => {
    const result = () => scoreByMaximumProbability(singletonDistribution(1.0), 0);
    expect(result).toThrowError("Invalid argument");
  });
  it("throws an error rather than divide by 0", () => {
    const result = () => scoreByMaximumProbability(singletonDistribution(0.0), 1000);
    expect(result).toThrowError("Invariant violation");
  });
});
|
|
@ -12,6 +12,8 @@ import {
|
|||
type PagerankNodeDecomposition,
|
||||
} from "./pagerankNodeDecomposition";
|
||||
|
||||
import {scoreByMaximumProbability} from "./nodeScore";
|
||||
|
||||
import {findStationaryDistribution} from "./markovChain";
|
||||
|
||||
export type {NodeDistribution} from "./graphToMarkovChain";
|
||||
|
@ -21,6 +23,8 @@ export type PagerankOptions = {|
|
|||
+verbose?: boolean,
|
||||
+convergenceThreshold?: number,
|
||||
+maxIterations?: number,
|
||||
// Scores will be normalized so that `maxScore` is the highest score
|
||||
+maxScore?: number,
|
||||
|};
|
||||
|
||||
export type {EdgeWeight} from "./graphToMarkovChain";
|
||||
|
@ -32,6 +36,7 @@ function defaultOptions(): PagerankOptions {
|
|||
selfLoopWeight: 1e-3,
|
||||
convergenceThreshold: 1e-7,
|
||||
maxIterations: 255,
|
||||
maxScore: 1000,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -57,5 +62,6 @@ export async function pagerank(
|
|||
yieldAfterMs: 30,
|
||||
});
|
||||
const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution);
|
||||
return decompose(pi, connections);
|
||||
const scores = scoreByMaximumProbability(pi, fullOptions.maxScore);
|
||||
return decompose(scores, connections);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import {
|
|||
type NodeToConnections,
|
||||
adjacencySource,
|
||||
} from "./graphToMarkovChain";
|
||||
import type {NodeDistribution} from "./pagerank";
|
||||
import type {NodeScore} from "./nodeScore";
|
||||
import * as MapUtil from "../../util/map";
|
||||
import * as NullUtil from "../../util/null";
|
||||
|
||||
|
@ -30,7 +30,7 @@ export type PagerankNodeDecomposition = Map<
|
|||
>;
|
||||
|
||||
export function decompose(
|
||||
pr: NodeDistribution,
|
||||
pr: NodeScore,
|
||||
connections: NodeToConnections
|
||||
): PagerankNodeDecomposition {
|
||||
return MapUtil.mapValues(connections, (target, connections) => {
|
||||
|
|
Loading…
Reference in New Issue