From a5c909689a68b0be36121417e24fc8acab3207f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Wed, 29 Aug 2018 12:20:57 -0700 Subject: [PATCH] Users have 1000 cred in aggregate (#709) This commit changes the cred normalization algorithm so that the total cred of all GitHub user nodes always sums to 1000. For rationale on the change, see #705. Fixes #705. Note that this introduces a new way for PageRank to fail: if the graph has no GitHub userlike nodes, then PageRank will throw an error when it attempts to normalize. This will result in a message being displayed to the user, and a more helpful error being printed to the console. If we need the cred explorer to display graphs that have no userlike nodes, then we can modify the codepath so that it falls back to normalizing based on all nodes instead of on the GitHub userlike nodes specifically. Test plan: The included unit test verifies that the new argument gets threaded through the state properly. But this is mostly a config change, so it's best tested by actually inspecting the cred explorer. I have done so, and can verify that the behavior is as expected: users' cred now sums to 1000, and e.g. modifying the weight on the repository node doesn't produce drastic changes to cred scores. 
--- CHANGELOG.md | 1 + src/app/credExplorer/App.js | 4 +++- src/app/credExplorer/state.js | 7 ++++--- src/app/credExplorer/state.test.js | 21 +++++++++++++++------ src/core/attribution/pagerank.js | 19 +++++++++++++------ 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbb77c0..eeed20f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Normalize scores so that 1000 cred is split amongst users (#709) - Stop persisting weights in local store (#706) - Execute GraphQL queries with exponential backoff (#699) - Introduce a simplified Git plugin that only tracks commits (#685) diff --git a/src/app/credExplorer/App.js b/src/app/credExplorer/App.js index 56d5af9..4c2ce21 100644 --- a/src/app/credExplorer/App.js +++ b/src/app/credExplorer/App.js @@ -105,7 +105,9 @@ export function createApp( loadingState !== "LOADING" ) } - onClick={() => this.stateTransitionMachine.runPagerank()} + onClick={() => + this.stateTransitionMachine.runPagerank(GithubPrefix.userlike) + } > Run PageRank diff --git a/src/app/credExplorer/state.js b/src/app/credExplorer/state.js index c30d8f6..c3bfbbe 100644 --- a/src/app/credExplorer/state.js +++ b/src/app/credExplorer/state.js @@ -3,7 +3,7 @@ import deepEqual from "lodash.isequal"; import * as NullUtil from "../../util/null"; -import {Graph} from "../../core/graph"; +import {Graph, type NodeAddressT} from "../../core/graph"; import type {Assets} from "../../app/assets"; import type {Repo} from "../../core/repo"; import {type EdgeEvaluator} from "../../core/attribution/pagerank"; @@ -80,7 +80,7 @@ export interface StateTransitionMachineInterface { +setRepo: (Repo) => void; +setEdgeEvaluator: (EdgeEvaluator) => void; +loadGraph: (Assets) => Promise; - +runPagerank: () => Promise; + +runPagerank: (NodeAddressT) => Promise; } /* In production, instantiate via createStateTransitionMachine; the constructor * implementation allows specification of the 
loadGraphWithAdapters and @@ -201,7 +201,7 @@ export class StateTransitionMachine implements StateTransitionMachineInterface { } } - async runPagerank() { + async runPagerank(totalScoreNodePrefix: NodeAddressT) { const state = this.getState(); if ( state.type !== "INITIALIZED" || @@ -228,6 +228,7 @@ export class StateTransitionMachine implements StateTransitionMachineInterface { edgeEvaluator, { verbose: true, + totalScoreNodePrefix: totalScoreNodePrefix, } ); const newSubstate = { diff --git a/src/app/credExplorer/state.test.js b/src/app/credExplorer/state.test.js index 8d1eb1b..f1bbdaa 100644 --- a/src/app/credExplorer/state.test.js +++ b/src/app/credExplorer/state.test.js @@ -7,7 +7,7 @@ import { type GraphWithAdapters, } from "./state"; -import {Graph} from "../../core/graph"; +import {Graph, NodeAddress} from "../../core/graph"; import {Assets} from "../assets"; import {makeRepo, type Repo} from "../../core/repo"; import {type EdgeEvaluator} from "../../core/attribution/pagerank"; @@ -274,7 +274,9 @@ describe("app/credExplorer/state", () => { const badStates = [initialState(), readyToLoadGraph()]; for (const b of badStates) { const {stm} = example(b); - await expect(stm.runPagerank()).rejects.toThrow("incorrect state"); + await expect(stm.runPagerank(NodeAddress.empty)).rejects.toThrow( + "incorrect state" + ); } }); it("can be run when READY_TO_RUN_PAGERANK or PAGERANK_EVALUATED", async () => { @@ -283,7 +285,7 @@ describe("app/credExplorer/state", () => { const {stm, getState, pagerankMock} = example(g); const pnd = pagerankNodeDecomposition(); pagerankMock.mockResolvedValue(pnd); - await stm.runPagerank(); + await stm.runPagerank(NodeAddress.empty); const state = getState(); const substate = getSubstate(state); if (substate.type !== "PAGERANK_EVALUATED") { @@ -296,9 +298,16 @@ describe("app/credExplorer/state", () => { it("immediately sets loading status", () => { const {getState, stm} = example(readyToRunPagerank()); 
expect(loading(getState())).toBe("NOT_LOADING"); - stm.runPagerank(); + stm.runPagerank(NodeAddress.empty); expect(loading(getState())).toBe("LOADING"); }); + it("calls pagerank with the totalScoreNodePrefix option", async () => { + const {pagerankMock, stm} = example(readyToRunPagerank()); + const foo = NodeAddress.fromParts(["foo"]); + await stm.runPagerank(foo); + const args = pagerankMock.mock.calls[0]; + expect(args[2].totalScoreNodePrefix).toBe(foo); + }); it("does not transition if another transition happens first", async () => { const {getState, stm, pagerankMock} = example(readyToRunPagerank()); const swappedRepo = makeRepo("too", "fast"); @@ -309,7 +318,7 @@ describe("app/credExplorer/state", () => { resolve(graphWithAdapters()); }) ); - await stm.runPagerank(); + await stm.runPagerank(NodeAddress.empty); const state = getState(); const substate = getSubstate(state); expect(loading(state)).toBe("NOT_LOADING"); @@ -322,7 +331,7 @@ describe("app/credExplorer/state", () => { // $ExpectFlowError console.error = jest.fn(); pagerankMock.mockRejectedValue(error); - await stm.runPagerank(); + await stm.runPagerank(NodeAddress.empty); const state = getState(); const substate = getSubstate(state); expect(loading(state)).toBe("FAILED"); diff --git a/src/core/attribution/pagerank.js b/src/core/attribution/pagerank.js index ac1d380..da8feb2 100644 --- a/src/core/attribution/pagerank.js +++ b/src/core/attribution/pagerank.js @@ -1,6 +1,6 @@ // @flow -import {type Edge, Graph} from "../graph"; +import {type Edge, Graph, NodeAddress, type NodeAddressT} from "../graph"; import { distributionToNodeDistribution, createConnections, @@ -12,7 +12,7 @@ import { type PagerankNodeDecomposition, } from "./pagerankNodeDecomposition"; -import {scoreByMaximumProbability} from "./nodeScore"; +import {scoreByConstantTotal} from "./nodeScore"; import {findStationaryDistribution} from "./markovChain"; @@ -23,8 +23,10 @@ export type PagerankOptions = {| +verbose?: boolean, 
+convergenceThreshold?: number, +maxIterations?: number, - // Scores will be normalized so that `maxScore` is the highest score - +maxScore?: number, + // Scores will be normalized so that scores sum to totalScore + +totalScore?: number, + // Only nodes matching this prefix will count for normalization + +totalScoreNodePrefix?: NodeAddressT, |}; export type {EdgeWeight} from "./graphToMarkovChain"; @@ -36,7 +38,8 @@ function defaultOptions(): PagerankOptions { selfLoopWeight: 1e-3, convergenceThreshold: 1e-7, maxIterations: 255, - maxScore: 1000, + totalScore: 1000, + totalScoreNodePrefix: NodeAddress.empty, }; } @@ -62,6 +65,10 @@ export async function pagerank( yieldAfterMs: 30, }); const pi = distributionToNodeDistribution(osmc.nodeOrder, distribution); - const scores = scoreByMaximumProbability(pi, fullOptions.maxScore); + const scores = scoreByConstantTotal( + pi, + fullOptions.totalScore, + fullOptions.totalScoreNodePrefix + ); return decompose(scores, connections); }