mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-25 02:35:32 +00:00
add analysis/timeline/timelineCred
This adds a TimelineCred class which serves several functions: - acts as a view on timeline cred data - (lets you get highest scoring nodes, etc) - has an interface for computing timeline cred - lets you serialize cred along with the graph and parameter settings that generated it in a single object One upshot of this design is that now if we let the user provide weights (or other config) on load time in the CLI, those weights will get carried over to the frontend, since they are included along with the cred results. TimelineCred has 'Parameters' and 'Config'. The parameters are user-specified and may change within a given instance. The config is essentially codebase-level configuration around what types are used for scoring, etc; I don't expect users to be changing this. To keep the analysis module decoupled from the plugins module, I put a default config in `src/plugins/defaultCredConfig`; I expect all users of TimelineCred to use this config. (At least for a while!) Test plan: I've added some tests to `TimelineCred`. Run `yarn test`. I also have a yet-unmerged branch that builds a functioning cred display UI using the `TimelineCred` class. fixup tlc
This commit is contained in:
parent
9bd1e88bc9
commit
aa7158dd95
262
src/analysis/timeline/timelineCred.js
Normal file
262
src/analysis/timeline/timelineCred.js
Normal file
@ -0,0 +1,262 @@
|
||||
// @flow
|
||||
|
||||
import {sum} from "d3-array";
|
||||
import sortBy from "lodash.sortby";
|
||||
import * as NullUtil from "../../util/null";
|
||||
import {toCompat, fromCompat, type Compatible} from "../../util/compat";
|
||||
import {type Interval} from "./interval";
|
||||
import {timelinePagerank} from "./timelinePagerank";
|
||||
import {distributionToCred} from "./distributionToCred";
|
||||
import {
|
||||
Graph,
|
||||
type GraphJSON,
|
||||
type NodeAddressT,
|
||||
NodeAddress,
|
||||
type Node,
|
||||
} from "../../core/graph";
|
||||
import {
|
||||
type Weights,
|
||||
type WeightsJSON,
|
||||
toJSON as weightsToJSON,
|
||||
fromJSON as weightsFromJSON,
|
||||
} from "../weights";
|
||||
import {type NodeAndEdgeTypes} from "../types";
|
||||
import {
|
||||
filterTimelineCred,
|
||||
type FilteredTimelineCred,
|
||||
filteredTimelineCredToJSON,
|
||||
filteredTimelineCredFromJSON,
|
||||
type FilteredTimelineCredJSON,
|
||||
} from "./filterTimelineCred";
|
||||
|
||||
export type {Interval} from "./interval";
|
||||
|
||||
/**
|
||||
* A Graph Node wrapped with cred information.
|
||||
*/
|
||||
export type CredNode = {|
|
||||
// The Graph Node in question
|
||||
+node: Node,
|
||||
// The total aggregated cred. (Summed across every interval).
|
||||
+total: number,
|
||||
// The timeline sequence of cred (one score per interval).
|
||||
+cred: $ReadOnlyArray<number>,
|
||||
|};
|
||||
|
||||
/**
|
||||
* Parameters for computing TimelineCred
|
||||
*
|
||||
* The parameters are intended to be user-configurable.
|
||||
*/
|
||||
export type TimelineCredParameters = {|
|
||||
// Determines how quickly cred returns to the PageRank seed vector. If alpha
|
||||
// is high, then cred will tend to "stick" to nodes that are seeded, e.g.
|
||||
// issues and pull requests. Alpha should be between 0 and 1.
|
||||
+alpha: number,
|
||||
// Determines how quickly cred decays. The decay is 1, then cred never
|
||||
// decays, and old nodes and edges will retain full weight forever. (This
|
||||
// would result in cred that is highly biased towards old contributions, as
|
||||
// they would continue earning cred in every timeslice, forever.) If the
|
||||
// decay is 0, then weights go to zero the first week after their node/edge
|
||||
// was created. Should be between 0 and 1.
|
||||
+intervalDecay: number,
|
||||
// The weights. This determines how much cred is assigned based on different
|
||||
// node types, how cred flows across various edge types, and can specify
|
||||
// manual weights directly on individual nodes. See the docs in
|
||||
// `analysis/weights` for details.
|
||||
+weights: Weights,
|
||||
|};
|
||||
|
||||
/**
|
||||
* Configuration for computing TimelineCred
|
||||
*
|
||||
* Unlike the parameters, the config is expected to be static.
|
||||
* It's code-level config that isolates the TimelineCred algorithms from
|
||||
* specific plugin-level details about which nodes addresses are used for scoring,
|
||||
* etc.
|
||||
*
|
||||
* A default config is available in `src/plugins/defaultCredConfig`
|
||||
*/
|
||||
export type TimelineCredConfig = {|
|
||||
// Cred is normalized so that for a given interval, the total score of all
|
||||
// nodes matching this prefix will be equal to the total weight of nodes in
|
||||
// the interval.
|
||||
+scoreNodePrefix: NodeAddressT,
|
||||
// To save on space, we keep cred only for nodes matching one of these
|
||||
// NodeAddresses.
|
||||
+filterNodePrefixes: $ReadOnlyArray<NodeAddressT>,
|
||||
// The types are used to assign base cred to nodes based on their type. Node
|
||||
// that the weight for each type may be overriden in the params.
|
||||
+types: NodeAndEdgeTypes,
|
||||
|};
|
||||
|
||||
/**
|
||||
* Represents the timeline cred of a graph. This class wraps all the data
|
||||
* needed to analyze and interpet cred (ie. it has the Graph and the cred
|
||||
* scores), and provides convenient view methods for accessing the cred.
|
||||
*
|
||||
* The TimelineCred also has the params and config. The intention is that this
|
||||
* is a "one stop shop" for serializing SourceCred results.
|
||||
*/
|
||||
export class TimelineCred {
|
||||
_graph: Graph;
|
||||
_cred: FilteredTimelineCred;
|
||||
_params: TimelineCredParameters;
|
||||
_config: TimelineCredConfig;
|
||||
|
||||
constructor(
|
||||
graph: Graph,
|
||||
cred: FilteredTimelineCred,
|
||||
params: TimelineCredParameters,
|
||||
config: TimelineCredConfig
|
||||
) {
|
||||
this._graph = graph;
|
||||
this._cred = cred;
|
||||
this._params = params;
|
||||
this._config = config;
|
||||
}
|
||||
|
||||
graph(): Graph {
|
||||
return this._graph;
|
||||
}
|
||||
|
||||
params(): TimelineCredParameters {
|
||||
return this._params;
|
||||
}
|
||||
|
||||
config(): TimelineCredConfig {
|
||||
return this._config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new TimelineCred based on the new Parameters.
|
||||
* Holds the graph and config constant.
|
||||
*
|
||||
* This returns a new TimelineCred; it does not modify the existing one.
|
||||
*/
|
||||
async reanalyze(newParams: TimelineCredParameters): Promise<TimelineCred> {
|
||||
return await TimelineCred.compute(this._graph, newParams, this._config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all the intervals in the timeline.
|
||||
*/
|
||||
intervals(): $ReadOnlyArray<Interval> {
|
||||
return this._cred.intervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the CredNode for a given NodeAddress.
|
||||
*
|
||||
* Returns undefined if the node is not in the filtered results.
|
||||
*
|
||||
* Note that it's possible that the node is present in the Graph, but not the
|
||||
* filtered results; if so, it will return undefined.
|
||||
*/
|
||||
credNode(a: NodeAddressT): ?CredNode {
|
||||
const cred = this._cred.addressToCred.get(a);
|
||||
if (cred === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
const total = sum(cred);
|
||||
const node = NullUtil.get(this._graph.node(a));
|
||||
return {cred, total, node};
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all the nodes matching the prefix, along with their cred,
|
||||
* sorted by total cred (descending).
|
||||
*/
|
||||
credSortedNodes(prefix: NodeAddressT): $ReadOnlyArray<CredNode> {
|
||||
const match = (a) => NodeAddress.hasPrefix(a, prefix);
|
||||
const addresses = Array.from(this._cred.addressToCred.keys()).filter(match);
|
||||
const credNodes = addresses.map((a) => this.credNode(a));
|
||||
return sortBy(credNodes, (x: CredNode) => -x.total);
|
||||
}
|
||||
|
||||
toJSON(): TimelineCredJSON {
|
||||
const rawJSON = {
|
||||
graphJSON: this._graph.toJSON(),
|
||||
credJSON: filteredTimelineCredToJSON(this._cred),
|
||||
paramsJSON: paramsToJSON(this._params),
|
||||
};
|
||||
return toCompat(COMPAT_INFO, rawJSON);
|
||||
}
|
||||
|
||||
static fromJSON(
|
||||
j: TimelineCredJSON,
|
||||
config: TimelineCredConfig
|
||||
): TimelineCred {
|
||||
const json = fromCompat(COMPAT_INFO, j);
|
||||
const {graphJSON, credJSON, paramsJSON} = json;
|
||||
const graph = Graph.fromJSON(graphJSON);
|
||||
const cred = filteredTimelineCredFromJSON(credJSON);
|
||||
const params = paramsFromJSON(paramsJSON);
|
||||
return new TimelineCred(graph, cred, params, config);
|
||||
}
|
||||
|
||||
static async compute(
|
||||
graph: Graph,
|
||||
params: TimelineCredParameters,
|
||||
config: TimelineCredConfig
|
||||
): Promise<TimelineCred> {
|
||||
const ftc = await _computeTimelineCred(graph, params, config);
|
||||
return new TimelineCred(graph, ftc, params, config);
|
||||
}
|
||||
}
|
||||
|
||||
async function _computeTimelineCred(
|
||||
graph: Graph,
|
||||
params: TimelineCredParameters,
|
||||
config: TimelineCredConfig
|
||||
): Promise<FilteredTimelineCred> {
|
||||
const nodeOrder = Array.from(graph.nodes()).map((x) => x.address);
|
||||
const distribution = await timelinePagerank(
|
||||
graph,
|
||||
config.types,
|
||||
params.weights,
|
||||
params.intervalDecay,
|
||||
params.alpha
|
||||
);
|
||||
const cred = distributionToCred(
|
||||
distribution,
|
||||
nodeOrder,
|
||||
config.scoreNodePrefix
|
||||
);
|
||||
const filtered = filterTimelineCred(
|
||||
cred,
|
||||
nodeOrder,
|
||||
config.filterNodePrefixes
|
||||
);
|
||||
return filtered;
|
||||
}
|
||||
|
||||
// Compat header (type tag and version) passed to toCompat/fromCompat when
// serializing and deserializing a TimelineCred.
const COMPAT_INFO = {
  type: "sourcecred/timelineCred",
  version: "0.1.0",
};
|
||||
|
||||
export opaque type TimelineCredJSON = Compatible<{|
|
||||
+graphJSON: GraphJSON,
|
||||
+paramsJSON: ParamsJSON,
|
||||
+credJSON: FilteredTimelineCredJSON,
|
||||
|}>;
|
||||
|
||||
type ParamsJSON = {|
|
||||
+alpha: number,
|
||||
+intervalDecay: number,
|
||||
+weights: WeightsJSON,
|
||||
|};
|
||||
|
||||
function paramsToJSON(p: TimelineCredParameters): ParamsJSON {
|
||||
return {
|
||||
alpha: p.alpha,
|
||||
intervalDecay: p.intervalDecay,
|
||||
weights: weightsToJSON(p.weights),
|
||||
};
|
||||
}
|
||||
|
||||
function paramsFromJSON(p: ParamsJSON): TimelineCredParameters {
|
||||
return {
|
||||
alpha: p.alpha,
|
||||
intervalDecay: p.intervalDecay,
|
||||
weights: weightsFromJSON(p.weights),
|
||||
};
|
||||
}
|
87
src/analysis/timeline/timelineCred.test.js
Normal file
87
src/analysis/timeline/timelineCred.test.js
Normal file
@ -0,0 +1,87 @@
|
||||
// @flow
|
||||
|
||||
import {sum} from "d3-array";
|
||||
import sortBy from "lodash.sortby";
|
||||
import {utcWeek} from "d3-time";
|
||||
import {NodeAddress, Graph} from "../../core/graph";
|
||||
import {TimelineCred, type TimelineCredConfig} from "./timelineCred";
|
||||
import {type FilteredTimelineCred} from "./filterTimelineCred";
|
||||
import {defaultWeights} from "../weights";
|
||||
|
||||
describe("src/analysis/timeline/timelineCred", () => {
|
||||
const credConfig: () => TimelineCredConfig = () => ({
|
||||
scoreNodePrefix: NodeAddress.fromParts(["foo"]),
|
||||
filterNodePrefixes: [NodeAddress.fromParts(["foo"])],
|
||||
types: {nodeTypes: [], edgeTypes: []},
|
||||
});
|
||||
|
||||
function exampleTimelineCred(): TimelineCred {
|
||||
const startTimeMs = +new Date(2017, 0);
|
||||
const endTimeMs = +new Date(2017, 6);
|
||||
const boundaries = utcWeek.range(startTimeMs, endTimeMs);
|
||||
const intervals = [];
|
||||
for (let i = 0; i < boundaries.length - 1; i++) {
|
||||
intervals.push({
|
||||
startTimeMs: +boundaries[i],
|
||||
endTimeMs: +boundaries[i + 1],
|
||||
});
|
||||
}
|
||||
const users = [
|
||||
["starter", (x) => Math.max(0, 20 - x)],
|
||||
["steady", (_) => 4],
|
||||
["finisher", (x) => (x * x) / 20],
|
||||
["latecomer", (x) => Math.max(0, x - 20)],
|
||||
];
|
||||
|
||||
const graph = new Graph();
|
||||
const addressToCred = new Map();
|
||||
for (const [name, generator] of users) {
|
||||
const address = NodeAddress.fromParts(["foo", name]);
|
||||
graph.addNode({
|
||||
address,
|
||||
description: `[@${name}](https://github.com/${name})`,
|
||||
timestampMs: null,
|
||||
});
|
||||
const scores = intervals.map((_unuesd, i) => generator(i));
|
||||
addressToCred.set(address, scores);
|
||||
}
|
||||
const filteredTimelineCred: FilteredTimelineCred = {
|
||||
intervals,
|
||||
addressToCred,
|
||||
};
|
||||
const params = {alpha: 0.05, intervalDecay: 0.5, weights: defaultWeights()};
|
||||
return new TimelineCred(graph, filteredTimelineCred, params, credConfig());
|
||||
}
|
||||
|
||||
it("JSON serialization works", () => {
|
||||
const tc = exampleTimelineCred();
|
||||
const json = exampleTimelineCred().toJSON();
|
||||
const tc_ = TimelineCred.fromJSON(json, credConfig());
|
||||
expect(tc.graph()).toEqual(tc_.graph());
|
||||
expect(tc.params()).toEqual(tc_.params());
|
||||
expect(tc.config()).toEqual(tc_.config());
|
||||
expect(tc.credSortedNodes(NodeAddress.empty)).toEqual(
|
||||
tc.credSortedNodes(NodeAddress.empty)
|
||||
);
|
||||
});
|
||||
|
||||
it("cred sorting works", () => {
|
||||
const tc = exampleTimelineCred();
|
||||
const sorted = tc.credSortedNodes(NodeAddress.empty);
|
||||
const expected = sortBy(sorted, (x) => -x.total);
|
||||
expect(sorted).toEqual(expected);
|
||||
});
|
||||
|
||||
it("cred aggregation works", () => {
|
||||
const tc = exampleTimelineCred();
|
||||
const nodes = tc.credSortedNodes(NodeAddress.empty);
|
||||
for (const node of nodes) {
|
||||
expect(node.total).toEqual(sum(node.cred));
|
||||
}
|
||||
});
|
||||
|
||||
it("credNode returns undefined for absent nodes", () => {
|
||||
const tc = exampleTimelineCred();
|
||||
expect(tc.credNode(NodeAddress.fromParts(["baz"]))).toBe(undefined);
|
||||
});
|
||||
});
|
13
src/plugins/defaultCredConfig.js
Normal file
13
src/plugins/defaultCredConfig.js
Normal file
@ -0,0 +1,13 @@
|
||||
// @flow
|
||||
|
||||
import {userNodeType, repoNodeType, declaration} from "./github/declaration";
|
||||
import type {TimelineCredConfig} from "../analysis/timeline/timelineCred";
|
||||
|
||||
export const DEFAULT_CRED_CONFIG: TimelineCredConfig = {
|
||||
scoreNodePrefix: userNodeType.prefix,
|
||||
filterNodePrefixes: Object.freeze([userNodeType.prefix, repoNodeType.prefix]),
|
||||
types: Object.freeze({
|
||||
nodeTypes: Object.freeze(declaration.nodeTypes.slice()),
|
||||
edgeTypes: Object.freeze(declaration.edgeTypes.slice()),
|
||||
}),
|
||||
};
|
@ -5,7 +5,7 @@ import * as N from "./nodes";
|
||||
import * as E from "./edges";
|
||||
import dedent from "../../util/dedent";
|
||||
|
||||
const repoNodeType = Object.freeze({
|
||||
export const repoNodeType = Object.freeze({
|
||||
name: "Repository",
|
||||
pluralName: "Repositories",
|
||||
prefix: N.Prefix.repo,
|
||||
|
Loading…
x
Reference in New Issue
Block a user