diff --git a/src/analysis/timeline/timelineCred.js b/src/analysis/timeline/timelineCred.js
new file mode 100644
index 0000000..06bcc0c
--- /dev/null
+++ b/src/analysis/timeline/timelineCred.js
@@ -0,0 +1,276 @@
+// @flow
+
+import {sum} from "d3-array";
+import sortBy from "lodash.sortby";
+import * as NullUtil from "../../util/null";
+import {toCompat, fromCompat, type Compatible} from "../../util/compat";
+import {type Interval} from "./interval";
+import {timelinePagerank} from "./timelinePagerank";
+import {distributionToCred} from "./distributionToCred";
+import {
+  Graph,
+  type GraphJSON,
+  type NodeAddressT,
+  NodeAddress,
+  type Node,
+} from "../../core/graph";
+import {
+  type Weights,
+  type WeightsJSON,
+  toJSON as weightsToJSON,
+  fromJSON as weightsFromJSON,
+} from "../weights";
+import {type NodeAndEdgeTypes} from "../types";
+import {
+  filterTimelineCred,
+  type FilteredTimelineCred,
+  filteredTimelineCredToJSON,
+  filteredTimelineCredFromJSON,
+  type FilteredTimelineCredJSON,
+} from "./filterTimelineCred";
+
+export type {Interval} from "./interval";
+
+/**
+ * A Graph Node wrapped with cred information.
+ */
+export type CredNode = {|
+  // The Graph Node in question
+  +node: Node,
+  // The total aggregated cred. (Summed across every interval).
+  +total: number,
+  // The timeline sequence of cred (one score per interval).
+  +cred: $ReadOnlyArray<number>,
+|};
+
+/**
+ * Parameters for computing TimelineCred
+ *
+ * The parameters are intended to be user-configurable.
+ */
+export type TimelineCredParameters = {|
+  // Determines how quickly cred returns to the PageRank seed vector. If alpha
+  // is high, then cred will tend to "stick" to nodes that are seeded, e.g.
+  // issues and pull requests. Alpha should be between 0 and 1.
+  +alpha: number,
+  // Determines how quickly cred decays. If the decay is 1, then cred never
+  // decays, and old nodes and edges will retain full weight forever. (This
+  // would result in cred that is highly biased towards old contributions, as
+  // they would continue earning cred in every timeslice, forever.) If the
+  // decay is 0, then weights go to zero the first week after their node/edge
+  // was created. Should be between 0 and 1.
+  +intervalDecay: number,
+  // The weights. This determines how much cred is assigned based on different
+  // node types, how cred flows across various edge types, and can specify
+  // manual weights directly on individual nodes. See the docs in
+  // `analysis/weights` for details.
+  +weights: Weights,
+|};
+
+/**
+ * Configuration for computing TimelineCred
+ *
+ * Unlike the parameters, the config is expected to be static.
+ * It's code-level config that isolates the TimelineCred algorithms from
+ * specific plugin-level details about which node addresses are used for scoring,
+ * etc.
+ *
+ * A default config is available in `src/plugins/defaultCredConfig`
+ */
+export type TimelineCredConfig = {|
+  // Cred is normalized so that for a given interval, the total score of all
+  // nodes matching this prefix will be equal to the total weight of nodes in
+  // the interval.
+  +scoreNodePrefix: NodeAddressT,
+  // To save on space, we keep cred only for nodes matching one of these
+  // NodeAddresses.
+  +filterNodePrefixes: $ReadOnlyArray<NodeAddressT>,
+  // The types are used to assign base cred to nodes based on their type. Note
+  // that the weight for each type may be overridden in the params.
+  +types: NodeAndEdgeTypes,
+|};
+
+/**
+ * Represents the timeline cred of a graph. This class wraps all the data
+ * needed to analyze and interpret cred (i.e. it has the Graph and the cred
+ * scores), and provides convenient view methods for accessing the cred.
+ *
+ * The TimelineCred also has the params and config. The intention is that this
+ * is a "one stop shop" for serializing SourceCred results.
+ */
+export class TimelineCred {
+  _graph: Graph;
+  _cred: FilteredTimelineCred;
+  _params: TimelineCredParameters;
+  _config: TimelineCredConfig;
+
+  constructor(
+    graph: Graph,
+    cred: FilteredTimelineCred,
+    params: TimelineCredParameters,
+    config: TimelineCredConfig
+  ) {
+    this._graph = graph;
+    this._cred = cred;
+    this._params = params;
+    this._config = config;
+  }
+
+  graph(): Graph {
+    return this._graph;
+  }
+
+  params(): TimelineCredParameters {
+    return this._params;
+  }
+
+  config(): TimelineCredConfig {
+    return this._config;
+  }
+
+  /**
+   * Creates a new TimelineCred based on the new Parameters.
+   * Holds the graph and config constant.
+   *
+   * This returns a new TimelineCred; it does not modify the existing one.
+   */
+  async reanalyze(newParams: TimelineCredParameters): Promise<TimelineCred> {
+    return await TimelineCred.compute(this._graph, newParams, this._config);
+  }
+
+  /**
+   * Return all the intervals in the timeline.
+   */
+  intervals(): $ReadOnlyArray<Interval> {
+    return this._cred.intervals;
+  }
+
+  /**
+   * Get the CredNode for a given NodeAddress.
+   *
+   * Returns undefined if the node is not in the filtered results.
+   *
+   * Note that it's possible that the node is present in the Graph, but not the
+   * filtered results; if so, it will return undefined.
+   */
+  credNode(a: NodeAddressT): ?CredNode {
+    const cred = this._cred.addressToCred.get(a);
+    if (cred === undefined) {
+      return undefined;
+    }
+    const total = sum(cred);
+    const node = NullUtil.get(this._graph.node(a));
+    return {cred, total, node};
+  }
+
+  /**
+   * Return all the nodes matching the prefix, along with their cred,
+   * sorted by total cred (descending).
+   */
+  credSortedNodes(prefix: NodeAddressT): $ReadOnlyArray<CredNode> {
+    const match = (a) => NodeAddress.hasPrefix(a, prefix);
+    const addresses = Array.from(this._cred.addressToCred.keys()).filter(match);
+    const credNodes = addresses.map((a) => this.credNode(a));
+    return sortBy(credNodes, (x: CredNode) => -x.total);
+  }
+
+  /**
+   * Serialize the graph, cred, and params (but not the config) to JSON.
+   */
+  toJSON(): TimelineCredJSON {
+    const rawJSON = {
+      graphJSON: this._graph.toJSON(),
+      credJSON: filteredTimelineCredToJSON(this._cred),
+      paramsJSON: paramsToJSON(this._params),
+    };
+    return toCompat(COMPAT_INFO, rawJSON);
+  }
+
+  /**
+   * Rebuild a TimelineCred from its JSON form. The config is not part of
+   * the serialized data, so the caller must supply it.
+   */
+  static fromJSON(
+    j: TimelineCredJSON,
+    config: TimelineCredConfig
+  ): TimelineCred {
+    const json = fromCompat(COMPAT_INFO, j);
+    const {graphJSON, credJSON, paramsJSON} = json;
+    const graph = Graph.fromJSON(graphJSON);
+    const cred = filteredTimelineCredFromJSON(credJSON);
+    const params = paramsFromJSON(paramsJSON);
+    return new TimelineCred(graph, cred, params, config);
+  }
+
+  /**
+   * Compute TimelineCred for the graph using the given params and config.
+   */
+  static async compute(
+    graph: Graph,
+    params: TimelineCredParameters,
+    config: TimelineCredConfig
+  ): Promise<TimelineCred> {
+    const ftc = await _computeTimelineCred(graph, params, config);
+    return new TimelineCred(graph, ftc, params, config);
+  }
+}
+
+/**
+ * Run timeline PageRank over the graph, convert the resulting distribution
+ * to cred, and keep only the nodes matching `config.filterNodePrefixes`.
+ */
+async function _computeTimelineCred(
+  graph: Graph,
+  params: TimelineCredParameters,
+  config: TimelineCredConfig
+): Promise<FilteredTimelineCred> {
+  const nodeOrder = Array.from(graph.nodes()).map((x) => x.address);
+  const distribution = await timelinePagerank(
+    graph,
+    config.types,
+    params.weights,
+    params.intervalDecay,
+    params.alpha
+  );
+  const cred = distributionToCred(
+    distribution,
+    nodeOrder,
+    config.scoreNodePrefix
+  );
+  const filtered = filterTimelineCred(
+    cred,
+    nodeOrder,
+    config.filterNodePrefixes
+  );
+  return filtered;
+}
+
+const COMPAT_INFO = {type: "sourcecred/timelineCred", version: "0.1.0"};
+
+export opaque type TimelineCredJSON = Compatible<{|
+  +graphJSON: GraphJSON,
+  +paramsJSON: ParamsJSON,
+  +credJSON: FilteredTimelineCredJSON,
+|}>;
+
+type ParamsJSON = {|
+  +alpha: number,
+  +intervalDecay: number,
+  +weights: WeightsJSON,
+|};
+
+function paramsToJSON(p: TimelineCredParameters): ParamsJSON {
+  return {
+    alpha: p.alpha,
+    intervalDecay: p.intervalDecay,
+    weights: weightsToJSON(p.weights),
+  };
+}
+
+function paramsFromJSON(p: ParamsJSON): TimelineCredParameters {
+  return {
+    alpha: p.alpha,
+    intervalDecay: p.intervalDecay,
+    weights: weightsFromJSON(p.weights),
+  };
+}
diff --git a/src/analysis/timeline/timelineCred.test.js b/src/analysis/timeline/timelineCred.test.js
new file mode 100644
index 0000000..de9712d
--- /dev/null
+++ b/src/analysis/timeline/timelineCred.test.js
@@ -0,0 +1,87 @@
+// @flow
+
+import {sum} from "d3-array";
+import sortBy from "lodash.sortby";
+import {utcWeek} from "d3-time";
+import {NodeAddress, Graph} from "../../core/graph";
+import {TimelineCred, type TimelineCredConfig} from "./timelineCred";
+import {type FilteredTimelineCred} from "./filterTimelineCred";
+import {defaultWeights} from "../weights";
+
+describe("src/analysis/timeline/timelineCred", () => {
+  const credConfig: () => TimelineCredConfig = () => ({
+    scoreNodePrefix: NodeAddress.fromParts(["foo"]),
+    filterNodePrefixes: [NodeAddress.fromParts(["foo"])],
+    types: {nodeTypes: [], edgeTypes: []},
+  });
+
+  function exampleTimelineCred(): TimelineCred {
+    const startTimeMs = +new Date(2017, 0);
+    const endTimeMs = +new Date(2017, 6);
+    const boundaries = utcWeek.range(startTimeMs, endTimeMs);
+    const intervals = [];
+    for (let i = 0; i < boundaries.length - 1; i++) {
+      intervals.push({
+        startTimeMs: +boundaries[i],
+        endTimeMs: +boundaries[i + 1],
+      });
+    }
+    const users = [
+      ["starter", (x) => Math.max(0, 20 - x)],
+      ["steady", (_) => 4],
+      ["finisher", (x) => (x * x) / 20],
+      ["latecomer", (x) => Math.max(0, x - 20)],
+    ];
+
+    const graph = new Graph();
+    const addressToCred = new Map();
+    for (const [name, generator] of users) {
+      const address = NodeAddress.fromParts(["foo", name]);
+      graph.addNode({
+        address,
+        description: `[@${name}](https://github.com/${name})`,
+        timestampMs: null,
+      });
+      const scores = intervals.map((_unused, i) => generator(i));
+      addressToCred.set(address, scores);
+    }
+    const filteredTimelineCred: FilteredTimelineCred = {
+      intervals,
+      addressToCred,
+    };
+    const params = {alpha: 0.05, intervalDecay: 0.5, weights: defaultWeights()};
+    return new TimelineCred(graph, filteredTimelineCred, params, credConfig());
+  }
+
+  it("JSON serialization works", () => {
+    const tc = exampleTimelineCred();
+    const json = exampleTimelineCred().toJSON();
+    const tc_ = TimelineCred.fromJSON(json, credConfig());
+    expect(tc.graph()).toEqual(tc_.graph());
+    expect(tc.params()).toEqual(tc_.params());
+    expect(tc.config()).toEqual(tc_.config());
+    expect(tc.credSortedNodes(NodeAddress.empty)).toEqual(
+      tc_.credSortedNodes(NodeAddress.empty)
+    );
+  });
+
+  it("cred sorting works", () => {
+    const tc = exampleTimelineCred();
+    const sorted = tc.credSortedNodes(NodeAddress.empty);
+    const expected = sortBy(sorted, (x) => -x.total);
+    expect(sorted).toEqual(expected);
+  });
+
+  it("cred aggregation works", () => {
+    const tc = exampleTimelineCred();
+    const nodes = tc.credSortedNodes(NodeAddress.empty);
+    for (const node of nodes) {
+      expect(node.total).toEqual(sum(node.cred));
+    }
+  });
+
+  it("credNode returns undefined for absent nodes", () => {
+    const tc = exampleTimelineCred();
+    expect(tc.credNode(NodeAddress.fromParts(["baz"]))).toBe(undefined);
+  });
+});
diff --git a/src/plugins/defaultCredConfig.js b/src/plugins/defaultCredConfig.js
new file mode 100644
index 0000000..ef0c044
--- /dev/null
+++ b/src/plugins/defaultCredConfig.js
@@ -0,0 +1,13 @@
+// @flow
+
+import {userNodeType, repoNodeType, declaration} from "./github/declaration";
+import type {TimelineCredConfig} from "../analysis/timeline/timelineCred";
+
+export const DEFAULT_CRED_CONFIG: TimelineCredConfig = {
+  scoreNodePrefix: userNodeType.prefix,
+  filterNodePrefixes: Object.freeze([userNodeType.prefix, repoNodeType.prefix]),
+  types: Object.freeze({
+    nodeTypes: Object.freeze(declaration.nodeTypes.slice()),
+    edgeTypes: Object.freeze(declaration.edgeTypes.slice()),
+  }),
+};
diff --git a/src/plugins/github/declaration.js b/src/plugins/github/declaration.js
index f50f33e..801a9ed 100644
--- a/src/plugins/github/declaration.js
+++ b/src/plugins/github/declaration.js
@@ -5,7 +5,7 @@ import * as N from "./nodes";
 import * as E from "./edges";
 import dedent from "../../util/dedent";
 
-const repoNodeType = Object.freeze({
+export const repoNodeType = Object.freeze({
   name: "Repository",
   pluralName: "Repositories",
   prefix: N.Prefix.repo,