Enable multiple scoring node types (#1361)

This updates the cred computation logic so that we can have multiple
"scoring node types".

Context: Currently, we designate a single node type (GitHub users) as
the scoring node type, and normalize so that all users have 1000 score
in total.

This commit updates the pipeline to allow more than one prefix for
scoring, meaning that we could have GitHub users, Discourse users, and
more, and still have all users sum to 1000 score.

We will still need to update the frontend so that it will have a user
pane which aggregates across all users.

Test plan: Unit tests updated. `yarn test` passes.
This commit is contained in:
Dandelion Mané 2019-09-10 19:05:46 +02:00 committed by GitHub
parent 0d7db99d7f
commit e2e6c56650
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 43 additions and 15 deletions

View File

@ -34,7 +34,7 @@ export type FullTimelineCred = $ReadOnlyArray<{|
export function distributionToCred(
ds: TimelineDistributions,
nodeOrder: $ReadOnlyArray<NodeAddressT>,
scoringNodePrefix: NodeAddressT
scoringNodePrefixes: $ReadOnlyArray<NodeAddressT>
): FullTimelineCred {
if (ds.length === 0) {
return [];
@ -43,7 +43,8 @@ export function distributionToCred(
const scoringNodeIndices = [];
const cred = new Array(nodeOrder.length);
for (let i = 0; i < nodeOrder.length; i++) {
if (NodeAddress.hasPrefix(nodeOrder[i], scoringNodePrefix)) {
const addr = nodeOrder[i];
if (scoringNodePrefixes.some((x) => NodeAddress.hasPrefix(addr, x))) {
scoringNodeIndices.push(i);
}
cred[i] = new Array(intervals.length);

View File

@ -20,7 +20,34 @@ describe("src/analysis/timeline/distributionToCred", () => {
},
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, NodeAddress.empty);
const actual = distributionToCred(ds, nodeOrder, [NodeAddress.empty]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
cred: new Float64Array([1, 1]),
},
{
interval: {startTimeMs: 10, endTimeMs: 20},
cred: new Float64Array([9, 1]),
},
];
expect(expected).toEqual(actual);
});
it("correctly handles multiple scoring prefixes", () => {
const ds = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
intervalWeight: 2,
distribution: new Float64Array([0.5, 0.5]),
},
{
interval: {startTimeMs: 10, endTimeMs: 20},
intervalWeight: 10,
distribution: new Float64Array([0.9, 0.1]),
},
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, [na("foo"), na("bar")]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
@ -47,7 +74,7 @@ describe("src/analysis/timeline/distributionToCred", () => {
},
];
const nodeOrder = [na("foo"), na("bar")];
const actual = distributionToCred(ds, nodeOrder, na("bar"));
const actual = distributionToCred(ds, nodeOrder, [na("bar")]);
const expected = [
{
interval: {startTimeMs: 0, endTimeMs: 10},
@ -69,11 +96,11 @@ describe("src/analysis/timeline/distributionToCred", () => {
},
];
const nodeOrder = [na("foo"), na("bar")];
const fail = () => distributionToCred(ds, nodeOrder, na("zod"));
const fail = () => distributionToCred(ds, nodeOrder, []);
expect(fail).toThrowError("no nodes matched scoringNodePrefix");
});
it("returns empty array if no intervals are present", () => {
expect(distributionToCred([], [], NodeAddress.empty)).toEqual([]);
expect(distributionToCred([], [], [])).toEqual([]);
});
});
});

View File

@ -75,7 +75,7 @@ export type TimelineCredConfig = {|
// Cred is normalized so that for a given interval, the total score of all
// nodes matching any of these prefixes will be equal to the total weight of
// nodes in the interval.
+scoreNodePrefix: NodeAddressT,
+scoreNodePrefixes: $ReadOnlyArray<NodeAddressT>,
// The types are used to assign base cred to nodes based on their type. Note
// that the weight for each type may be overridden in the params.
+types: NodeAndEdgeTypes,
@ -256,7 +256,7 @@ export class TimelineCred {
const cred = distributionToCred(
distribution,
nodeOrder,
config.scoreNodePrefix
config.scoreNodePrefixes
);
const addressToCred = new Map();
for (let i = 0; i < nodeOrder.length; i++) {
@ -275,12 +275,12 @@ export class TimelineCred {
return preliminaryCred.reduceSize({
typePrefixes: config.types.nodeTypes.map((x) => x.prefix),
nodesPerType: 100,
fullInclusionPrefixes: [config.scoreNodePrefix],
fullInclusionPrefixes: config.scoreNodePrefixes,
});
}
}
const COMPAT_INFO = {type: "sourcecred/timelineCred", version: "0.3.0"};
const COMPAT_INFO = {type: "sourcecred/timelineCred", version: "0.4.0"};
export opaque type TimelineCredJSON = Compatible<{|
+graphJSON: GraphJSON,

View File

@ -27,7 +27,7 @@ describe("src/analysis/timeline/timelineCred", () => {
};
const fooPrefix = fooType.prefix;
const credConfig: () => TimelineCredConfig = () => ({
scoreNodePrefix: userPrefix,
scoreNodePrefixes: [userPrefix],
types: {nodeTypes: [userType, fooType], edgeTypes: []},
});
const users = [

View File

@ -72,7 +72,7 @@ describe("api/load", () => {
// Deep freeze will freeze the weights, too
const params = deepFreeze({alpha: 0.05, intervalDecay: 0.5, weights});
const config = deepFreeze({
scoreNodePrefix: NodeAddress.empty,
scoreNodePrefixes: [NodeAddress.empty],
types: {nodeTypes: [], edgeTypes: []},
});
const example = () => {

View File

@ -5,7 +5,7 @@ import * as Github from "../plugins/github/declaration";
import type {TimelineCredConfig} from "../analysis/timeline/timelineCred";
export const DEFAULT_CRED_CONFIG: TimelineCredConfig = deepFreeze({
scoreNodePrefix: Github.userNodeType.prefix,
scoreNodePrefixes: [Github.userNodeType.prefix],
types: {
nodeTypes: Github.declaration.nodeTypes.slice(),
edgeTypes: Github.declaration.edgeTypes.slice(),

View File

@ -52,7 +52,7 @@ export default class TimelineCredViewInspectiontest extends React.Component<{|
}
const params = {alpha: 0.05, intervalDecay: 0.5, weights: defaultWeights()};
const config: TimelineCredConfig = {
scoreNodePrefix: NodeAddress.empty,
scoreNodePrefixes: [NodeAddress.empty],
types: {nodeTypes: [], edgeTypes: []},
};
return new TimelineCred(graph, intervals, addressToCred, params, config);