Remove TimelineCred.reduceSize (#1633)

TimelineCred has a `reduceSize` method which discards cred for most
nodes, keeping only cred for the top nodes of each type across all time.
I've wanted to remove this for a while, because it is a bad fit for the
kind of experimentation we're starting to do with showing the top nodes
for recent activity periods. Since the recent nodes haven't had much
time to accumulate cred, they are almost all discarded by reduceSize. As
an added inducement, I want to get rid of reduceSize for #1557 because
it requires type information.

`reduceSize` still serves one function, which is enabling the frontend
to load faster because it only loads a smaller amount of data which is
discoverable in the UI. However, it doesn't make sense to discard most
of the data just for a fast UI load--we can later make another data
structure which is tuned for the needs of the frontend, and have that
data structure include only summary statistics.

This will make the cred.json file much larger for large repos, e.g. I
expect that loading tensorflow/tensorflow would now go over the 100MB
hard cap for GitHub pages. However, right now we're prioritizing using
SourceCred for medium-small projects (e.g. SourceCred itself, and maybe
Maker), so this isn't a current concern.

Test plan: `yarn test --full` passes.
This commit is contained in:
Dandelion Mané 2020-02-05 10:46:17 -08:00 committed by GitHub
parent 378255e91d
commit e97a1209d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 112 deletions

View File

@ -151,59 +151,6 @@ export class TimelineCred {
return this.credSortedNodes(userTypes.map((x) => x.prefix));
}
/**
* Create a new, filtered TimelineCred, by removing low-scored nodes.
*
* Cred Graphs may have a huge number of small contributions, like comments,
* in which end users are not particularly interested. However, the size of
* the TimelineCred offered to the frontend matters quite a bit. Therefore,
* we can use this method to discard almost all nodes in the graph.
*
* Specifically, `reduceSize` takes in an array of inclusion prefixes: for
* each inclusion prefix, we will take the top `k` nodes matching that prefix
* (by total score across all intervals).
*
* It also takes `fullInclusion` prefixes: for these prefixes, every matching
* node will be included. This allows us to ensure that e.g. every user will
* be included in the `cli scores` output, even if they are not in the top
* `k` users.
*/
reduceSize(opts: {|
+typePrefixes: $ReadOnlyArray<NodeAddressT>,
+nodesPerType: number,
+fullInclusionPrefixes: $ReadOnlyArray<NodeAddressT>,
|}): TimelineCred {
const {typePrefixes, nodesPerType, fullInclusionPrefixes} = opts;
const selectedNodes: Set<NodeAddressT> = new Set();
for (const prefix of typePrefixes) {
const matchingNodes = this.credSortedNodes([prefix]).slice(
0,
nodesPerType
);
for (const {node} of matchingNodes) {
selectedNodes.add(node.address);
}
}
// For the fullInclusionPrefixes, we won't slice -- we just take every match.
const matchingNodes = this.credSortedNodes(fullInclusionPrefixes);
for (const {node} of matchingNodes) {
selectedNodes.add(node.address);
}
const filteredAddressToCred = new Map();
for (const address of selectedNodes) {
const cred = NullUtil.get(this._addressToCred.get(address));
filteredAddressToCred.set(address, cred);
}
return new TimelineCred(
this._weightedGraph,
this._intervals,
filteredAddressToCred,
this._params,
this._plugins
);
}
toJSON(): TimelineCredJSON {
const rawJSON = {
weightedGraphJSON: WeightedGraph.toJSON(this._weightedGraph),
@ -263,18 +210,13 @@ export class TimelineCred {
addressToCred.set(addr, addrCred);
}
const intervals = cred.map((x) => x.interval);
const preliminaryCred = new TimelineCred(
return new TimelineCred(
weightedGraph,
intervals,
addressToCred,
fullParams,
plugins
);
return preliminaryCred.reduceSize({
typePrefixes: types.nodeTypes.map((x) => x.prefix),
nodesPerType: 100,
fullInclusionPrefixes: scorePrefixes,
});
}
}

View File

@ -4,7 +4,7 @@ import deepFreeze from "deep-freeze";
import {sum} from "d3-array";
import sortBy from "lodash.sortby";
import {utcWeek} from "d3-time";
import {NodeAddress, type NodeAddressT, EdgeAddress} from "../../core/graph";
import {NodeAddress, EdgeAddress} from "../../core/graph";
import * as WeightedGraph from "../../core/weightedGraph";
import {TimelineCred} from "./timelineCred";
import {defaultParams} from "./params";
@ -141,58 +141,6 @@ describe("src/analysis/timeline/timelineCred", () => {
}
});
describe("reduceSize", () => {
it("chooses top nodes for each type prefix", () => {
const nodesPerType = 3;
const tc = exampleTimelineCred();
const filtered = tc.reduceSize({
typePrefixes: [userPrefix, fooPrefix],
nodesPerType,
fullInclusionPrefixes: [],
});
const checkPrefix = (p: NodeAddressT) => {
const fullNodes = tc.credSortedNodes([p]);
const truncatedNodes = filtered.credSortedNodes([p]);
expect(truncatedNodes).toHaveLength(nodesPerType);
expect(fullNodes.slice(0, nodesPerType)).toEqual(truncatedNodes);
};
checkPrefix(userPrefix);
checkPrefix(fooPrefix);
});
it("can keep only scoring nodes", () => {
const nodesPerType = 3;
const tc = exampleTimelineCred();
const filtered = tc.reduceSize({
typePrefixes: [],
nodesPerType,
fullInclusionPrefixes: [userPrefix],
});
const fullUserNodes = tc.credSortedNodes([userPrefix]);
const truncatedUserNodes = filtered.credSortedNodes([userPrefix]);
expect(fullUserNodes).toEqual(truncatedUserNodes);
const truncatedFoo = filtered.credSortedNodes([fooPrefix]);
expect(truncatedFoo).toHaveLength(0);
});
it("keeps all scoring nodes (with multiple scoring types)", () => {
const nodesPerType = 3;
const tc = exampleTimelineCred();
const filtered = tc.reduceSize({
typePrefixes: [userPrefix, NodeAddress.fromParts(["nope"])],
nodesPerType,
fullInclusionPrefixes: [userPrefix, fooPrefix],
});
const fullUserNodes = tc.credSortedNodes([userPrefix]);
const truncatedUserNodes = filtered.credSortedNodes([userPrefix]);
expect(fullUserNodes).toEqual(truncatedUserNodes);
const fullFoo = tc.credSortedNodes([fooPrefix]);
const truncatedFoo = filtered.credSortedNodes([fooPrefix]);
expect(fullFoo).toEqual(truncatedFoo);
});
});
it("userNodes returns the credSortedNodes for user types", () => {
const tc = exampleTimelineCred();
expect(tc.userNodes()).toEqual(tc.credSortedNodes([userPrefix]));