From e97a1209d039e9daeb60eaeda6f900c2480f5222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dandelion=20Man=C3=A9?= Date: Wed, 5 Feb 2020 10:46:17 -0800 Subject: [PATCH] Remove TimelineCred.reduceSize (#1633) TimelineCred has a `reduceSize` method which discards cred for most nodes, keeping only cred for the top nodes of each type across all time. I've wanted to remove this for a while, because it is a bad fit for the kind of experimentation we're starting to do with showing the top nodes for recent activity periods. Since the recent nodes haven't had much time to accumulate cred, they are almost all discarded by reduceSize. As an added inducement, I want to get rid of reduceSize for #1557 because it requires type information. `reduceSize` still serves one function, which is enabling the frontend to load faster because it only loads a smaller amount of data which is discoverable in the UI. However, it doesn't make sense to discard most of the data just for a fast UI load--we can later make another data structure which is tuned for the needs of the frontend, and have that data structure include only summary statistics. This will make the cred.json file much larger for large repos, e.g. I expect that loading tensorflow/tensorflow would now go over the 100MB hard cap for GitHub pages. However, right now we're prioritizing using SourceCred for medium-small projects (e.g. SourceCred itself, and maybe Maker), so this isn't a current concern. Test plan: `yarn test --full` passes. --- src/analysis/timeline/timelineCred.js | 60 +--------------------- src/analysis/timeline/timelineCred.test.js | 54 +------------------ 2 files changed, 2 insertions(+), 112 deletions(-) diff --git a/src/analysis/timeline/timelineCred.js b/src/analysis/timeline/timelineCred.js index ce35dd2..d8faf83 100644 --- a/src/analysis/timeline/timelineCred.js +++ b/src/analysis/timeline/timelineCred.js @@ -151,59 +151,6 @@ export class TimelineCred { return this.credSortedNodes(userTypes.map((x) => x.prefix)); } - /** - * Create a new, filtered TimelineCred, by removing low-scored nodes. - * - * Cred Graphs may have a huge number of small contributions, like comments, - * in which end users are not particularly interested. However, the size of - * the TimelineCred offered to the frontend matters quite a bit. Therefore, - * we can use this method to discard almost all nodes in the graph. - * - * Specifically, `reduceSize` takes in an array of inclusion prefixes: for - * each inclusion prefix, we will take the top `k` nodes matching that prefix - * (by total score across all intervals). - * - * It also takes `fullInclusion` prefixes: for these prefixes, every matching - * node will be included. This allows us to ensure that e.g. every user will - * be included in the `cli scores` output, even if they are not in the top - * `k` users. - */ - reduceSize(opts: {| - +typePrefixes: $ReadOnlyArray, - +nodesPerType: number, - +fullInclusionPrefixes: $ReadOnlyArray, - |}): TimelineCred { - const {typePrefixes, nodesPerType, fullInclusionPrefixes} = opts; - const selectedNodes: Set = new Set(); - for (const prefix of typePrefixes) { - const matchingNodes = this.credSortedNodes([prefix]).slice( - 0, - nodesPerType - ); - for (const {node} of matchingNodes) { - selectedNodes.add(node.address); - } - } - // For the fullInclusionPrefixes, we won't slice -- we just take every match. - const matchingNodes = this.credSortedNodes(fullInclusionPrefixes); - for (const {node} of matchingNodes) { - selectedNodes.add(node.address); - } - - const filteredAddressToCred = new Map(); - for (const address of selectedNodes) { - const cred = NullUtil.get(this._addressToCred.get(address)); - filteredAddressToCred.set(address, cred); - } - return new TimelineCred( - this._weightedGraph, - this._intervals, - filteredAddressToCred, - this._params, - this._plugins - ); - } - toJSON(): TimelineCredJSON { const rawJSON = { weightedGraphJSON: WeightedGraph.toJSON(this._weightedGraph), @@ -263,18 +210,13 @@ export class TimelineCred { addressToCred.set(addr, addrCred); } const intervals = cred.map((x) => x.interval); - const preliminaryCred = new TimelineCred( + return new TimelineCred( weightedGraph, intervals, addressToCred, fullParams, plugins ); - return preliminaryCred.reduceSize({ - typePrefixes: types.nodeTypes.map((x) => x.prefix), - nodesPerType: 100, - fullInclusionPrefixes: scorePrefixes, - }); } } diff --git a/src/analysis/timeline/timelineCred.test.js b/src/analysis/timeline/timelineCred.test.js index dedaf13..d160d3b 100644 --- a/src/analysis/timeline/timelineCred.test.js +++ b/src/analysis/timeline/timelineCred.test.js @@ -4,7 +4,7 @@ import deepFreeze from "deep-freeze"; import {sum} from "d3-array"; import sortBy from "lodash.sortby"; import {utcWeek} from "d3-time"; -import {NodeAddress, type NodeAddressT, EdgeAddress} from "../../core/graph"; +import {NodeAddress, EdgeAddress} from "../../core/graph"; import * as WeightedGraph from "../../core/weightedGraph"; import {TimelineCred} from "./timelineCred"; import {defaultParams} from "./params"; @@ -141,58 +141,6 @@ describe("src/analysis/timeline/timelineCred", () => { } }); - describe("reduceSize", () => { - it("chooses top nodes for each type prefix", () => { - const nodesPerType = 3; - const tc = exampleTimelineCred(); - const filtered = tc.reduceSize({ - typePrefixes: [userPrefix, fooPrefix], - nodesPerType, - fullInclusionPrefixes: [], - }); - - const checkPrefix = (p: NodeAddressT) => { - const fullNodes = tc.credSortedNodes([p]); - const truncatedNodes = filtered.credSortedNodes([p]); - expect(truncatedNodes).toHaveLength(nodesPerType); - expect(fullNodes.slice(0, nodesPerType)).toEqual(truncatedNodes); - }; - checkPrefix(userPrefix); - checkPrefix(fooPrefix); - }); - - it("can keep only scoring nodes", () => { - const nodesPerType = 3; - const tc = exampleTimelineCred(); - const filtered = tc.reduceSize({ - typePrefixes: [], - nodesPerType, - fullInclusionPrefixes: [userPrefix], - }); - const fullUserNodes = tc.credSortedNodes([userPrefix]); - const truncatedUserNodes = filtered.credSortedNodes([userPrefix]); - expect(fullUserNodes).toEqual(truncatedUserNodes); - const truncatedFoo = filtered.credSortedNodes([fooPrefix]); - expect(truncatedFoo).toHaveLength(0); - }); - - it("keeps all scoring nodes (with multiple scoring types)", () => { - const nodesPerType = 3; - const tc = exampleTimelineCred(); - const filtered = tc.reduceSize({ - typePrefixes: [userPrefix, NodeAddress.fromParts(["nope"])], - nodesPerType, - fullInclusionPrefixes: [userPrefix, fooPrefix], - }); - const fullUserNodes = tc.credSortedNodes([userPrefix]); - const truncatedUserNodes = filtered.credSortedNodes([userPrefix]); - expect(fullUserNodes).toEqual(truncatedUserNodes); - const fullFoo = tc.credSortedNodes([fooPrefix]); - const truncatedFoo = filtered.credSortedNodes([fooPrefix]); - expect(fullFoo).toEqual(truncatedFoo); - }); - }); - it("userNodes returns the credSortedNodes for user types", () => { const tc = exampleTimelineCred(); expect(tc.userNodes()).toEqual(tc.credSortedNodes([userPrefix]));