diff --git a/sharness/__snapshots__/example-github-load/data/sourcecred/example-github/timestamps.json b/sharness/__snapshots__/example-github-load/data/sourcecred/example-github/timestamps.json new file mode 100644 index 0000000..8d01ced --- /dev/null +++ b/sharness/__snapshots__/example-github-load/data/sourcecred/example-github/timestamps.json @@ -0,0 +1 @@ +{"N\u0000sourcecred\u0000git\u0000COMMIT\u00000a223346b4e6dec0127b1e6aa892c4ee0424b66a\u0000":1519807427000,"N\u0000sourcecred\u0000git\u0000COMMIT\u00006bd1b4c0b719c22c688a74863be07a699b7b9b34\u0000":1536806901000,"N\u0000sourcecred\u0000git\u0000COMMIT\u00006d5b3aa31ebb68a06ceb46bbd6cf49b6ccd6f5e6\u0000":1519878354000,"N\u0000sourcecred\u0000git\u0000COMMIT\u0000c430bd74455105f77215ece51945094ceeee6c86\u0000":1536788634000,"N\u0000sourcecred\u0000git\u0000COMMIT\u0000ec91adb718a6045b492303f00d8e8beb957dc780\u0000":1519807271000,"N\u0000sourcecred\u0000git\u0000COMMIT\u0000ecc889dc94cf6da17ae6eab5bb7b7155f577519d\u0000":1519807329000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u000011\u0000420811872\u0000":1536789545000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u000011\u0000420813013\u0000":1536789813000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u000011\u0000420813206\u0000":1536789858000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u000011\u0000420813621\u0000":1536789965000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000373768703\u0000":1521217693000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000373768850\u0000":1521217725000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000385576185\u0000":1525137909000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000IS
SUE\u0000sourcecred\u0000example-github\u00002\u0000385576220\u0000":1525137925000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000385576248\u0000":1525137939000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000385576273\u0000":1525137951000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000385576920\u0000":1525138231000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000385576936\u0000":1525138238000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00006\u0000373768442\u0000":1521217642000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00006\u0000373768538\u0000":1521217661000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00006\u0000385223316\u0000":1524973307000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000ISSUE\u0000sourcecred\u0000example-github\u00006\u0000417104047\u0000":1535576390000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000PULL\u0000sourcecred\u0000example-github\u00003\u0000369162222\u0000":1519807420000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000PULL\u0000sourcecred\u0000example-github\u00005\u0000396430464\u0000":1528764380000,"N\u0000sourcecred\u0000github\u0000COMMENT\u0000REVIEW\u0000sourcecred\u0000example-github\u00005\u0000100313899\u0000171460198\u0000":1519878210000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00001\u0000":1519807088000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u000010\u0000":1530297021000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u000011\u0000":1536789479000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u000012\u0000":1536878086000,"N
\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u000013\u0000":1536878137000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00002\u0000":1519807129000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00004\u0000":1519807454000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00006\u0000":1521217624000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00007\u0000":1521569949000,"N\u0000sourcecred\u0000github\u0000ISSUE\u0000sourcecred\u0000example-github\u00008\u0000":1521570243000,"N\u0000sourcecred\u0000github\u0000PULL\u0000sourcecred\u0000example-github\u00003\u0000":1519807399000,"N\u0000sourcecred\u0000github\u0000PULL\u0000sourcecred\u0000example-github\u00005\u0000":1519807636000,"N\u0000sourcecred\u0000github\u0000PULL\u0000sourcecred\u0000example-github\u00009\u0000":1525373595000,"N\u0000sourcecred\u0000github\u0000REPO\u0000sourcecred\u0000example-github\u0000":null,"N\u0000sourcecred\u0000github\u0000REVIEW\u0000sourcecred\u0000example-github\u00005\u0000100313899\u0000":1519878210000,"N\u0000sourcecred\u0000github\u0000REVIEW\u0000sourcecred\u0000example-github\u00005\u0000100314038\u0000":1519878296000,"N\u0000sourcecred\u0000github\u0000USERLIKE\u0000BOT\u0000credbot\u0000":null,"N\u0000sourcecred\u0000github\u0000USERLIKE\u0000USER\u0000decentralion\u0000":null,"N\u0000sourcecred\u0000github\u0000USERLIKE\u0000USER\u0000wchargin\u0000":null} \ No newline at end of file diff --git a/src/analysis/temporal/timestampMap.js b/src/analysis/temporal/timestampMap.js new file mode 100644 index 0000000..953f48b --- /dev/null +++ b/src/analysis/temporal/timestampMap.js @@ -0,0 +1,57 @@ +// @flow + +import path from "path"; +import fs from "fs-extra"; +import stringify from "json-stable-stringify"; +import * as MapUtil from "../../util/map"; +import {type RepoId, repoIdToString} from "../../core/repoId"; +import 
{type NodeAddressT, NodeAddress} from "../../core/graph"; +import {type IAnalysisAdapter, type MsSinceEpoch} from "../analysisAdapter"; +import {NodeTrie} from "../../core/trie"; + +export type TimestampMap = Map<NodeAddressT, MsSinceEpoch | null>; + +export function createTimestampMap( + nodes: Iterable<NodeAddressT>, + adapters: $ReadOnlyArray<IAnalysisAdapter> +): TimestampMap { + const adapterTrie: NodeTrie<IAnalysisAdapter> = new NodeTrie(); + for (const adapter of adapters) { + adapterTrie.add(adapter.declaration().nodePrefix, adapter); + } + const result = new Map(); + for (const node of nodes) { + const adapter = adapterTrie.getLast(node); + if (adapter == null) { + throw new Error(`No adapter for ${NodeAddress.toString(node)}`); + } + result.set(node, adapter.createdAt(node)); + } + return result; +} + +const TIMESTAMP_FILE = "timestamps.json"; +function basepath(sourcecredDirectory: string, repoId: RepoId) { + return path.join(sourcecredDirectory, "data", repoIdToString(repoId)); +} +function filepath(sourcecredDirectory: string, repoId: RepoId) { + return path.join(basepath(sourcecredDirectory, repoId), TIMESTAMP_FILE); +} + +export function writeTimestampMap( + stamps: TimestampMap, + sourcecredDirectory: string, + repoId: RepoId +) { + fs.ensureDirSync(basepath(sourcecredDirectory, repoId)); + const jsonString = stringify(MapUtil.toObject(stamps)); + fs.writeFileSync(filepath(sourcecredDirectory, repoId), jsonString); +} + +export function readTimestampMap( + sourcecredDirectory: string, + repoId: RepoId +): TimestampMap { + const contents = fs.readFileSync(filepath(sourcecredDirectory, repoId)); + return MapUtil.fromObject(JSON.parse(contents.toString())); +} diff --git a/src/analysis/temporal/timestampMap.test.js b/src/analysis/temporal/timestampMap.test.js new file mode 100644 index 0000000..957a4d3 --- /dev/null +++ b/src/analysis/temporal/timestampMap.test.js @@ -0,0 +1,87 @@ +// @flow + +import tmp from "tmp"; +import { + Graph, + type NodeAddressT, + NodeAddress, + EdgeAddress, +} from "../../core/graph"; +import {makeRepoId}
from "../../core/repoId"; +import { + createTimestampMap, + readTimestampMap, + writeTimestampMap, +} from "./timestampMap"; + +describe("src/analysis/temporal/timestampMap", () => { + const foo = NodeAddress.fromParts(["foo"]); + const bar = NodeAddress.fromParts(["bar"]); + + describe("createTimestampMap", () => { + const declarationForPrefix = (prefixParts: string[]) => ({ + name: NodeAddress.fromParts(prefixParts), + nodePrefix: NodeAddress.fromParts(prefixParts), + edgePrefix: EdgeAddress.fromParts(prefixParts), + nodeTypes: [], + edgeTypes: [], + }); + const adapterForPrefix = ( + prefixParts: string[], + createdAt: (NodeAddressT) => number | null + ) => { + class Adapter { + declaration() { + return declarationForPrefix(prefixParts); + } + graph() { + return new Graph(); + } + createdAt(n: NodeAddressT) { + return createdAt(n); + } + } + return new Adapter(); + }; + it("matches the most specific adapter", () => { + const fooAdapter = adapterForPrefix(["foo"], (_) => 1); + const fallbackAdapter = adapterForPrefix([], (_) => null); + const nodes = [foo, bar]; + const tsMap = createTimestampMap(nodes, [fooAdapter, fallbackAdapter]); + // foo got its timestamp from the fooAdapter, not from the fallbackAdapter, + // even though it matched both. + expect(tsMap.get(foo)).toEqual(1); + // Bar matched the fallback adapter.
+ expect(tsMap.get(bar)).toEqual(null); + }); + it("throws an error if there is no matching adapter", () => { + const foo = NodeAddress.fromParts(["foo"]); + expect(() => createTimestampMap([foo], [])).toThrowError( + `No adapter for NodeAddress["foo"]` + ); + }); + }); + describe("{write,read}TimestampMap", () => { + const repo = makeRepoId("foo", "bar"); + it("throws an error if there is no timestamp map to read", () => { + const dir = tmp.dirSync().name; + expect(() => readTimestampMap(dir, repo)).toThrowError( + "ENOENT: no such file or directory" + ); + }); + it("can write/read the empty timestamp map", () => { + const dir = tmp.dirSync().name; + const map = new Map(); + writeTimestampMap(map, dir, repo); + const map2 = readTimestampMap(dir, repo); + expect(map2).toEqual(map); + }); + it("can write/read a non-empty timestamp map", () => { + const dir = tmp.dirSync().name; + const map = new Map([[foo, null], [bar, 3]]); + writeTimestampMap(map, dir, repo); + const map2 = readTimestampMap(dir, repo); + expect(map2).toEqual(map); + }); + }); +}); diff --git a/src/cli/load.js b/src/cli/load.js index 0cdfc52..6d114ec 100644 --- a/src/cli/load.js +++ b/src/cli/load.js @@ -6,15 +6,23 @@ import path from "path"; import * as NullUtil from "../util/null"; +import stringify from "json-stable-stringify"; import * as RepoIdRegistry from "../core/repoIdRegistry"; import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId"; import dedent from "../util/dedent"; import type {Command} from "./command"; import * as Common from "./common"; +import {loadGraph, type LoadGraphResult} from "../analysis/loadGraph"; +import {type IBackendAdapterLoader} from "../analysis/analysisAdapter"; +import { + createTimestampMap, + writeTimestampMap, +} from "../analysis/temporal/timestampMap"; import execDependencyGraph from "../tools/execDependencyGraph"; import {loadGithubData} from "../plugins/github/loadGithubData"; import {loadGitData} from "../plugins/git/loadGitData"; +import
{defaultAdapterLoaders} from "./pagerank"; function usage(print: (string) => void): void { print( @@ -193,6 +201,7 @@ export const loadDefaultPlugins = async (options: LoadOptions) => { throw new Error("Load tasks failed."); } addToRepoIdRegistry(options.output); + await saveTimestamps(defaultAdapterLoaders(), options.output); // HACK: Logically, we should have the PagerankTask be included in the // first execDependencyGraph run, depending on the other tasks completing. // @@ -261,6 +270,31 @@ function addToRepoIdRegistry(repoId) { RepoIdRegistry.writeRegistry(newRegistry, Common.sourcecredDirectory()); } +// Loads the graph for `repoId`, builds a timestamp map for every node in it +// using the loaded adapters, and writes that map to the sourcecred directory. +// Rejects if the graph cannot be loaded (status other than "SUCCESS"). +async function saveTimestamps( + adapterLoaders: $ReadOnlyArray<IBackendAdapterLoader>, + repoId: RepoId +) { + const loadGraphResult: LoadGraphResult = await loadGraph( + Common.sourcecredDirectory(), + adapterLoaders, + repoId + ); + if (loadGraphResult.status !== "SUCCESS") { + throw new Error(`Unable to load graph: ${stringify(loadGraphResult)}`); + } + const {graph} = loadGraphResult; + // We load all the adapters twice (once in loadGraph, once here). + // Could de-duplicate, but it's marginal overhead compared to loading the data. + const adapters = await Promise.all( + adapterLoaders.map((a) => a.load(Common.sourcecredDirectory(), repoId)) + ); + const timestampMap = createTimestampMap(graph.nodes(), adapters); + writeTimestampMap(timestampMap, Common.sourcecredDirectory(), repoId); +} + export const help: Command = async (args, std) => { if (args.length === 0) { usage(std.out); diff --git a/src/cli/pagerank.js b/src/cli/pagerank.js index 64ecfd0..c64dfc2 100644 --- a/src/cli/pagerank.js +++ b/src/cli/pagerank.js @@ -169,6 +169,8 @@ export async function savePagerankGraph( await fs.writeFile(pgFile, stringify(pgJSON)); } +// TODO(#1120): This should be canonicalized somewhere more appropriate, +// e.g. in src/plugins/defaultPlugins.js export const defaultAdapterLoaders = () => [ new GithubAdapterLoader(), new GitAdapterLoader(),