Aggregate timestamp information on sourcecred load (#1162)

This modifies `sourcecred load` so that it saves timestamp information
for all of the loaded plugins in a single aggregated map.

This is quite convenient, as it saves consumers of timestamp information
from needing to worry about the (rather hacky) implementation whereby
the data is fed from each adapter. Instead, consumers can just load the
timestamp map. This will also make it much easier to use timestamp info
in the research codebase.

Test plan: The timestampMap module has testing around generating the map
from the adapters and nodes, writing it, and reading it.

I haven't added any testing to the `load` CLI command. I think it would
be redundant as the updated snapshot test reveals that the map is
getting serialized properly.

Tests pass, and I have inspected the snapshot.
This commit is contained in:
Dandelion Mané 2019-05-30 17:15:15 +03:00 committed by GitHub
parent 4dc97fcc57
commit ad2470e5c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 178 additions and 0 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,57 @@
// @flow
import path from "path";
import fs from "fs-extra";
import stringify from "json-stable-stringify";
import * as MapUtil from "../../util/map";
import {type RepoId, repoIdToString} from "../../core/repoId";
import {type NodeAddressT, NodeAddress} from "../../core/graph";
import {type IAnalysisAdapter, type MsSinceEpoch} from "../analysisAdapter";
import {NodeTrie} from "../../core/trie";
export type TimestampMap = Map<NodeAddressT, MsSinceEpoch | null>;
export function createTimestampMap(
nodes: Iterable<NodeAddressT>,
adapters: $ReadOnlyArray<IAnalysisAdapter>
): TimestampMap {
const adapterTrie: NodeTrie<IAnalysisAdapter> = new NodeTrie();
for (const adapter of adapters) {
adapterTrie.add(adapter.declaration().nodePrefix, adapter);
}
const result = new Map();
for (const node of nodes) {
const adapter = adapterTrie.getLast(node);
if (adapter == null) {
throw new Error(`No adapter for ${NodeAddress.toString(node)}`);
}
result.set(node, adapter.createdAt(node));
}
return result;
}
const TIMESTAMP_FILE = "timestamps.json";
function basepath(sourcecredDirectory: string, repoId: RepoId) {
return path.join(sourcecredDirectory, "data", repoIdToString(repoId));
}
function filepath(sourcecredDirectory: string, repoId: RepoId) {
return path.join(basepath(sourcecredDirectory, repoId), TIMESTAMP_FILE);
}
export function writeTimestampMap(
stamps: TimestampMap,
sourcecredDirectory: string,
repoId: RepoId
) {
fs.ensureDirSync(basepath(sourcecredDirectory, repoId));
const jsonString = stringify(MapUtil.toObject(stamps));
fs.writeFileSync(filepath(sourcecredDirectory, repoId), jsonString);
}
export function readTimestampMap(
sourcecredDirectory: string,
repoId: RepoId
): TimestampMap {
const contents = fs.readFileSync(filepath(sourcecredDirectory, repoId));
return MapUtil.fromObject(JSON.parse(contents.toString()));
}

View File

@ -0,0 +1,87 @@
// @flow
import tmp from "tmp";
import {
Graph,
type NodeAddressT,
NodeAddress,
EdgeAddress,
} from "../../core/graph";
import {makeRepoId} from "../../core/repoId";
import {
createTimestampMap,
readTimestampMap,
writeTimestampMap,
} from "./timestampMap";
describe("src/analysis/temporal/timestampMap", () => {
const foo = NodeAddress.fromParts(["foo"]);
const bar = NodeAddress.fromParts(["bar"]);
describe("createTimestampMap", () => {
const declarationForPrefix = (prefixParts: string[]) => ({
name: NodeAddress.fromParts(prefixParts),
nodePrefix: NodeAddress.fromParts(prefixParts),
edgePrefix: EdgeAddress.fromParts(prefixParts),
nodeTypes: [],
edgeTypes: [],
});
const adapterForPrefix = (
prefixParts: string[],
createdAt: (NodeAddressT) => number | null
) => {
class Adapter {
declaration() {
return declarationForPrefix(prefixParts);
}
graph() {
return new Graph();
}
createdAt(n: NodeAddressT) {
return createdAt(n);
}
}
return new Adapter();
};
it("matches the most specific adapter", () => {
const fooAdapter = adapterForPrefix(["foo"], (_) => 1);
const fallbackAdapter = adapterForPrefix([], (_) => null);
const nodes = [foo, bar];
const tsMap = createTimestampMap(nodes, [fooAdapter, fallbackAdapter]);
// foo got its timestamp from the fooAdapter, not from the fallbackAdapter,
// even though it matched both.
expect(tsMap.get(foo)).toEqual(1);
// Bar matched the fallback adapter.
expect(tsMap.get(bar)).toEqual(null);
});
it("throws an error if there is no matching adapter", () => {
const foo = NodeAddress.fromParts(["foo"]);
expect(() => createTimestampMap([foo], [])).toThrowError(
`No adapter for NodeAddress["foo"]`
);
});
});
describe("{write,read}TimestampMap", () => {
const repo = makeRepoId("foo", "bar");
it("throws an error if there is no timestamp map to read", () => {
const dir = tmp.dirSync().name;
expect(() => readTimestampMap(dir, repo)).toThrowError(
"ENOENT: no such file or directory"
);
});
it("can write/read the empty registry", () => {
const dir = tmp.dirSync().name;
const map = new Map();
writeTimestampMap(map, dir, repo);
const map2 = readTimestampMap(dir, repo);
expect(map2).toEqual(map);
});
it("can write/read a non-empty registry", () => {
const dir = tmp.dirSync().name;
const map = new Map([[foo, null], [bar, 3]]);
writeTimestampMap(map, dir, repo);
const map2 = readTimestampMap(dir, repo);
expect(map2).toEqual(map);
});
});
});

View File

@ -6,15 +6,23 @@ import path from "path";
import * as NullUtil from "../util/null"; import * as NullUtil from "../util/null";
import stringify from "json-stable-stringify";
import * as RepoIdRegistry from "../core/repoIdRegistry"; import * as RepoIdRegistry from "../core/repoIdRegistry";
import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId"; import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId";
import dedent from "../util/dedent"; import dedent from "../util/dedent";
import type {Command} from "./command"; import type {Command} from "./command";
import * as Common from "./common"; import * as Common from "./common";
import {loadGraph, type LoadGraphResult} from "../analysis/loadGraph";
import {type IBackendAdapterLoader} from "../analysis/analysisAdapter";
import {
createTimestampMap,
writeTimestampMap,
} from "../analysis/temporal/timestampMap";
import execDependencyGraph from "../tools/execDependencyGraph"; import execDependencyGraph from "../tools/execDependencyGraph";
import {loadGithubData} from "../plugins/github/loadGithubData"; import {loadGithubData} from "../plugins/github/loadGithubData";
import {loadGitData} from "../plugins/git/loadGitData"; import {loadGitData} from "../plugins/git/loadGitData";
import {defaultAdapterLoaders} from "./pagerank";
function usage(print: (string) => void): void { function usage(print: (string) => void): void {
print( print(
@ -193,6 +201,7 @@ export const loadDefaultPlugins = async (options: LoadOptions) => {
throw new Error("Load tasks failed."); throw new Error("Load tasks failed.");
} }
addToRepoIdRegistry(options.output); addToRepoIdRegistry(options.output);
saveTimestamps(defaultAdapterLoaders(), options.output);
// HACK: Logically, we should have the PagerankTask be included in the // HACK: Logically, we should have the PagerankTask be included in the
// first execDependencyGraph run, depending on the other tasks completing. // first execDependencyGraph run, depending on the other tasks completing.
// //
@ -261,6 +270,28 @@ function addToRepoIdRegistry(repoId) {
RepoIdRegistry.writeRegistry(newRegistry, Common.sourcecredDirectory()); RepoIdRegistry.writeRegistry(newRegistry, Common.sourcecredDirectory());
} }
async function saveTimestamps(
adapterLoaders: $ReadOnlyArray<IBackendAdapterLoader>,
repoId: RepoId
) {
const loadGraphResult: LoadGraphResult = await loadGraph(
Common.sourcecredDirectory(),
adapterLoaders,
repoId
);
if (loadGraphResult.status !== "SUCCESS") {
throw new Error(`Unable to load graph: ${stringify(loadGraphResult)}`);
}
const {graph} = loadGraphResult;
// We load all the adapters twice (once in loadGraph, once here).
// Could de-duplicate, but it's marginal overhead compared to loading the data.
const adapters = await Promise.all(
adapterLoaders.map((a) => a.load(Common.sourcecredDirectory(), repoId))
);
const timestampMap = createTimestampMap(graph.nodes(), adapters);
writeTimestampMap(timestampMap, Common.sourcecredDirectory(), repoId);
}
export const help: Command = async (args, std) => { export const help: Command = async (args, std) => {
if (args.length === 0) { if (args.length === 0) {
usage(std.out); usage(std.out);

View File

@ -169,6 +169,8 @@ export async function savePagerankGraph(
await fs.writeFile(pgFile, stringify(pgJSON)); await fs.writeFile(pgFile, stringify(pgJSON));
} }
// TODO(#1120): This should be canonicalized somewhere more appropriate,
// e.g. in src/plugins/defaultPlugins.js
export const defaultAdapterLoaders = () => [ export const defaultAdapterLoaders = () => [
new GithubAdapterLoader(), new GithubAdapterLoader(),
new GitAdapterLoader(), new GitAdapterLoader(),