Aggregate timestamp information on sourcecred load (#1162)
This modifies `sourcecred load` so that it saves timestamp information for all of the loaded plugins in a single aggregated map. This is quite convenient, as it saves consumers of timestamp information from needing to worry about the (rather hacky) implementation whereby the data is fed from each adapter. Instead, consumers can just load the timestamp map. This will also make it much easier to use timestamp info in the research codebase. Test plan: The timestampMap module has testing around generating the map from the adapter and nodes, writing it, and reading it. I haven't added any testing to the `load` CLI command. I think it would be redundant, as the updated snapshot test reveals that the map is getting serialized properly. Tests pass, and I have inspected the snapshot.
This commit is contained in:
parent
4dc97fcc57
commit
ad2470e5c6
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,57 @@
|
|||
// @flow
|
||||
|
||||
import path from "path";
|
||||
import fs from "fs-extra";
|
||||
import stringify from "json-stable-stringify";
|
||||
import * as MapUtil from "../../util/map";
|
||||
import {type RepoId, repoIdToString} from "../../core/repoId";
|
||||
import {type NodeAddressT, NodeAddress} from "../../core/graph";
|
||||
import {type IAnalysisAdapter, type MsSinceEpoch} from "../analysisAdapter";
|
||||
import {NodeTrie} from "../../core/trie";
|
||||
|
||||
export type TimestampMap = Map<NodeAddressT, MsSinceEpoch | null>;
|
||||
|
||||
export function createTimestampMap(
|
||||
nodes: Iterable<NodeAddressT>,
|
||||
adapters: $ReadOnlyArray<IAnalysisAdapter>
|
||||
): TimestampMap {
|
||||
const adapterTrie: NodeTrie<IAnalysisAdapter> = new NodeTrie();
|
||||
for (const adapter of adapters) {
|
||||
adapterTrie.add(adapter.declaration().nodePrefix, adapter);
|
||||
}
|
||||
const result = new Map();
|
||||
for (const node of nodes) {
|
||||
const adapter = adapterTrie.getLast(node);
|
||||
if (adapter == null) {
|
||||
throw new Error(`No adapter for ${NodeAddress.toString(node)}`);
|
||||
}
|
||||
result.set(node, adapter.createdAt(node));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
const TIMESTAMP_FILE = "timestamps.json";
|
||||
function basepath(sourcecredDirectory: string, repoId: RepoId) {
|
||||
return path.join(sourcecredDirectory, "data", repoIdToString(repoId));
|
||||
}
|
||||
function filepath(sourcecredDirectory: string, repoId: RepoId) {
|
||||
return path.join(basepath(sourcecredDirectory, repoId), TIMESTAMP_FILE);
|
||||
}
|
||||
|
||||
export function writeTimestampMap(
|
||||
stamps: TimestampMap,
|
||||
sourcecredDirectory: string,
|
||||
repoId: RepoId
|
||||
) {
|
||||
fs.ensureDirSync(basepath(sourcecredDirectory, repoId));
|
||||
const jsonString = stringify(MapUtil.toObject(stamps));
|
||||
fs.writeFileSync(filepath(sourcecredDirectory, repoId), jsonString);
|
||||
}
|
||||
|
||||
export function readTimestampMap(
|
||||
sourcecredDirectory: string,
|
||||
repoId: RepoId
|
||||
): TimestampMap {
|
||||
const contents = fs.readFileSync(filepath(sourcecredDirectory, repoId));
|
||||
return MapUtil.fromObject(JSON.parse(contents.toString()));
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
// @flow
|
||||
|
||||
import tmp from "tmp";
|
||||
import {
|
||||
Graph,
|
||||
type NodeAddressT,
|
||||
NodeAddress,
|
||||
EdgeAddress,
|
||||
} from "../../core/graph";
|
||||
import {makeRepoId} from "../../core/repoId";
|
||||
import {
|
||||
createTimestampMap,
|
||||
readTimestampMap,
|
||||
writeTimestampMap,
|
||||
} from "./timestampMap";
|
||||
|
||||
describe("src/analysis/temporal/timestampMap", () => {
|
||||
const foo = NodeAddress.fromParts(["foo"]);
|
||||
const bar = NodeAddress.fromParts(["bar"]);
|
||||
|
||||
describe("createTimestampMap", () => {
|
||||
const declarationForPrefix = (prefixParts: string[]) => ({
|
||||
name: NodeAddress.fromParts(prefixParts),
|
||||
nodePrefix: NodeAddress.fromParts(prefixParts),
|
||||
edgePrefix: EdgeAddress.fromParts(prefixParts),
|
||||
nodeTypes: [],
|
||||
edgeTypes: [],
|
||||
});
|
||||
const adapterForPrefix = (
|
||||
prefixParts: string[],
|
||||
createdAt: (NodeAddressT) => number | null
|
||||
) => {
|
||||
class Adapter {
|
||||
declaration() {
|
||||
return declarationForPrefix(prefixParts);
|
||||
}
|
||||
graph() {
|
||||
return new Graph();
|
||||
}
|
||||
createdAt(n: NodeAddressT) {
|
||||
return createdAt(n);
|
||||
}
|
||||
}
|
||||
return new Adapter();
|
||||
};
|
||||
it("matches the most specific adapter", () => {
|
||||
const fooAdapter = adapterForPrefix(["foo"], (_) => 1);
|
||||
const fallbackAdapter = adapterForPrefix([], (_) => null);
|
||||
const nodes = [foo, bar];
|
||||
const tsMap = createTimestampMap(nodes, [fooAdapter, fallbackAdapter]);
|
||||
// foo got its timestamp from the fooAdapter, not from the fallbackAdapter,
|
||||
// even though it matched both.
|
||||
expect(tsMap.get(foo)).toEqual(1);
|
||||
// Bar matched the fallback adapter.
|
||||
expect(tsMap.get(bar)).toEqual(null);
|
||||
});
|
||||
it("throws an error if there is no matching adapter", () => {
|
||||
const foo = NodeAddress.fromParts(["foo"]);
|
||||
expect(() => createTimestampMap([foo], [])).toThrowError(
|
||||
`No adapter for NodeAddress["foo"]`
|
||||
);
|
||||
});
|
||||
});
|
||||
describe("{write,read}TimestampMap", () => {
|
||||
const repo = makeRepoId("foo", "bar");
|
||||
it("throws an error if there is no timestamp map to read", () => {
|
||||
const dir = tmp.dirSync().name;
|
||||
expect(() => readTimestampMap(dir, repo)).toThrowError(
|
||||
"ENOENT: no such file or directory"
|
||||
);
|
||||
});
|
||||
it("can write/read the empty registry", () => {
|
||||
const dir = tmp.dirSync().name;
|
||||
const map = new Map();
|
||||
writeTimestampMap(map, dir, repo);
|
||||
const map2 = readTimestampMap(dir, repo);
|
||||
expect(map2).toEqual(map);
|
||||
});
|
||||
it("can write/read a non-empty registry", () => {
|
||||
const dir = tmp.dirSync().name;
|
||||
const map = new Map([[foo, null], [bar, 3]]);
|
||||
writeTimestampMap(map, dir, repo);
|
||||
const map2 = readTimestampMap(dir, repo);
|
||||
expect(map2).toEqual(map);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -6,15 +6,23 @@ import path from "path";
|
|||
|
||||
import * as NullUtil from "../util/null";
|
||||
|
||||
import stringify from "json-stable-stringify";
|
||||
import * as RepoIdRegistry from "../core/repoIdRegistry";
|
||||
import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId";
|
||||
import dedent from "../util/dedent";
|
||||
import type {Command} from "./command";
|
||||
import * as Common from "./common";
|
||||
import {loadGraph, type LoadGraphResult} from "../analysis/loadGraph";
|
||||
import {type IBackendAdapterLoader} from "../analysis/analysisAdapter";
|
||||
import {
|
||||
createTimestampMap,
|
||||
writeTimestampMap,
|
||||
} from "../analysis/temporal/timestampMap";
|
||||
|
||||
import execDependencyGraph from "../tools/execDependencyGraph";
|
||||
import {loadGithubData} from "../plugins/github/loadGithubData";
|
||||
import {loadGitData} from "../plugins/git/loadGitData";
|
||||
import {defaultAdapterLoaders} from "./pagerank";
|
||||
|
||||
function usage(print: (string) => void): void {
|
||||
print(
|
||||
|
@ -193,6 +201,7 @@ export const loadDefaultPlugins = async (options: LoadOptions) => {
|
|||
throw new Error("Load tasks failed.");
|
||||
}
|
||||
addToRepoIdRegistry(options.output);
|
||||
saveTimestamps(defaultAdapterLoaders(), options.output);
|
||||
// HACK: Logically, we should have the PagerankTask be included in the
|
||||
// first execDependencyGraph run, depending on the other tasks completing.
|
||||
//
|
||||
|
@ -261,6 +270,28 @@ function addToRepoIdRegistry(repoId) {
|
|||
RepoIdRegistry.writeRegistry(newRegistry, Common.sourcecredDirectory());
|
||||
}
|
||||
|
||||
async function saveTimestamps(
|
||||
adapterLoaders: $ReadOnlyArray<IBackendAdapterLoader>,
|
||||
repoId: RepoId
|
||||
) {
|
||||
const loadGraphResult: LoadGraphResult = await loadGraph(
|
||||
Common.sourcecredDirectory(),
|
||||
adapterLoaders,
|
||||
repoId
|
||||
);
|
||||
if (loadGraphResult.status !== "SUCCESS") {
|
||||
throw new Error(`Unable to load graph: ${stringify(loadGraphResult)}`);
|
||||
}
|
||||
const {graph} = loadGraphResult;
|
||||
// We load all the adapters twice (once in loadGraph, once here).
|
||||
// Could de-duplicate, but it's marginal overhead compared to loading the data.
|
||||
const adapters = await Promise.all(
|
||||
adapterLoaders.map((a) => a.load(Common.sourcecredDirectory(), repoId))
|
||||
);
|
||||
const timestampMap = createTimestampMap(graph.nodes(), adapters);
|
||||
writeTimestampMap(timestampMap, Common.sourcecredDirectory(), repoId);
|
||||
}
|
||||
|
||||
export const help: Command = async (args, std) => {
|
||||
if (args.length === 0) {
|
||||
usage(std.out);
|
||||
|
|
|
@ -169,6 +169,8 @@ export async function savePagerankGraph(
|
|||
await fs.writeFile(pgFile, stringify(pgJSON));
|
||||
}
|
||||
|
||||
// TODO(#1120): This should be canonicalized somewhere more appropriate,
|
||||
// e.g. in src/plugins/defaultPlugins.js
|
||||
export const defaultAdapterLoaders = () => [
|
||||
new GithubAdapterLoader(),
|
||||
new GitAdapterLoader(),
|
||||
|
|
Loading…
Reference in New Issue