cli load: save graph, not pagerank or timestampMap
As of the timeline cred work, I'm shifting emphasis away from raw PageRank results, in favor of timeline PageRank results. As such, there's no need to have `load` save the regular PageRank results on creation. As of #1136, there will be no need for the `timestampMap`, as that data will be present directly in the graph. Since the timeline cred UI will depend on the full graph for analysis, let's save the graph instead. Test plan: `yarn test` and snapshot inspection.
This commit is contained in:
parent
e493af2307
commit
67baacd862
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -4,6 +4,7 @@
|
|||
import mkdirp from "mkdirp";
|
||||
import path from "path";
|
||||
|
||||
import fs from "fs-extra";
|
||||
import stringify from "json-stable-stringify";
|
||||
import * as RepoIdRegistry from "../core/repoIdRegistry";
|
||||
import {repoIdToString, stringToRepoId, type RepoId} from "../core/repoId";
|
||||
|
@ -12,10 +13,6 @@ import type {Command} from "./command";
|
|||
import * as Common from "./common";
|
||||
import {loadGraph, type LoadGraphResult} from "../analysis/loadGraph";
|
||||
import {type IBackendAdapterLoader} from "../analysis/analysisAdapter";
|
||||
import {
|
||||
createTimestampMap,
|
||||
writeTimestampMap,
|
||||
} from "../analysis/temporal/timestampMap";
|
||||
|
||||
import execDependencyGraph from "../tools/execDependencyGraph";
|
||||
import {loadGithubData} from "../plugins/github/loadGithubData";
|
||||
|
@ -170,7 +167,7 @@ export function makeLoadCommand(
|
|||
};
|
||||
}
|
||||
|
||||
export type SaveTimestamps = (
|
||||
export type SaveGraph = (
|
||||
$ReadOnlyArray<IBackendAdapterLoader>,
|
||||
RepoId
|
||||
) => Promise<void>;
|
||||
|
@ -178,11 +175,11 @@ export type SaveTimestamps = (
|
|||
/**
|
||||
* A wrapper around the default plugin loader.
|
||||
*
|
||||
* We wrap so that we can dependency inject saveTimestamps for testability.
|
||||
* (saveTimestamps depends on the directory state that's generated by running
|
||||
* We wrap so that we can dependency inject saveGraph for testability.
|
||||
* (saveGraph depends on the directory state that's generated by running
|
||||
* load.)
|
||||
*/
|
||||
export const makeLoadDefaultPlugins = (saveTimestamps: SaveTimestamps) => {
|
||||
export const makeLoadDefaultPlugins = (saveGraph: SaveGraph) => {
|
||||
return async (options: LoadOptions) => {
|
||||
const sourcecredCommand = (args) => [
|
||||
process.execPath,
|
||||
|
@ -212,33 +209,7 @@ export const makeLoadDefaultPlugins = (saveTimestamps: SaveTimestamps) => {
|
|||
throw new Error("Load tasks failed.");
|
||||
}
|
||||
addToRepoIdRegistry(options.output);
|
||||
await saveTimestamps(defaultAdapterLoaders(), options.output);
|
||||
// HACK: Logically, we should have the PagerankTask be included in the
|
||||
// first execDependencyGraph run, depending on the other tasks completing.
|
||||
//
|
||||
// However, running pagerank depends on loading the graph
|
||||
// (analysis/loadGraph), which depends on the relevant repo being present
|
||||
// in the RepoIdRegistry. And it is only in the RepoIdRegistry after the
|
||||
// call to execDependencyGraph has been successful.
|
||||
//
|
||||
// As a simple hack, we just call execDependencyGraph again with the
|
||||
// pagerank command after the first one has been successful. This does have
|
||||
// the awkward effect that CLI users will see two blocks of "task: SUCCESS"
|
||||
// information from execDependencyGraph.
|
||||
const pagerankTask = {
|
||||
id: "run-pagerank",
|
||||
cmd: sourcecredCommand(["pagerank", repoIdToString(options.output)]),
|
||||
deps: [],
|
||||
};
|
||||
const {success: pagerankSuccess} = await execDependencyGraph(
|
||||
[pagerankTask],
|
||||
{
|
||||
taskPassLabel: "DONE",
|
||||
}
|
||||
);
|
||||
if (!pagerankSuccess) {
|
||||
throw new Error("Pagerank task failed.");
|
||||
}
|
||||
await saveGraph(defaultAdapterLoaders(), options.output);
|
||||
return;
|
||||
};
|
||||
};
|
||||
|
@ -289,14 +260,10 @@ function addToRepoIdRegistry(repoId) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Load timestamp data for every node in the serialized graph,
|
||||
* then write the timestamps to disk.
|
||||
*
|
||||
* Note: This function calls well-tested code, but is not itself tested, except
|
||||
* by the sharness load snapshot test.
|
||||
* Load the graph (really generate it by merging from the plugins) and then save to disk.
|
||||
* Modify with caution.
|
||||
*/
|
||||
export async function saveTimestamps(
|
||||
export async function saveGraph(
|
||||
adapterLoaders: $ReadOnlyArray<IBackendAdapterLoader>,
|
||||
repoId: RepoId
|
||||
) {
|
||||
|
@ -309,14 +276,14 @@ export async function saveTimestamps(
|
|||
throw new Error(`Unable to load graph: ${stringify(loadGraphResult)}`);
|
||||
}
|
||||
const {graph} = loadGraphResult;
|
||||
// We load all the adapters twice (once in loadGraph, once here).
|
||||
// Could de-duplicate, but it's marginal overhead compared to loading the data.
|
||||
const adapters = await Promise.all(
|
||||
adapterLoaders.map((a) => a.load(Common.sourcecredDirectory(), repoId))
|
||||
const graphFile = path.join(
|
||||
Common.sourcecredDirectory(),
|
||||
"data",
|
||||
repoIdToString(repoId),
|
||||
"graph.json"
|
||||
);
|
||||
const nodeAddresses = Array.from(graph.nodes());
|
||||
const timestampMap = createTimestampMap(nodeAddresses, adapters);
|
||||
writeTimestampMap(timestampMap, Common.sourcecredDirectory(), repoId);
|
||||
const graphJSON = graph.toJSON();
|
||||
await fs.writeFile(graphFile, stringify(graphJSON));
|
||||
}
|
||||
|
||||
export const help: Command = async (args, std) => {
|
||||
|
@ -329,7 +296,7 @@ export const help: Command = async (args, std) => {
|
|||
}
|
||||
};
|
||||
|
||||
const loadDefaultPlugins = makeLoadDefaultPlugins(saveTimestamps);
|
||||
const loadDefaultPlugins = makeLoadDefaultPlugins(saveGraph);
|
||||
const load = makeLoadCommand(loadIndividualPlugin, loadDefaultPlugins);
|
||||
|
||||
export default load;
|
||||
|
|
|
@ -474,7 +474,7 @@ describe("cli/load", () => {
|
|||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(1);
|
||||
const loadTasks = execDependencyGraph.mock.calls[0][0];
|
||||
expect(loadTasks).toHaveLength(["git", "github"].length);
|
||||
expect(loadTasks.map((task) => task.id)).toEqual(
|
||||
|
@ -496,29 +496,6 @@ describe("cli/load", () => {
|
|||
}
|
||||
});
|
||||
|
||||
it("creates a pagerank task after load is successful", async () => {
|
||||
execDependencyGraph.mockResolvedValue({success: true});
|
||||
await loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
expect(execDependencyGraph).toHaveBeenCalledTimes(2);
|
||||
const pagerankTasks = execDependencyGraph.mock.calls[1][0];
|
||||
expect(pagerankTasks).toHaveLength(1);
|
||||
const task = pagerankTasks[0];
|
||||
expect(task).toEqual({
|
||||
id: "run-pagerank",
|
||||
deps: [],
|
||||
cmd: [
|
||||
expect.stringMatching(/\bnode\b/),
|
||||
expect.stringMatching(/--max_old_space_size=/),
|
||||
process.argv[1],
|
||||
"pagerank",
|
||||
"foo/combined",
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("updates RepoIdRegistry on success", async () => {
|
||||
const directory = newSourcecredDirectory();
|
||||
expect(RepoIdRegistry.getRegistry(directory)).toEqual(
|
||||
|
@ -538,16 +515,16 @@ describe("cli/load", () => {
|
|||
expect(RepoIdRegistry.getRegistry(directory)).toEqual(expectedRegistry);
|
||||
});
|
||||
|
||||
it("calls saveTimestamps on success", async () => {
|
||||
const saveTimestamps = jest.fn();
|
||||
const loadDefaultPlugins = makeLoadDefaultPlugins(saveTimestamps);
|
||||
it("calls saveGraph on success", async () => {
|
||||
const saveGraph = jest.fn();
|
||||
const loadDefaultPlugins = makeLoadDefaultPlugins(saveGraph);
|
||||
execDependencyGraph.mockResolvedValue({success: true});
|
||||
await loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
expect(saveTimestamps).toHaveBeenCalledTimes(1);
|
||||
expect(saveTimestamps).toHaveBeenCalledWith(
|
||||
expect(saveGraph).toHaveBeenCalledTimes(1);
|
||||
expect(saveGraph).toHaveBeenCalledWith(
|
||||
defaultAdapterLoaders(),
|
||||
fooCombined
|
||||
);
|
||||
|
@ -562,16 +539,5 @@ describe("cli/load", () => {
|
|||
|
||||
expect(result).rejects.toThrow("Load tasks failed.");
|
||||
});
|
||||
|
||||
it("throws an pagerank error on second execDependencyGraph failure", async () => {
|
||||
execDependencyGraph.mockResolvedValueOnce({success: true});
|
||||
execDependencyGraph.mockResolvedValueOnce({success: false});
|
||||
const result = loadDefaultPlugins({
|
||||
output: fooCombined,
|
||||
repoIds: [fooBar, fooBaz],
|
||||
});
|
||||
|
||||
expect(result).rejects.toThrow("Pagerank task failed.");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue