Add github/loadGraph

This is a replacement for `github/loadGithubData` which returns a
combined Graph rather than a combined RelationalView. This provides a
major benefit, which is that we can use the (robust) Graph merge logic
rather than the (buggy) relational view merge.

Test plan: This function is untested. It basically pipelines a few APIs
together. I think that flow is basically sufficient to validate that it
works, and writing a unit test will be frustrating (mostly will involve
re-integrating the functionality via mocks). A future commit makes this
part of the pipeline that generates snapshot tests, so it is de-facto
integration tested.
This commit is contained in:
Dandelion Mané 2019-07-21 15:30:08 +01:00
parent 0a34c8b036
commit e4c96f3a18
2 changed files with 49 additions and 0 deletions

View File

@ -15,6 +15,7 @@ export type Options = {|
+cacheDirectory: string, +cacheDirectory: string,
|}; |};
// This function is deprecated.
export async function loadGithubData(options: Options): Promise<void> { export async function loadGithubData(options: Options): Promise<void> {
// We intentionally fetch repositories sequentially rather than in // We intentionally fetch repositories sequentially rather than in
// parallel, because GitHub asks that we not make concurrent // parallel, because GitHub asks that we not make concurrent

View File

@ -0,0 +1,48 @@
// @flow
import {TaskReporter} from "../../util/taskReporter";
import {createGraph} from "./createGraph";
import fetchGithubRepo from "./fetchGithubRepo";
import {RelationalView} from "./relationalView";
import {type RepoId, repoIdToString} from "../../core/repoId";
import {Graph} from "../../core/graph";
// Configuration consumed by `loadGraph`.
export type Options = {|
// The repositories to load; `loadGraph` fetches them one at a time, in
// array order.
+repoIds: $ReadOnlyArray<RepoId>,
// Forwarded unchanged to `fetchGithubRepo` for every repository
// (presumably a GitHub API token; confirm against `fetchGithubRepo`).
+token: string,
// Forwarded unchanged to `fetchGithubRepo` for every repository
// (presumably where fetched data is cached on disk; confirm against
// `fetchGithubRepo`).
+cacheDirectory: string,
|};
/**
* Loads several GitHub repositories, combining them into a single graph.
*/
export async function loadGraph(
options: Options,
taskReporter: TaskReporter
): Promise<Graph> {
// We intentionally fetch repositories sequentially rather than in
// parallel, because GitHub asks that we not make concurrent
// requests. From <https://archive.is/LlkQp#88%>:
//
// > Make requests for a single user or client ID serially. Do not make
// > make requests for a single user or client ID concurrently.
const repositories = [];
for (const repoId of options.repoIds) {
const taskId = `github/${repoIdToString(repoId)}`;
taskReporter.start(taskId);
repositories.push(
await fetchGithubRepo(repoId, {
token: options.token,
cacheDirectory: options.cacheDirectory,
})
);
taskReporter.finish(taskId);
}
return Graph.merge(
repositories.map((r) => {
const rv = new RelationalView();
rv.addRepository(r);
return createGraph(rv);
})
);
}