Don’t load trees from Git repositories (#730)

Summary:
We currently load trees and then throw them away later, because we don’t
get useful signal from them. Stop doing that: `loadRepository` gains a
`COMMITS_ONLY` mode that skips trees, and `cloneAndLoadRepository` now
uses it. This will be faster.

Test Plan:
```
$ time node bin/sourcecred.js load tensorflow/tensorflow --plugin git

real	0m33.512s
user	0m35.196s
sys	0m12.489s
```

Also, `yarn test --full` passes.

wchargin-branch: git-deforestation
This commit is contained in:
William Chargin 2018-08-29 22:11:19 -07:00 committed by GitHub
parent d8556b618f
commit 908dc82f4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 4 deletions

View File

@ -7,7 +7,8 @@ import {loadRepository} from "./loadRepository";
import type {Repo} from "../../core/repo"; import type {Repo} from "../../core/repo";
/** /**
* Load Git Repository data from a fresh clone of a GitHub repo. * Load Git repository data from a fresh clone of a GitHub repo. Loads
* commits only.
* *
* @param {Repo} repo * @param {Repo} repo
* the GitHub repository to be cloned * the GitHub repository to be cloned
@ -19,7 +20,7 @@ export default function cloneAndLoadRepository(repo: Repo): Repository {
const tmpdir = tmp.dirSync({unsafeCleanup: true}); const tmpdir = tmp.dirSync({unsafeCleanup: true});
const git = localGit(tmpdir.name); const git = localGit(tmpdir.name);
git(["clone", cloneUrl, ".", "--quiet"]); git(["clone", cloneUrl, ".", "--quiet"]);
const result = loadRepository(tmpdir.name, "HEAD"); const result = loadRepository(tmpdir.name, "HEAD", "COMMITS_ONLY");
tmpdir.removeCallback(); tmpdir.removeCallback();
return result; return result;
} }

View File

@ -21,11 +21,21 @@ import {localGit} from "./gitUtils";
*/ */
export function loadRepository( export function loadRepository(
repositoryPath: string, repositoryPath: string,
rootRef: string rootRef: string,
mode: "FULL" | "COMMITS_ONLY" = "FULL"
): Repository { ): Repository {
const git = localGit(repositoryPath); const git = localGit(repositoryPath);
const commits = findCommits(git, rootRef); const commits = findCommits(git, rootRef);
const trees = findTrees(git, new Set(commits.map((x) => x.treeHash))); const trees = (() => {
switch (mode) {
case "FULL":
return findTrees(git, new Set(commits.map((x) => x.treeHash)));
case "COMMITS_ONLY":
return [];
default:
throw new Error((mode: empty));
}
})();
return {commits: objectMap(commits), trees: objectMap(trees)}; return {commits: objectMap(commits), trees: objectMap(trees)};
} }

View File

@ -29,6 +29,16 @@ describe("loadRepository", () => {
); );
}); });
it("respects commits-only mode", () => {
const repository = createExampleRepo(mkdtemp());
const full = loadRepository(repository.path, "HEAD");
const commitsOnly = loadRepository(repository.path, "HEAD", "COMMITS_ONLY");
expect(commitsOnly).toEqual({
commits: full.commits,
trees: {},
});
});
it("processes an old commit", () => { it("processes an old commit", () => {
const repository = createExampleRepo(mkdtemp()); const repository = createExampleRepo(mkdtemp());
const whole = loadRepository(repository.path, "HEAD"); const whole = loadRepository(repository.path, "HEAD");