diff --git a/package.json b/package.json
index 2306bb9..f1bfa69 100644
--- a/package.json
+++ b/package.json
@@ -116,6 +116,8 @@
     "babel-plugin-flow-react-proptypes": "^18.0.0",
     "enzyme": "^3.3.0",
     "enzyme-adapter-react-16": "^1.1.1",
-    "enzyme-to-json": "^3.3.3"
+    "enzyme-to-json": "^3.3.3",
+    "mkdirp": "^0.5.1",
+    "tmp": "^0.0.33"
   }
 }
diff --git a/src/plugins/git/__snapshots__/loadRepository.test.js.snap b/src/plugins/git/__snapshots__/loadRepository.test.js.snap
new file mode 100644
index 0000000..428f574
--- /dev/null
+++ b/src/plugins/git/__snapshots__/loadRepository.test.js.snap
@@ -0,0 +1,144 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`loadRepository loads from HEAD 1`] = `
+Object {
+  "commits": Map {
+    "677b340674bde17fdaac3b5f5eef929139ef2a52" => Object {
+      "hash": "677b340674bde17fdaac3b5f5eef929139ef2a52",
+      "treeHash": "6152a37dba8aa54dc4bc2d59c1f01c2afeba74b0",
+    },
+    "4be43f1cda04e51e42fec0cfe8e1e2dff116e839" => Object {
+      "hash": "4be43f1cda04e51e42fec0cfe8e1e2dff116e839",
+      "treeHash": "93642dbd1793e84a6f529a1e1b1b4f87a4f5c878",
+    },
+    "cbb26b570d1eed3c681b8f03ff31231c1bffd6d6" => Object {
+      "hash": "cbb26b570d1eed3c681b8f03ff31231c1bffd6d6",
+      "treeHash": "f6736d27cd7eb7e35ae22a906854c700eb5cf6c1",
+    },
+    "301749e9af8cd6e9aee3a49a64029b98a4695e34" => Object {
+      "hash": "301749e9af8cd6e9aee3a49a64029b98a4695e34",
+      "treeHash": "4d5f2603a4b63aa68b8e51facf542a62e4c1d065",
+    },
+  },
+  "trees": Map {
+    "6152a37dba8aa54dc4bc2d59c1f01c2afeba74b0" => Object {
+      "entries": Map {
+        "README.txt" => Object {
+          "hash": "f1f2514ca6d7a6a1a0511957021b1995bf9ace1c",
+          "name": "README.txt",
+          "type": "blob",
+        },
+        "src" => Object {
+          "hash": "78fc9c83023386854c6bfdc5761c0e58f68e226f",
+          "name": "src",
+          "type": "tree",
+        },
+      },
+      "hash": "6152a37dba8aa54dc4bc2d59c1f01c2afeba74b0",
+    },
+    "93642dbd1793e84a6f529a1e1b1b4f87a4f5c878" => Object {
+      "entries": Map {
+        "README.txt" => Object {
+          "hash": "f1f2514ca6d7a6a1a0511957021b1995bf9ace1c",
+          "name": "README.txt",
+          "type": "blob",
+        },
+        "TODOS.txt" => Object {
+          "hash": "ddec7477206c30c31b81482e56b877a0b3c2638b",
+          "name": "TODOS.txt",
+          "type": "blob",
+        },
+        "src" => Object {
+          "hash": "78fc9c83023386854c6bfdc5761c0e58f68e226f",
+          "name": "src",
+          "type": "tree",
+        },
+      },
+      "hash": "93642dbd1793e84a6f529a1e1b1b4f87a4f5c878",
+    },
+    "f6736d27cd7eb7e35ae22a906854c700eb5cf6c1" => Object {
+      "entries": Map {
+        "README.txt" => Object {
+          "hash": "f1f2514ca6d7a6a1a0511957021b1995bf9ace1c",
+          "name": "README.txt",
+          "type": "blob",
+        },
+        "TODOS.txt" => Object {
+          "hash": "ddec7477206c30c31b81482e56b877a0b3c2638b",
+          "name": "TODOS.txt",
+          "type": "blob",
+        },
+        "src" => Object {
+          "hash": "7b79d579b62994faba3b69fdf8aa442586c32681",
+          "name": "src",
+          "type": "tree",
+        },
+      },
+      "hash": "f6736d27cd7eb7e35ae22a906854c700eb5cf6c1",
+    },
+    "4d5f2603a4b63aa68b8e51facf542a62e4c1d065" => Object {
+      "entries": Map {
+        "README.txt" => Object {
+          "hash": "f1f2514ca6d7a6a1a0511957021b1995bf9ace1c",
+          "name": "README.txt",
+          "type": "blob",
+        },
+      },
+      "hash": "4d5f2603a4b63aa68b8e51facf542a62e4c1d065",
+    },
+    "78fc9c83023386854c6bfdc5761c0e58f68e226f" => Object {
+      "entries": Map {
+        "index.py" => Object {
+          "hash": "674b0b476989384510304846248b3acd16206782",
+          "name": "index.py",
+          "type": "blob",
+        },
+        "quantum_gravity.py" => Object {
+          "hash": "aea4f28abb23abde151b0ead4063227f8bf6c0b0",
+          "name": "quantum_gravity.py",
+          "type": "blob",
+        },
+      },
+      "hash": "78fc9c83023386854c6bfdc5761c0e58f68e226f",
+    },
+    "7b79d579b62994faba3b69fdf8aa442586c32681" => Object {
+      "entries": Map {
+        "index.py" => Object {
+          "hash": "674b0b476989384510304846248b3acd16206782",
+          "name": "index.py",
+          "type": "blob",
+        },
+        "quantum_gravity.py" => Object {
+          "hash": "887ad856bbc1373da146106c86cb581ad78cdafe",
+          "name": "quantum_gravity.py",
+          "type": "blob",
+        },
+      },
+      "hash": "7b79d579b62994faba3b69fdf8aa442586c32681",
+    },
+  },
+}
+`;
+
+exports[`loadRepository processes an old commit 1`] = `
+Object {
+  "commits": Set {
+    "cbb26b570d1eed3c681b8f03ff31231c1bffd6d6",
+    "301749e9af8cd6e9aee3a49a64029b98a4695e34",
+  },
+  "trees": Set {
+    "f6736d27cd7eb7e35ae22a906854c700eb5cf6c1",
+    "4d5f2603a4b63aa68b8e51facf542a62e4c1d065",
+    "7b79d579b62994faba3b69fdf8aa442586c32681",
+  },
+}
+`;
+
+exports[`we create a deterministic repository 1`] = `
+Array [
+  "301749e9af8cd6e9aee3a49a64029b98a4695e34",
+  "cbb26b570d1eed3c681b8f03ff31231c1bffd6d6",
+  "4be43f1cda04e51e42fec0cfe8e1e2dff116e839",
+  "677b340674bde17fdaac3b5f5eef929139ef2a52",
+]
+`;
diff --git a/src/plugins/git/loadRepository.js b/src/plugins/git/loadRepository.js
new file mode 100644
index 0000000..2503cd6
--- /dev/null
+++ b/src/plugins/git/loadRepository.js
@@ -0,0 +1,149 @@
+/*
+ * Load a git repository into memory. This dumps the commit and tree
+ * data into a structured form. Contents of blobs are not loaded.
+ *
+ * If the repository contains file names that are not valid UTF-8
+ * strings, the result is undefined.
+ *
+ * Note: git(1) is a runtime dependency of this module.
+ */
+// @flow
+
+import {execFileSync} from "child_process";
+
+export type GitDriver = (args: string[], options?: ExecOptions) => string;
+type ExecOptions = Object; // close enough
+
+/**
+ * Create a `GitDriver` that runs `git -C <repositoryPath> <args...>`
+ * synchronously and returns its standard output as a string.
+ */
+export function localGit(repositoryPath: string): GitDriver {
+  return function git(args: string[], options?: ExecOptions): string {
+    // Throws an Error on shell failure.
+    return execFileSync(
+      "git",
+      ["-C", repositoryPath, ...args],
+      options
+    ).toString();
+  };
+}
+
+export type Repository = {|
+  +commits: Map<Hash, Commit>,
+  +trees: Map<Hash, Tree>,
+|};
+export type Hash = string;
+export type Commit = {|
+  +hash: Hash,
+  +treeHash: Hash,
+|};
+export type Tree = {|
+  +hash: Hash,
+  +entries: Map<string, TreeEntry>, // map from name
+|};
+export type TreeEntry = {|
+  +type: "blob" | "commit" | "tree",
+  +name: string,
+  +hash: Hash,
+|};
+
+/**
+ * Load a Git repository from disk into memory. The `rootRef` should be
+ * a revision reference as accepted by `git rev-parse`: "HEAD" and
+ * "origin/master" will be common, while a specific SHA or tag might be
+ * used to fix a particular state of a repository.
+ */
+export function loadRepository(
+  repositoryPath: string,
+  rootRef: string
+): Repository {
+  const git = localGit(repositoryPath);
+  const commits = findCommits(git, rootRef);
+  const trees = findTrees(git, new Set(commits.map((x) => x.treeHash)));
+  return {commits: hashMap(commits), trees: hashMap(trees)};
+}
+
+/** Index the given objects by their `hash` field. */
+function hashMap<T: {+hash: Hash}>(ts: $ReadOnlyArray<T>): Map<Hash, T> {
+  const result = new Map();
+  ts.forEach((t) => {
+    result.set(t.hash, t);
+  });
+  return result;
+}
+
+/**
+ * List the commits that `git log` reports for `rootRef`, each paired
+ * with the hash of its root tree.
+ */
+function findCommits(git: GitDriver, rootRef: string): Commit[] {
+  return git(["log", "--oneline", "--pretty=%H %T", rootRef])
+    .split("\n")
+    .filter((line) => line.length > 0)
+    .map((line) => {
+      const [hash, treeHash] = line.split(" ");
+      return {hash, treeHash};
+    });
+}
+
+/**
+ * Load every tree reachable from `rootTrees` through entries of type
+ * "tree". Each distinct tree hash is loaded exactly once.
+ */
+function findTrees(git: GitDriver, rootTrees: Set<Hash>): Tree[] {
+  const result: Tree[] = [];
+  const visited: Set<Hash> = new Set();
+  const frontier: Set<Hash> = new Set(rootTrees);
+  while (frontier.size > 0) {
+    const next = frontier.values().next();
+    if (next.done) {
+      // Flow doesn't know that this is impossible, but it is.
+      throw new Error("Impossible! `frontier` had positive size.");
+    }
+    const treeHash: Hash = next.value;
+    visited.add(treeHash);
+    frontier.delete(treeHash);
+    const tree = loadTree(git, treeHash);
+    result.push(tree);
+    for (const entry of tree.entries.values()) {
+      if (entry.type === "tree" && !visited.has(entry.hash)) {
+        frontier.add(entry.hash);
+      }
+    }
+  }
+  return result;
+}
+
+/**
+ * Read the direct entries of the tree `treeHash` via `git ls-tree`.
+ * Throws an Error if a line of output cannot be parsed or names an
+ * entry type other than "blob", "commit", or "tree".
+ */
+function loadTree(git: GitDriver, treeHash: Hash): Tree {
+  const entries: TreeEntry[] = git(["ls-tree", "--full-tree", "-z", treeHash])
+    .split("\0")
+    .filter((line) => line.length > 0)
+    .map((line) => {
+      // See `git help ls-tree`, section OUTPUT FORMAT, for details.
+      // With `-z`, names are emitted verbatim and may themselves
+      // contain tabs, so only the first tab separates the metadata.
+      const tabIndex = line.indexOf("\t");
+      if (tabIndex < 0) {
+        throw new Error(
+          `entry of ${treeHash} is malformed: ${JSON.stringify(line)}`
+        );
+      }
+      const metadata = line.slice(0, tabIndex);
+      const name = line.slice(tabIndex + 1);
+      const [, type, hash] = metadata.split(" ");
+      if (type !== "blob" && type !== "commit" && type !== "tree") {
+        throw new Error(
+          `entry ${treeHash}[${JSON.stringify(name)}] ` +
+            `has unexpected type "${type}"`
+        );
+      }
+      return {name, type, hash};
+    });
+  return {hash: treeHash, entries: new Map(entries.map((e) => [e.name, e]))};
+}
diff --git a/src/plugins/git/loadRepository.test.js b/src/plugins/git/loadRepository.test.js
new file mode 100644
index 0000000..abebba9
--- /dev/null
+++ b/src/plugins/git/loadRepository.test.js
@@ -0,0 +1,153 @@
+// @flow
+
+import fs from "fs";
+import mkdirp from "mkdirp";
+import path from "path";
+import tmp from "tmp";
+
+import type {GitDriver} from "./loadRepository";
+import {localGit, loadRepository} from "./loadRepository";
+
+const cleanups: (() => void)[] = [];
+afterAll(() => {
+  cleanups.forEach((f) => {
+    f();
+  });
+});
+
+function mkdtemp() {
+  const result = tmp.dirSync();
+  cleanups.push(() => result.removeCallback());
+  return result.name;
+}
+
+function deterministicCommit(git: GitDriver, message: string): void {
+  git(
+    [
+      "-c",
+      "user.name=Test Runner",
+      "-c",
+      "user.email=nobody@example.com",
+      "commit",
+      "-m",
+      message,
+    ],
+    {
+      env: {
+        TZ: "UTC",
+        GIT_AUTHOR_DATE: "2001-02-03T04:05:06",
+        GIT_COMMITTER_DATE: "2002-03-04T05:06:07",
+      },
+    }
+  );
+}
+
+function createRepository(): {path: string, commits: string[]} {
+  const repositoryPath = mkdtemp();
+  const git = localGit(repositoryPath);
+
+  git(["init"]);
+
+  function makeChangesAndCommit(
+    message: string,
+    changes: {[filename: string]: ?string}
+  ): string /* commit SHA */ {
+    Object.keys(changes).forEach((filename) => {
+      const filepath = path.join(repositoryPath, filename);
+      const dirpath = path.join(repositoryPath, path.dirname(filename));
+      if (changes[filename] == null) {
+        fs.unlinkSync(filepath);
+        git(["rm", filename]);
+      } else {
+        const change = changes[filename];
+        mkdirp.sync(dirpath);
+        fs.writeFileSync(filepath, change);
+        git(["add", filename]);
+      }
+    });
+    deterministicCommit(git, message);
+    return git(["rev-parse", "HEAD"]).trim();
+  }
+
+  const commit1 = makeChangesAndCommit("Initial commit", {
+    "README.txt": "Amazing physics going on...\n",
+  });
+  const commit2 = makeChangesAndCommit("Discover gravity", {
+    "src/index.py": "import antigravity\n",
+    "src/quantum_gravity.py": 'raise NotImplementedError("TODO(physicists)")\n',
+    "TODOS.txt": "1. Resolve quantum gravity\n",
+  });
+  const commit3 = makeChangesAndCommit("Solve quantum gravity", {
+    "src/quantum_gravity.py":
+      "import random\nif random.random() < 0.5:\n import antigravity\n",
+  });
+  const commit4 = makeChangesAndCommit("Clean up TODOS", {
+    "TODOS.txt": null,
+  });
+
+  return {
+    path: repositoryPath,
+    commits: [commit1, commit2, commit3, commit4],
+  };
+}
+
+test("we create a deterministic repository", () => {
+  expect(createRepository().commits).toMatchSnapshot();
+});
+
+describe("loadRepository", () => {
+  it("loads from HEAD", () => {
+    const repository = createRepository();
+    expect(loadRepository(repository.path, "HEAD")).toMatchSnapshot();
+  });
+
+  it("processes an old commit", () => {
+    const repository = createRepository();
+    const whole = loadRepository(repository.path, "HEAD");
+    const part = loadRepository(repository.path, repository.commits[1]);
+
+    // Check that `part` is a subset of `whole`...
+    for (const hash of part.commits.keys()) {
+      expect(part.commits.get(hash)).toEqual(whole.commits.get(hash));
+    }
+    for (const hash of part.trees.keys()) {
+      expect(part.trees.get(hash)).toEqual(whole.trees.get(hash));
+    }
+
+    // ...and that it's the right subset.
+    expect({
+      commits: new Set(part.commits.keys()),
+      trees: new Set(part.trees.keys()),
+    }).toMatchSnapshot();
+  });
+
+  it("works with submodules", () => {
+    const repositoryPath = mkdtemp();
+    const git = localGit(repositoryPath);
+
+    const subproject = createRepository();
+
+    git(["init"]);
+    git(["submodule", "--quiet", "add", subproject.path, "physics"]);
+    deterministicCommit(git, "Initial commit");
+
+    const head = git(["rev-parse", "HEAD"]).trim();
+
+    const repository = loadRepository(repositoryPath, "HEAD");
+    const commit = repository.commits.get(head);
+    expect(commit).toEqual(expect.anything());
+    if (commit == null) {
+      throw new Error("Unreachable");
+    }
+    const tree = repository.trees.get(commit.treeHash);
+    expect(tree).toEqual(expect.anything());
+    if (tree == null) {
+      throw new Error("Unreachable");
+    }
+    expect(tree.entries.get("physics")).toEqual({
+      type: "commit",
+      name: "physics",
+      hash: subproject.commits[subproject.commits.length - 1],
+    });
+  });
+});