Convert in-memory Git repos to cred graphs (#169)

Test Plan:
This snapshot test is too unwieldy to actually read—it’s 1000 lines of
opaque SHAs and thrice-stringified JSON objects—so it should be
interpreted as a regression test only. The programmatic tests should
suffice.

wchargin-branch: wip-git-create-graph
This commit is contained in:
William Chargin 2018-04-30 15:23:37 -07:00 committed by GitHub
parent f3a440244e
commit 5af5748ed7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 1668 additions and 0 deletions

View File

@ -33,6 +33,7 @@
"jest": "20.0.4", "jest": "20.0.4",
"json-stable-stringify": "^1.0.1", "json-stable-stringify": "^1.0.1",
"lint-staged": "^6.1.1", "lint-staged": "^6.1.1",
"lodash.clonedeep": "^4.5.0",
"lodash.isequal": "^4.5.0", "lodash.isequal": "^4.5.0",
"lodash.sortby": "^4.7.0", "lodash.sortby": "^4.7.0",
"object-assign": "4.1.1", "object-assign": "4.1.1",

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,142 @@
// @flow
import type {Address} from "../../core/address";
import type {
Repository,
Commit,
Tree,
NodePayload,
EdgePayload,
NodeType,
EdgeType,
} from "./types";
import {Graph, edgeID} from "../../core/graph";
import {
BLOB_NODE_TYPE,
COMMIT_NODE_TYPE,
TREE_NODE_TYPE,
TREE_ENTRY_NODE_TYPE,
INCLUDES_EDGE_TYPE,
HAS_CONTENTS_EDGE_TYPE,
HAS_TREE_EDGE_TYPE,
GIT_PLUGIN_NAME,
includesEdgeId,
treeEntryId,
} from "./types";
class GitGraphCreator {
repositoryName: string;
constructor(repositoryName) {
this.repositoryName = repositoryName;
}
makeAddress(type: NodeType | EdgeType, id: string): Address {
return {
pluginName: GIT_PLUGIN_NAME,
repositoryName: this.repositoryName,
type,
id,
};
}
createGraph(repository: Repository): Graph<NodePayload, EdgePayload> {
const graphs = [
...Object.keys(repository.commits).map((hash) =>
this.commitGraph(repository.commits[hash])
),
...Object.keys(repository.trees).map((hash) =>
this.treeGraph(repository.trees[hash])
),
];
return graphs.reduce((g, h) => Graph.mergeConservative(g, h), new Graph());
}
commitGraph(commit: Commit) {
const commitNode = {
address: this.makeAddress(COMMIT_NODE_TYPE, commit.hash),
payload: {},
};
const treeNode = {
address: this.makeAddress(TREE_NODE_TYPE, commit.treeHash),
payload: {},
};
const edge = {
address: this.makeAddress(
HAS_TREE_EDGE_TYPE,
edgeID(commitNode.address, treeNode.address)
),
src: commitNode.address,
dst: treeNode.address,
payload: {},
};
return new Graph()
.addNode(commitNode)
.addNode(treeNode)
.addEdge(edge);
}
treeGraph(tree: Tree) {
const treeNode = {
address: this.makeAddress(TREE_NODE_TYPE, tree.hash),
payload: {},
};
const result = new Graph().addNode(treeNode);
Object.keys(tree.entries).forEach((name) => {
const entry = tree.entries[name];
const entryNode = {
address: this.makeAddress(
TREE_ENTRY_NODE_TYPE,
treeEntryId(tree.hash, entry.name)
),
payload: {},
};
const entryEdge = {
address: this.makeAddress(
INCLUDES_EDGE_TYPE,
includesEdgeId(tree.hash, entry.name)
),
src: treeNode.address,
dst: entryNode.address,
payload: {},
};
result.addNode(entryNode).addEdge(entryEdge);
if (entry.type === "commit") {
// We don't represent subproject commits in the graph.
} else {
let contentsNodeType;
if (entry.type === "tree") {
contentsNodeType = TREE_NODE_TYPE;
} else if (entry.type === "blob") {
contentsNodeType = BLOB_NODE_TYPE;
} else {
// eslint-disable-next-line no-unused-expressions
(entry.type: empty);
throw new Error(`Unknown entry type: ${entry.type}`);
}
const contentsNode = {
address: this.makeAddress(contentsNodeType, entry.hash),
payload: {},
};
const contentsEdge = {
address: this.makeAddress(
HAS_CONTENTS_EDGE_TYPE,
edgeID(entryNode.address, contentsNode.address)
),
src: entryNode.address,
dst: contentsNode.address,
payload: {},
};
result.addNode(contentsNode).addEdge(contentsEdge);
}
});
return result;
}
}
export function createGraph(
repository: Repository,
repositoryName: string
): Graph<NodePayload, EdgePayload> {
return new GitGraphCreator(repositoryName).createGraph(repository);
}

View File

@ -0,0 +1,224 @@
// @flow
import cloneDeep from "lodash.clonedeep";
import {createGraph} from "./createGraph";
import {
BLOB_NODE_TYPE,
COMMIT_NODE_TYPE,
GIT_PLUGIN_NAME,
HAS_CONTENTS_EDGE_TYPE,
HAS_TREE_EDGE_TYPE,
INCLUDES_EDGE_TYPE,
TREE_ENTRY_NODE_TYPE,
TREE_NODE_TYPE,
treeEntryId,
} from "./types";
// Returns a deep copy of the example repository fixture, so that each
// caller gets its own object rather than the one handed out by
// `require`.
const makeData = () => {
  const fixture = require("./demoData/example-git");
  return cloneDeep(fixture);
};
describe("createGraph", () => {
it("processes a simple repository", () => {
expect(createGraph(makeData(), "sourcecred/example-git")).toMatchSnapshot();
});
it("has no dangling edges", () => {
const graph = createGraph(makeData(), "sourcecred/example-git");
graph.edges().forEach((edge) => {
expect(graph.node(edge.src)).toEqual(expect.anything());
expect(graph.node(edge.dst)).toEqual(expect.anything());
});
});
it("has all commits, each with correct edges", () => {
const data = makeData();
const graph = createGraph(data, "sourcecred/example-git");
Object.keys(data.commits).forEach((hash) => {
const address = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: COMMIT_NODE_TYPE,
id: hash,
};
expect(graph.node(address)).toEqual({address, payload: {}});
expect(graph.neighborhood(address)).toHaveLength(1);
expect(
graph.neighborhood(address, {
nodeType: TREE_NODE_TYPE,
edgeType: HAS_TREE_EDGE_TYPE,
})
).toHaveLength(1);
});
});
it("has all trees, each with correct edges", () => {
const data = makeData();
const graph = createGraph(data, "sourcecred/example-git");
Object.keys(data.trees).forEach((hash) => {
const address = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: TREE_NODE_TYPE,
id: hash,
};
const entryChildren = graph.outEdges(address, {
nodeType: TREE_ENTRY_NODE_TYPE,
edgeType: INCLUDES_EDGE_TYPE,
});
expect(entryChildren).toHaveLength(
Object.keys(data.trees[hash].entries).length
);
expect(graph.outEdges(address)).toHaveLength(entryChildren.length);
expect(graph.node(address)).toEqual({address, payload: {}});
const owningCommits = graph.inEdges(address, {
nodeType: COMMIT_NODE_TYPE,
edgeType: HAS_TREE_EDGE_TYPE,
});
expect(owningCommits.length).toBeLessThanOrEqual(1);
const parentTreeEntries = graph.inEdges(address, {
nodeType: TREE_ENTRY_NODE_TYPE,
edgeType: HAS_CONTENTS_EDGE_TYPE,
});
expect(graph.inEdges(address)).toHaveLength(
owningCommits.length + parentTreeEntries.length
);
});
});
it("has all tree entries, each with correct edges", () => {
const data = makeData();
const graph = createGraph(data, "sourcecred/example-git");
Object.keys(data.trees).forEach((hash) => {
const tree = data.trees[hash];
const treeAddress = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: TREE_NODE_TYPE,
id: hash,
};
expect(graph.node(treeAddress)).toEqual({
address: treeAddress,
payload: {},
});
Object.keys(tree.entries).forEach((name) => {
const entryAddress = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: TREE_ENTRY_NODE_TYPE,
id: treeEntryId(hash, name),
};
expect(
graph.inEdges(entryAddress, {
nodeType: TREE_NODE_TYPE,
edgeType: INCLUDES_EDGE_TYPE,
})
).toHaveLength(1);
const shouldHaveContents = tree.entries[name].type !== "commit";
expect(
graph.outEdges(entryAddress, {edgeType: HAS_CONTENTS_EDGE_TYPE})
).toHaveLength(shouldHaveContents ? 1 : 0);
expect(graph.neighborhood(entryAddress)).toHaveLength(
shouldHaveContents ? 2 : 1
);
});
});
});
describe("has specific paths:", () => {
const headCommitHash = "3715ddfb8d4c4fd2a6f6af75488c82f84c92ec2f";
if (makeData().commits[headCommitHash] == null) {
throw new Error("Commit hash out of date.");
}
function uniqueNeighborMatching(
graph,
nodeAddress,
filter?,
predicate = (_) => true
) {
const edges = graph
.neighborhood(nodeAddress, filter)
.filter((x) => predicate(x));
expect(edges).toHaveLength(1);
return edges[0].neighborAddress;
}
function uniqueTree(graph, commitAddress) {
return uniqueNeighborMatching(graph, commitAddress, {
nodeType: TREE_NODE_TYPE,
edgeType: HAS_TREE_EDGE_TYPE,
});
}
function uniqueEntry(graph, treeAddress, entryName: string) {
return uniqueNeighborMatching(
graph,
treeAddress,
{
nodeType: TREE_ENTRY_NODE_TYPE,
edgeType: INCLUDES_EDGE_TYPE,
},
({edge}) => edge.address.id.endsWith(`:${entryName}`)
);
}
function uniqueContents(graph, treeEntryNodeAddress) {
return uniqueNeighborMatching(graph, treeEntryNodeAddress, {
edgeType: HAS_CONTENTS_EDGE_TYPE,
});
}
test("HEAD^{tree}:src/quantum_gravity.py with correct contents", () => {
const data = makeData();
const graph = createGraph(data, "sourcecred/example-git");
const headCommitAddress = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: COMMIT_NODE_TYPE,
id: headCommitHash,
};
const headTreeAddress = uniqueTree(graph, headCommitAddress);
const srcTreeEntryAddress = uniqueEntry(graph, headTreeAddress, "src");
const srcTreeAddress = uniqueContents(graph, srcTreeEntryAddress);
const blobEntryAddress = uniqueEntry(
graph,
srcTreeAddress,
"quantum_gravity.py"
);
const blobAddress = uniqueContents(graph, blobEntryAddress);
expect(graph.node(blobAddress)).toEqual(
expect.objectContaining({
address: expect.objectContaining({
type: BLOB_NODE_TYPE,
id: "aea4f28abb23abde151b0ead4063227f8bf6c0b0",
}),
})
);
});
test("HEAD^{tree}:pygravitydefier with no contents", () => {
const data = makeData();
const graph = createGraph(data, "sourcecred/example-git");
const headCommitAddress = {
pluginName: GIT_PLUGIN_NAME,
repositoryName: "sourcecred/example-git",
type: COMMIT_NODE_TYPE,
id: headCommitHash,
};
const headTreeAddress = uniqueTree(graph, headCommitAddress);
const treeEntryAddress = uniqueEntry(
graph,
headTreeAddress,
"pygravitydefier"
);
expect(graph.node(treeEntryAddress)).toEqual(expect.anything());
// Submodule commits never have contents, because the commit nodes
// are from an unknown repository.
expect(graph.outEdges(treeEntryAddress)).toHaveLength(0);
});
});
});

View File

@ -1,5 +1,8 @@
// @flow // @flow
// Plugin name stored in the `pluginName` field of the addresses that
// the Git plugin creates for its nodes and edges.
export const GIT_PLUGIN_NAME = "sourcecred/git-beta";
// Logical types
export type Repository = {| export type Repository = {|
+commits: {[Hash]: Commit}, +commits: {[Hash]: Commit},
+trees: {[Hash]: Tree}, +trees: {[Hash]: Tree},
@ -18,3 +21,66 @@ export type TreeEntry = {|
+name: string, +name: string,
+hash: Hash, +hash: Hash,
|}; |};
// Graph types
// Nodes
// Each node kind has a string-literal type tag (used as the `type` of
// its address) and a payload type. All payloads are currently the
// exact empty object type, i.e., they carry no fields.
// Commit nodes.
export const COMMIT_NODE_TYPE: "COMMIT" = "COMMIT";
export type CommitNodePayload = {||};
// Tree nodes.
export const TREE_NODE_TYPE: "TREE" = "TREE";
export type TreeNodePayload = {||};
// Blob nodes.
export const BLOB_NODE_TYPE: "BLOB" = "BLOB";
export type BlobNodePayload = {||}; // we do not store the content
// Tree entry nodes: one per (tree, entry name) pair; see `treeEntryId`.
export const TREE_ENTRY_NODE_TYPE: "TREE_ENTRY" = "TREE_ENTRY";
export type TreeEntryNodePayload = {||};
export function treeEntryId(treeSha: string, name: string): string {
return `${treeSha}:${name}`;
}
export type NodePayload =
| CommitNodePayload
| TreeNodePayload
| TreeEntryNodePayload
| HasContentsEdgePayload;
// Union of the string-literal type tags of all Git node kinds.
export type NodeType =
| typeof COMMIT_NODE_TYPE
| typeof TREE_NODE_TYPE
| typeof TREE_ENTRY_NODE_TYPE
| typeof BLOB_NODE_TYPE;
// Edges
// Each edge kind has a string-literal type tag and a payload type, and
// is introduced by a comment of the form `source node -> target node`.
// All edge payloads are currently the exact empty object type.
// CommitNode -> TreeNode
// Links a commit to its tree.
export const HAS_TREE_EDGE_TYPE: "HAS_TREE" = "HAS_TREE";
export type HasTreeEdgePayload = {||};
// TreeNode -> TreeEntryNode
// Links a tree to one of its entries; see `includesEdgeId` for the ID.
export const INCLUDES_EDGE_TYPE: "INCLUDES" = "INCLUDES";
export type IncludesEdgePayload = {||};
export function includesEdgeId(treeSha: string, name: string): string {
return `${treeSha}:${name}`;
}
// TreeEntryNode -> TreeEntryNode
// NOTE(review): nothing in the accompanying graph-creation code emits
// BECOMES edges yet; presumably they will relate versions of an entry
// across commits. Confirm the intended semantics before relying on it.
export const BECOMES_EDGE_TYPE: "BECOMES" = "BECOMES";
export type BecomesEdgePayload = {||};
// TreeEntryNode -> BlobNode | TreeNode
// Links a tree entry to the blob or tree that it names.
export const HAS_CONTENTS_EDGE_TYPE: "HAS_CONTENTS" = "HAS_CONTENTS";
export type HasContentsEdgePayload = {||};
// Union of the string-literal type tags of all Git edge kinds.
export type EdgeType =
| typeof HAS_TREE_EDGE_TYPE
| typeof INCLUDES_EDGE_TYPE
| typeof BECOMES_EDGE_TYPE
| typeof HAS_CONTENTS_EDGE_TYPE;
// Union of the payload types of all Git edge kinds.
export type EdgePayload =
| HasTreeEdgePayload
| IncludesEdgePayload
| BecomesEdgePayload
| HasContentsEdgePayload;

View File

@ -4282,6 +4282,10 @@ lodash.camelcase@^4.3.0:
version "4.3.0" version "4.3.0"
resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6" resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6"
lodash.clonedeep@^4.5.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef"
lodash.cond@^4.3.0: lodash.cond@^4.3.0:
version "4.5.2" version "4.5.2"
resolved "https://registry.yarnpkg.com/lodash.cond/-/lodash.cond-4.5.2.tgz#f471a1da486be60f6ab955d17115523dd1d255d5" resolved "https://registry.yarnpkg.com/lodash.cond/-/lodash.cond-4.5.2.tgz#f471a1da486be60f6ab955d17115523dd1d255d5"