Convert in-memory Git repos to cred graphs (#169)
Test Plan: This snapshot test is too unwieldy to actually read—it’s 1000 lines of opaque SHAs and thrice-stringified JSON objects—so it should be interpreted as a regression test only. The programmatic tests should suffice. wchargin-branch: wip-git-create-graph
This commit is contained in:
parent
f3a440244e
commit
5af5748ed7
|
@ -33,6 +33,7 @@
|
||||||
"jest": "20.0.4",
|
"jest": "20.0.4",
|
||||||
"json-stable-stringify": "^1.0.1",
|
"json-stable-stringify": "^1.0.1",
|
||||||
"lint-staged": "^6.1.1",
|
"lint-staged": "^6.1.1",
|
||||||
|
"lodash.clonedeep": "^4.5.0",
|
||||||
"lodash.isequal": "^4.5.0",
|
"lodash.isequal": "^4.5.0",
|
||||||
"lodash.sortby": "^4.7.0",
|
"lodash.sortby": "^4.7.0",
|
||||||
"object-assign": "4.1.1",
|
"object-assign": "4.1.1",
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,142 @@
|
||||||
|
// @flow
|
||||||
|
|
||||||
|
import type {Address} from "../../core/address";
|
||||||
|
import type {
|
||||||
|
Repository,
|
||||||
|
Commit,
|
||||||
|
Tree,
|
||||||
|
NodePayload,
|
||||||
|
EdgePayload,
|
||||||
|
NodeType,
|
||||||
|
EdgeType,
|
||||||
|
} from "./types";
|
||||||
|
import {Graph, edgeID} from "../../core/graph";
|
||||||
|
import {
|
||||||
|
BLOB_NODE_TYPE,
|
||||||
|
COMMIT_NODE_TYPE,
|
||||||
|
TREE_NODE_TYPE,
|
||||||
|
TREE_ENTRY_NODE_TYPE,
|
||||||
|
INCLUDES_EDGE_TYPE,
|
||||||
|
HAS_CONTENTS_EDGE_TYPE,
|
||||||
|
HAS_TREE_EDGE_TYPE,
|
||||||
|
GIT_PLUGIN_NAME,
|
||||||
|
includesEdgeId,
|
||||||
|
treeEntryId,
|
||||||
|
} from "./types";
|
||||||
|
|
||||||
|
class GitGraphCreator {
|
||||||
|
repositoryName: string;
|
||||||
|
|
||||||
|
constructor(repositoryName) {
|
||||||
|
this.repositoryName = repositoryName;
|
||||||
|
}
|
||||||
|
|
||||||
|
makeAddress(type: NodeType | EdgeType, id: string): Address {
|
||||||
|
return {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: this.repositoryName,
|
||||||
|
type,
|
||||||
|
id,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
createGraph(repository: Repository): Graph<NodePayload, EdgePayload> {
|
||||||
|
const graphs = [
|
||||||
|
...Object.keys(repository.commits).map((hash) =>
|
||||||
|
this.commitGraph(repository.commits[hash])
|
||||||
|
),
|
||||||
|
...Object.keys(repository.trees).map((hash) =>
|
||||||
|
this.treeGraph(repository.trees[hash])
|
||||||
|
),
|
||||||
|
];
|
||||||
|
return graphs.reduce((g, h) => Graph.mergeConservative(g, h), new Graph());
|
||||||
|
}
|
||||||
|
|
||||||
|
commitGraph(commit: Commit) {
|
||||||
|
const commitNode = {
|
||||||
|
address: this.makeAddress(COMMIT_NODE_TYPE, commit.hash),
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
const treeNode = {
|
||||||
|
address: this.makeAddress(TREE_NODE_TYPE, commit.treeHash),
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
const edge = {
|
||||||
|
address: this.makeAddress(
|
||||||
|
HAS_TREE_EDGE_TYPE,
|
||||||
|
edgeID(commitNode.address, treeNode.address)
|
||||||
|
),
|
||||||
|
src: commitNode.address,
|
||||||
|
dst: treeNode.address,
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
return new Graph()
|
||||||
|
.addNode(commitNode)
|
||||||
|
.addNode(treeNode)
|
||||||
|
.addEdge(edge);
|
||||||
|
}
|
||||||
|
|
||||||
|
treeGraph(tree: Tree) {
|
||||||
|
const treeNode = {
|
||||||
|
address: this.makeAddress(TREE_NODE_TYPE, tree.hash),
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
const result = new Graph().addNode(treeNode);
|
||||||
|
Object.keys(tree.entries).forEach((name) => {
|
||||||
|
const entry = tree.entries[name];
|
||||||
|
const entryNode = {
|
||||||
|
address: this.makeAddress(
|
||||||
|
TREE_ENTRY_NODE_TYPE,
|
||||||
|
treeEntryId(tree.hash, entry.name)
|
||||||
|
),
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
const entryEdge = {
|
||||||
|
address: this.makeAddress(
|
||||||
|
INCLUDES_EDGE_TYPE,
|
||||||
|
includesEdgeId(tree.hash, entry.name)
|
||||||
|
),
|
||||||
|
src: treeNode.address,
|
||||||
|
dst: entryNode.address,
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
result.addNode(entryNode).addEdge(entryEdge);
|
||||||
|
if (entry.type === "commit") {
|
||||||
|
// We don't represent subproject commits in the graph.
|
||||||
|
} else {
|
||||||
|
let contentsNodeType;
|
||||||
|
if (entry.type === "tree") {
|
||||||
|
contentsNodeType = TREE_NODE_TYPE;
|
||||||
|
} else if (entry.type === "blob") {
|
||||||
|
contentsNodeType = BLOB_NODE_TYPE;
|
||||||
|
} else {
|
||||||
|
// eslint-disable-next-line no-unused-expressions
|
||||||
|
(entry.type: empty);
|
||||||
|
throw new Error(`Unknown entry type: ${entry.type}`);
|
||||||
|
}
|
||||||
|
const contentsNode = {
|
||||||
|
address: this.makeAddress(contentsNodeType, entry.hash),
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
const contentsEdge = {
|
||||||
|
address: this.makeAddress(
|
||||||
|
HAS_CONTENTS_EDGE_TYPE,
|
||||||
|
edgeID(entryNode.address, contentsNode.address)
|
||||||
|
),
|
||||||
|
src: entryNode.address,
|
||||||
|
dst: contentsNode.address,
|
||||||
|
payload: {},
|
||||||
|
};
|
||||||
|
result.addNode(contentsNode).addEdge(contentsEdge);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createGraph(
|
||||||
|
repository: Repository,
|
||||||
|
repositoryName: string
|
||||||
|
): Graph<NodePayload, EdgePayload> {
|
||||||
|
return new GitGraphCreator(repositoryName).createGraph(repository);
|
||||||
|
}
|
|
@ -0,0 +1,224 @@
|
||||||
|
// @flow
|
||||||
|
|
||||||
|
import cloneDeep from "lodash.clonedeep";
|
||||||
|
|
||||||
|
import {createGraph} from "./createGraph";
|
||||||
|
import {
|
||||||
|
BLOB_NODE_TYPE,
|
||||||
|
COMMIT_NODE_TYPE,
|
||||||
|
GIT_PLUGIN_NAME,
|
||||||
|
HAS_CONTENTS_EDGE_TYPE,
|
||||||
|
HAS_TREE_EDGE_TYPE,
|
||||||
|
INCLUDES_EDGE_TYPE,
|
||||||
|
TREE_ENTRY_NODE_TYPE,
|
||||||
|
TREE_NODE_TYPE,
|
||||||
|
treeEntryId,
|
||||||
|
} from "./types";
|
||||||
|
|
||||||
|
// Load the example-git fixture and return a deep copy of it, so that each
// caller gets its own object rather than the shared module export.
const makeData = () => cloneDeep(require("./demoData/example-git"));
|
||||||
|
|
||||||
|
describe("createGraph", () => {
|
||||||
|
it("processes a simple repository", () => {
|
||||||
|
expect(createGraph(makeData(), "sourcecred/example-git")).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("has no dangling edges", () => {
|
||||||
|
const graph = createGraph(makeData(), "sourcecred/example-git");
|
||||||
|
graph.edges().forEach((edge) => {
|
||||||
|
expect(graph.node(edge.src)).toEqual(expect.anything());
|
||||||
|
expect(graph.node(edge.dst)).toEqual(expect.anything());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("has all commits, each with correct edges", () => {
|
||||||
|
const data = makeData();
|
||||||
|
const graph = createGraph(data, "sourcecred/example-git");
|
||||||
|
Object.keys(data.commits).forEach((hash) => {
|
||||||
|
const address = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: COMMIT_NODE_TYPE,
|
||||||
|
id: hash,
|
||||||
|
};
|
||||||
|
expect(graph.node(address)).toEqual({address, payload: {}});
|
||||||
|
expect(graph.neighborhood(address)).toHaveLength(1);
|
||||||
|
expect(
|
||||||
|
graph.neighborhood(address, {
|
||||||
|
nodeType: TREE_NODE_TYPE,
|
||||||
|
edgeType: HAS_TREE_EDGE_TYPE,
|
||||||
|
})
|
||||||
|
).toHaveLength(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("has all trees, each with correct edges", () => {
|
||||||
|
const data = makeData();
|
||||||
|
const graph = createGraph(data, "sourcecred/example-git");
|
||||||
|
Object.keys(data.trees).forEach((hash) => {
|
||||||
|
const address = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: TREE_NODE_TYPE,
|
||||||
|
id: hash,
|
||||||
|
};
|
||||||
|
|
||||||
|
const entryChildren = graph.outEdges(address, {
|
||||||
|
nodeType: TREE_ENTRY_NODE_TYPE,
|
||||||
|
edgeType: INCLUDES_EDGE_TYPE,
|
||||||
|
});
|
||||||
|
expect(entryChildren).toHaveLength(
|
||||||
|
Object.keys(data.trees[hash].entries).length
|
||||||
|
);
|
||||||
|
expect(graph.outEdges(address)).toHaveLength(entryChildren.length);
|
||||||
|
|
||||||
|
expect(graph.node(address)).toEqual({address, payload: {}});
|
||||||
|
const owningCommits = graph.inEdges(address, {
|
||||||
|
nodeType: COMMIT_NODE_TYPE,
|
||||||
|
edgeType: HAS_TREE_EDGE_TYPE,
|
||||||
|
});
|
||||||
|
expect(owningCommits.length).toBeLessThanOrEqual(1);
|
||||||
|
const parentTreeEntries = graph.inEdges(address, {
|
||||||
|
nodeType: TREE_ENTRY_NODE_TYPE,
|
||||||
|
edgeType: HAS_CONTENTS_EDGE_TYPE,
|
||||||
|
});
|
||||||
|
expect(graph.inEdges(address)).toHaveLength(
|
||||||
|
owningCommits.length + parentTreeEntries.length
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("has all tree entries, each with correct edges", () => {
|
||||||
|
const data = makeData();
|
||||||
|
const graph = createGraph(data, "sourcecred/example-git");
|
||||||
|
Object.keys(data.trees).forEach((hash) => {
|
||||||
|
const tree = data.trees[hash];
|
||||||
|
const treeAddress = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: TREE_NODE_TYPE,
|
||||||
|
id: hash,
|
||||||
|
};
|
||||||
|
expect(graph.node(treeAddress)).toEqual({
|
||||||
|
address: treeAddress,
|
||||||
|
payload: {},
|
||||||
|
});
|
||||||
|
Object.keys(tree.entries).forEach((name) => {
|
||||||
|
const entryAddress = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: TREE_ENTRY_NODE_TYPE,
|
||||||
|
id: treeEntryId(hash, name),
|
||||||
|
};
|
||||||
|
expect(
|
||||||
|
graph.inEdges(entryAddress, {
|
||||||
|
nodeType: TREE_NODE_TYPE,
|
||||||
|
edgeType: INCLUDES_EDGE_TYPE,
|
||||||
|
})
|
||||||
|
).toHaveLength(1);
|
||||||
|
const shouldHaveContents = tree.entries[name].type !== "commit";
|
||||||
|
expect(
|
||||||
|
graph.outEdges(entryAddress, {edgeType: HAS_CONTENTS_EDGE_TYPE})
|
||||||
|
).toHaveLength(shouldHaveContents ? 1 : 0);
|
||||||
|
expect(graph.neighborhood(entryAddress)).toHaveLength(
|
||||||
|
shouldHaveContents ? 2 : 1
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("has specific paths:", () => {
|
||||||
|
const headCommitHash = "3715ddfb8d4c4fd2a6f6af75488c82f84c92ec2f";
|
||||||
|
if (makeData().commits[headCommitHash] == null) {
|
||||||
|
throw new Error("Commit hash out of date.");
|
||||||
|
}
|
||||||
|
|
||||||
|
function uniqueNeighborMatching(
|
||||||
|
graph,
|
||||||
|
nodeAddress,
|
||||||
|
filter?,
|
||||||
|
predicate = (_) => true
|
||||||
|
) {
|
||||||
|
const edges = graph
|
||||||
|
.neighborhood(nodeAddress, filter)
|
||||||
|
.filter((x) => predicate(x));
|
||||||
|
expect(edges).toHaveLength(1);
|
||||||
|
return edges[0].neighborAddress;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Address of the single tree node connected to the given commit by a
// HAS_TREE edge; asserts that there is exactly one.
function uniqueTree(graph, commitAddress) {
  const hasTreeFilter = {
    nodeType: TREE_NODE_TYPE,
    edgeType: HAS_TREE_EDGE_TYPE,
  };
  return uniqueNeighborMatching(graph, commitAddress, hasTreeFilter);
}
|
||||||
|
|
||||||
|
function uniqueEntry(graph, treeAddress, entryName: string) {
|
||||||
|
return uniqueNeighborMatching(
|
||||||
|
graph,
|
||||||
|
treeAddress,
|
||||||
|
{
|
||||||
|
nodeType: TREE_ENTRY_NODE_TYPE,
|
||||||
|
edgeType: INCLUDES_EDGE_TYPE,
|
||||||
|
},
|
||||||
|
({edge}) => edge.address.id.endsWith(`:${entryName}`)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Address of the single node connected to the given tree entry by a
// HAS_CONTENTS edge; asserts that there is exactly one.
function uniqueContents(graph, treeEntryNodeAddress) {
  const hasContentsFilter = {edgeType: HAS_CONTENTS_EDGE_TYPE};
  return uniqueNeighborMatching(
    graph,
    treeEntryNodeAddress,
    hasContentsFilter
  );
}
|
||||||
|
|
||||||
|
test("HEAD^{tree}:src/quantum_gravity.py with correct contents", () => {
|
||||||
|
const data = makeData();
|
||||||
|
const graph = createGraph(data, "sourcecred/example-git");
|
||||||
|
|
||||||
|
const headCommitAddress = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: COMMIT_NODE_TYPE,
|
||||||
|
id: headCommitHash,
|
||||||
|
};
|
||||||
|
const headTreeAddress = uniqueTree(graph, headCommitAddress);
|
||||||
|
const srcTreeEntryAddress = uniqueEntry(graph, headTreeAddress, "src");
|
||||||
|
const srcTreeAddress = uniqueContents(graph, srcTreeEntryAddress);
|
||||||
|
const blobEntryAddress = uniqueEntry(
|
||||||
|
graph,
|
||||||
|
srcTreeAddress,
|
||||||
|
"quantum_gravity.py"
|
||||||
|
);
|
||||||
|
const blobAddress = uniqueContents(graph, blobEntryAddress);
|
||||||
|
expect(graph.node(blobAddress)).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
address: expect.objectContaining({
|
||||||
|
type: BLOB_NODE_TYPE,
|
||||||
|
id: "aea4f28abb23abde151b0ead4063227f8bf6c0b0",
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("HEAD^{tree}:pygravitydefier with no contents", () => {
|
||||||
|
const data = makeData();
|
||||||
|
const graph = createGraph(data, "sourcecred/example-git");
|
||||||
|
|
||||||
|
const headCommitAddress = {
|
||||||
|
pluginName: GIT_PLUGIN_NAME,
|
||||||
|
repositoryName: "sourcecred/example-git",
|
||||||
|
type: COMMIT_NODE_TYPE,
|
||||||
|
id: headCommitHash,
|
||||||
|
};
|
||||||
|
const headTreeAddress = uniqueTree(graph, headCommitAddress);
|
||||||
|
const treeEntryAddress = uniqueEntry(
|
||||||
|
graph,
|
||||||
|
headTreeAddress,
|
||||||
|
"pygravitydefier"
|
||||||
|
);
|
||||||
|
expect(graph.node(treeEntryAddress)).toEqual(expect.anything());
|
||||||
|
// Submodule commits never have contents, because the commit nodes
|
||||||
|
// are from an unknown repository.
|
||||||
|
expect(graph.outEdges(treeEntryAddress)).toHaveLength(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
|
@ -1,5 +1,8 @@
|
||||||
// @flow
|
// @flow
|
||||||
|
|
||||||
|
export const GIT_PLUGIN_NAME = "sourcecred/git-beta";
|
||||||
|
|
||||||
|
// Logical types
|
||||||
export type Repository = {|
|
export type Repository = {|
|
||||||
+commits: {[Hash]: Commit},
|
+commits: {[Hash]: Commit},
|
||||||
+trees: {[Hash]: Tree},
|
+trees: {[Hash]: Tree},
|
||||||
|
@ -18,3 +21,66 @@ export type TreeEntry = {|
|
||||||
+name: string,
|
+name: string,
|
||||||
+hash: Hash,
|
+hash: Hash,
|
||||||
|};
|
|};
|
||||||
|
|
||||||
|
// Graph types
|
||||||
|
|
||||||
|
// Nodes
|
||||||
|
export const COMMIT_NODE_TYPE: "COMMIT" = "COMMIT";
|
||||||
|
export type CommitNodePayload = {||};
|
||||||
|
|
||||||
|
export const TREE_NODE_TYPE: "TREE" = "TREE";
|
||||||
|
export type TreeNodePayload = {||};
|
||||||
|
|
||||||
|
export const BLOB_NODE_TYPE: "BLOB" = "BLOB";
|
||||||
|
export type BlobNodePayload = {||}; // we do not store the content
|
||||||
|
|
||||||
|
export const TREE_ENTRY_NODE_TYPE: "TREE_ENTRY" = "TREE_ENTRY";
|
||||||
|
export type TreeEntryNodePayload = {||};
|
||||||
|
export function treeEntryId(treeSha: string, name: string): string {
|
||||||
|
return `${treeSha}:${name}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NodePayload =
|
||||||
|
| CommitNodePayload
|
||||||
|
| TreeNodePayload
|
||||||
|
| TreeEntryNodePayload
|
||||||
|
| HasContentsEdgePayload;
|
||||||
|
|
||||||
|
export type NodeType =
|
||||||
|
| typeof COMMIT_NODE_TYPE
|
||||||
|
| typeof TREE_NODE_TYPE
|
||||||
|
| typeof TREE_ENTRY_NODE_TYPE
|
||||||
|
| typeof BLOB_NODE_TYPE;
|
||||||
|
|
||||||
|
// Edges
|
||||||
|
|
||||||
|
// CommitNode -> TreeNode
|
||||||
|
export const HAS_TREE_EDGE_TYPE: "HAS_TREE" = "HAS_TREE";
|
||||||
|
export type HasTreeEdgePayload = {||};
|
||||||
|
|
||||||
|
// TreeNode -> TreeEntryNode
|
||||||
|
export const INCLUDES_EDGE_TYPE: "INCLUDES" = "INCLUDES";
|
||||||
|
export type IncludesEdgePayload = {||};
|
||||||
|
export function includesEdgeId(treeSha: string, name: string): string {
|
||||||
|
return `${treeSha}:${name}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TreeEntryNode -> TreeEntryNode
|
||||||
|
export const BECOMES_EDGE_TYPE: "BECOMES" = "BECOMES";
|
||||||
|
export type BecomesEdgePayload = {||};
|
||||||
|
|
||||||
|
// TreeEntryNode -> BlobNode | TreeNode
|
||||||
|
export const HAS_CONTENTS_EDGE_TYPE: "HAS_CONTENTS" = "HAS_CONTENTS";
|
||||||
|
export type HasContentsEdgePayload = {||};
|
||||||
|
|
||||||
|
export type EdgeType =
|
||||||
|
| typeof HAS_TREE_EDGE_TYPE
|
||||||
|
| typeof INCLUDES_EDGE_TYPE
|
||||||
|
| typeof BECOMES_EDGE_TYPE
|
||||||
|
| typeof HAS_CONTENTS_EDGE_TYPE;
|
||||||
|
|
||||||
|
export type EdgePayload =
|
||||||
|
| HasTreeEdgePayload
|
||||||
|
| IncludesEdgePayload
|
||||||
|
| BecomesEdgePayload
|
||||||
|
| HasContentsEdgePayload;
|
||||||
|
|
|
@ -4282,6 +4282,10 @@ lodash.camelcase@^4.3.0:
|
||||||
version "4.3.0"
|
version "4.3.0"
|
||||||
resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6"
|
resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6"
|
||||||
|
|
||||||
|
lodash.clonedeep@^4.5.0:
|
||||||
|
version "4.5.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz#e23f3f9c4f8fbdde872529c1071857a086e5ccef"
|
||||||
|
|
||||||
lodash.cond@^4.3.0:
|
lodash.cond@^4.3.0:
|
||||||
version "4.5.2"
|
version "4.5.2"
|
||||||
resolved "https://registry.yarnpkg.com/lodash.cond/-/lodash.cond-4.5.2.tgz#f471a1da486be60f6ab955d17115523dd1d255d5"
|
resolved "https://registry.yarnpkg.com/lodash.cond/-/lodash.cond-4.5.2.tgz#f471a1da486be60f6ab955d17115523dd1d255d5"
|
||||||
|
|
Loading…
Reference in New Issue