Load submodule URLs at each commit (#185)

Summary:
In Git, a tree may point to a commit directly. In our graph, we’d like
to represent “submodule commits” explicitly, because, a priori, we do
not know the repository to which the commit belongs. A submodule commit
node will store the hash of the referent commit, as well as the URL to
the subproject as listed in the .gitmodules file. In this commit, we
load the list of those URLs into the in-memory repository.

Shout-out to `git` for having an excellent command-line API:
the `--blob` argument to `git-config` is perfect for this situation.

Test Plan:
Snapshot updates are readable and sufficient.

wchargin-branch: load-submodule-urls
This commit is contained in:
William Chargin 2018-05-03 10:39:03 -07:00 committed by GitHub
parent bbb05c9508
commit 7dbecfdac6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 96 additions and 1 deletions

View File

@ -5,6 +5,9 @@
"parentHashes": [ "parentHashes": [
"69c5aad50eec8f2a0a07c988c3b283a6490eb45b" "69c5aad50eec8f2a0a07c988c3b283a6490eb45b"
], ],
"submoduleUrls": {
"pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git"
},
"treeHash": "7be3ecfee5314ffa9b2d93fc4377792b2d6d70ed" "treeHash": "7be3ecfee5314ffa9b2d93fc4377792b2d6d70ed"
}, },
"69c5aad50eec8f2a0a07c988c3b283a6490eb45b": { "69c5aad50eec8f2a0a07c988c3b283a6490eb45b": {
@ -12,6 +15,9 @@
"parentHashes": [ "parentHashes": [
"e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc" "e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc"
], ],
"submoduleUrls": {
"pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git"
},
"treeHash": "bbf3b8b3d26a4f884b5c022d46851f593d329192" "treeHash": "bbf3b8b3d26a4f884b5c022d46851f593d329192"
}, },
"8d287c3bfbf8455ef30187bf5153ffc1b6eef268": { "8d287c3bfbf8455ef30187bf5153ffc1b6eef268": {
@ -19,6 +25,9 @@
"parentHashes": [ "parentHashes": [
"c08ee3a4edea384d5291ffcbf06724a13ed72325" "c08ee3a4edea384d5291ffcbf06724a13ed72325"
], ],
"submoduleUrls": {
"pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git"
},
"treeHash": "3dfb84795e07341b05fad3a0d5a55f8304b2d7d8" "treeHash": "3dfb84795e07341b05fad3a0d5a55f8304b2d7d8"
}, },
"c08ee3a4edea384d5291ffcbf06724a13ed72325": { "c08ee3a4edea384d5291ffcbf06724a13ed72325": {
@ -26,12 +35,16 @@
"parentHashes": [ "parentHashes": [
"c2b51945e7457546912a8ce158ed9d294558d294" "c2b51945e7457546912a8ce158ed9d294558d294"
], ],
"submoduleUrls": {
},
"treeHash": "2f7155e359fd0ecb96ffdca66fa45b6ed5792809" "treeHash": "2f7155e359fd0ecb96ffdca66fa45b6ed5792809"
}, },
"c2b51945e7457546912a8ce158ed9d294558d294": { "c2b51945e7457546912a8ce158ed9d294558d294": {
"hash": "c2b51945e7457546912a8ce158ed9d294558d294", "hash": "c2b51945e7457546912a8ce158ed9d294558d294",
"parentHashes": [ "parentHashes": [
], ],
"submoduleUrls": {
},
"treeHash": "bdff5d94193170015d6cbb549b7b630649428b1f" "treeHash": "bdff5d94193170015d6cbb549b7b630649428b1f"
}, },
"d160cca97611e9dfed642522ad44408d0292e8ea": { "d160cca97611e9dfed642522ad44408d0292e8ea": {
@ -39,6 +52,9 @@
"parentHashes": [ "parentHashes": [
"8d287c3bfbf8455ef30187bf5153ffc1b6eef268" "8d287c3bfbf8455ef30187bf5153ffc1b6eef268"
], ],
"submoduleUrls": {
"pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git"
},
"treeHash": "569e1d383759903134df75230d63c0090196d4cb" "treeHash": "569e1d383759903134df75230d63c0090196d4cb"
}, },
"e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc": { "e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc": {
@ -46,6 +62,9 @@
"parentHashes": [ "parentHashes": [
"d160cca97611e9dfed642522ad44408d0292e8ea" "d160cca97611e9dfed642522ad44408d0292e8ea"
], ],
"submoduleUrls": {
"pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git"
},
"treeHash": "819fc546cea489476ce8dc90785e9ba7753d0a8f" "treeHash": "819fc546cea489476ce8dc90785e9ba7753d0a8f"
} }
}, },

View File

@ -43,10 +43,85 @@ function findCommits(git: GitDriver, rootRef: string): Commit[] {
.filter((line) => line.length > 0) .filter((line) => line.length > 0)
.map((line) => { .map((line) => {
const [hash, treeHash, ...parentHashes] = line.trim().split(" "); const [hash, treeHash, ...parentHashes] = line.trim().split(" ");
return {hash, parentHashes, treeHash}; const submoduleUrls = loadSubmoduleUrls(git, hash);
return {hash, parentHashes, treeHash, submoduleUrls};
}); });
} }
// Matches `.gitmodules` config keys of the form `submodule.<name>.path`
// or `submodule.<name>.url`. Capture group 1 is the submodule name and
// capture group 2 is the field ("path" or "url").
const GITMODULES_SUBMODULES_KEY_RE = /^submodule\.(.*)\.(path|url)$/;

/**
 * Read the `.gitmodules` file as of `commitHash` and return a mapping
 * from each submodule's path to its URL.
 *
 * Returns an empty object when the commit has no `.gitmodules` file.
 * Submodules for which only one of `path`/`url` is listed are omitted
 * from the result, and a warning is logged for each. If two submodules
 * list the same path, the one whose name appeared later wins.
 *
 * Throws if the `rev-parse` invocation fails with anything other than
 * exit status 1, or if `git config` emits an entry without a
 * key/value separator.
 */
function loadSubmoduleUrls(
  git: GitDriver,
  commitHash: Hash
): {[path: string]: string} {
  const gitmodulesRef = `${commitHash}:.gitmodules`;
  const gitmodulesBlob: string | null = (() => {
    try {
      return git(["rev-parse", "--quiet", "--verify", gitmodulesRef]).trim();
    } catch (e) {
      if (e.status === 1) {
        // No .gitmodules file here.
        return null;
      } else {
        throw e;
      }
    }
  })();
  if (gitmodulesBlob == null) {
    // No problem; there just weren't any submodules at this commit.
    return {};
  }

  // The output format of the following is `${key}\n${value}\0...`, as
  // specified in `git help config`'s section about the `-z` option.
  // The format is safe because keys are strictly validated; see the
  // function `git_config_parse_key` in `git/git:config.c`.
  const rawConfig = git(["config", "--blob", gitmodulesBlob, "--list", "-z"]);
  const configKeyValuePairs = rawConfig
    .split("\0")
    .filter((line) => line.length > 0)
    .map((line) => {
      const separator = line.indexOf("\n");
      if (separator < 0) {
        // Shouldn't happen, according to Git docs. Guard anyway.
        throw new Error(`Bad .gitmodules line at ${commitHash}: ${line}`);
      }
      return {
        key: line.substring(0, separator),
        value: line.substring(separator + 1),
      };
    });

  // Submodule names come straight from repository contents, so they are
  // grouped in a Map rather than a plain object: a submodule named
  // `constructor` or `toString` would otherwise resolve to an inherited
  // `Object.prototype` member, be mistaken for an existing entry, and
  // then be silently dropped (after mutating that inherited value).
  const submoduleInfoByKey: Map<
    string,
    {path: string | null, url: string | null}
  > = new Map();
  configKeyValuePairs.forEach(({key, value}) => {
    const match = key.match(GITMODULES_SUBMODULES_KEY_RE);
    if (!match) {
      return;
    }
    const submoduleKey = match[1];
    const kind = match[2];
    let info = submoduleInfoByKey.get(submoduleKey);
    if (info == null) {
      info = {path: null, url: null};
      submoduleInfoByKey.set(submoduleKey, info);
    }
    if (kind === "path") {
      info.path = value;
    } else if (kind === "url") {
      info.url = value;
    } else {
      throw new Error(`Invariant violation: bad kind: ${kind}`);
    }
  });

  const result: {[path: string]: string} = {};
  submoduleInfoByKey.forEach(({path, url}, submoduleKey) => {
    if (path != null && url != null) {
      // Define an own data property instead of assigning, so that a
      // path such as `__proto__` is recorded like any other key rather
      // than being swallowed by the prototype setter.
      Object.defineProperty(result, path, {
        value: url,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    } else {
      console.warn(`Partial submodule at ${commitHash}: ${submoduleKey}`);
    }
  });
  return result;
}
function findTrees(git: GitDriver, rootTrees: Set<Hash>): Tree[] { function findTrees(git: GitDriver, rootTrees: Set<Hash>): Tree[] {
const result: Tree[] = []; const result: Tree[] = [];
const visited: Set<Hash> = new Set(); const visited: Set<Hash> = new Set();

View File

@ -12,6 +12,7 @@ export type Commit = {|
+hash: Hash, +hash: Hash,
+parentHashes: $ReadOnlyArray<Hash>, +parentHashes: $ReadOnlyArray<Hash>,
+treeHash: Hash, +treeHash: Hash,
+submoduleUrls: {[path: string]: string},
|}; |};
export type Tree = {| export type Tree = {|
+hash: Hash, +hash: Hash,