From 7dbecfdac6b75934a74d621c3ce3e0b1e2cb1bce Mon Sep 17 00:00:00 2001 From: William Chargin Date: Thu, 3 May 2018 10:39:03 -0700 Subject: [PATCH] Load submodule URLs at each commit (#185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: In Git, a tree may point to a commit directly. In our graph, we’d like to represent “submodule commits” explicitly, because, a priori, we do not know the repository to which the commit belongs. A submodule commit node will store the hash of the referent commit, as well as the URL to the subproject as listed in the .gitmodules file. In this commit, we load the list of those URLs into the in-memory repository. Shout-out to `git` for having an excellent command-line API: the `--blob` argument to `git-config` is perfect for this situation. Test Plan: Snapshot updates are readable and sufficient. wchargin-branch: load-submodule-urls --- src/plugins/git/demoData/example-git.json | 19 ++++++ src/plugins/git/loadRepository.js | 77 ++++++++++++++++++++++- src/plugins/git/types.js | 1 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/plugins/git/demoData/example-git.json b/src/plugins/git/demoData/example-git.json index 0918e93..b75424f 100644 --- a/src/plugins/git/demoData/example-git.json +++ b/src/plugins/git/demoData/example-git.json @@ -5,6 +5,9 @@ "parentHashes": [ "69c5aad50eec8f2a0a07c988c3b283a6490eb45b" ], + "submoduleUrls": { + "pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git" + }, "treeHash": "7be3ecfee5314ffa9b2d93fc4377792b2d6d70ed" }, "69c5aad50eec8f2a0a07c988c3b283a6490eb45b": { @@ -12,6 +15,9 @@ "parentHashes": [ "e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc" ], + "submoduleUrls": { + "pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git" + }, "treeHash": "bbf3b8b3d26a4f884b5c022d46851f593d329192" }, "8d287c3bfbf8455ef30187bf5153ffc1b6eef268": { @@ -19,6 +25,9 @@ "parentHashes": [ 
"c08ee3a4edea384d5291ffcbf06724a13ed72325" ], + "submoduleUrls": { + "pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git" + }, "treeHash": "3dfb84795e07341b05fad3a0d5a55f8304b2d7d8" }, "c08ee3a4edea384d5291ffcbf06724a13ed72325": { @@ -26,12 +35,16 @@ "parentHashes": [ "c2b51945e7457546912a8ce158ed9d294558d294" ], + "submoduleUrls": { + }, "treeHash": "2f7155e359fd0ecb96ffdca66fa45b6ed5792809" }, "c2b51945e7457546912a8ce158ed9d294558d294": { "hash": "c2b51945e7457546912a8ce158ed9d294558d294", "parentHashes": [ ], + "submoduleUrls": { + }, "treeHash": "bdff5d94193170015d6cbb549b7b630649428b1f" }, "d160cca97611e9dfed642522ad44408d0292e8ea": { @@ -39,6 +52,9 @@ "parentHashes": [ "8d287c3bfbf8455ef30187bf5153ffc1b6eef268" ], + "submoduleUrls": { + "pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git" + }, "treeHash": "569e1d383759903134df75230d63c0090196d4cb" }, "e8b7a8f19701cd5a25e4a097d513ead60e5f8bcc": { @@ -46,6 +62,9 @@ "parentHashes": [ "d160cca97611e9dfed642522ad44408d0292e8ea" ], + "submoduleUrls": { + "pygravitydefier": "https://github.com/sourcecred/example-git-submodule.git" + }, "treeHash": "819fc546cea489476ce8dc90785e9ba7753d0a8f" } }, diff --git a/src/plugins/git/loadRepository.js b/src/plugins/git/loadRepository.js index b62d211..c69ef5c 100644 --- a/src/plugins/git/loadRepository.js +++ b/src/plugins/git/loadRepository.js @@ -43,10 +43,85 @@ function findCommits(git: GitDriver, rootRef: string): Commit[] { .filter((line) => line.length > 0) .map((line) => { const [hash, treeHash, ...parentHashes] = line.trim().split(" "); - return {hash, parentHashes, treeHash}; + const submoduleUrls = loadSubmoduleUrls(git, hash); + return {hash, parentHashes, treeHash, submoduleUrls}; }); } +const GITMODULES_SUBMODULES_KEY_RE = /^submodule\.(.*)\.(path|url)$/; + +function loadSubmoduleUrls( + git: GitDriver, + commitHash: Hash +): {[path: string]: string} { + const gitmodulesRef = `${commitHash}:.gitmodules`; + const 
gitmodulesBlob: string | null = (() => { + try { + return git(["rev-parse", "--quiet", "--verify", gitmodulesRef]).trim(); + } catch (e) { + if (e.status === 1) { + // No .gitmodules file here. + return null; + } else { + throw e; + } + } + })(); + if (gitmodulesBlob == null) { + // No problem; there just weren't any submodules at this commit. + return {}; + } + + // The output format of the following is `${key}\n${value}\0...`, as + // specified in `git help config`'s section about the `-z` option. + // The format is safe because keys are strictly validated; see the + // function `git_config_parse_key` in `git/git:config.c`. + const rawConfig = git(["config", "--blob", gitmodulesBlob, "--list", "-z"]); + const configKeyValuePairs = rawConfig + .split("\0") + .filter((line) => line.length > 0) + .map((line) => { + const separator = line.indexOf("\n"); + if (separator < 0) { + // Shouldn't happen, according to Git docs. Guard anyway. + throw new Error(`Bad .gitmodules line at ${commitHash}: ${line}`); + } + return { + key: line.substring(0, separator), + value: line.substring(separator + 1), + }; + }); + + const submoduleInfoByKey: { + [submoduleKey: string]: {path: string | null, url: string | null}, + } = {}; + configKeyValuePairs.forEach(({key, value}) => { + const match = key.match(GITMODULES_SUBMODULES_KEY_RE); + if (!match) { + return; + } + const [_, submoduleKey, kind] = match; + if (submoduleInfoByKey[submoduleKey] == null) { + submoduleInfoByKey[submoduleKey] = {path: null, url: null}; + } + if (kind !== "path" && kind !== "url") { + throw new Error(`Invariant violation: bad kind: ${kind}`); + } + submoduleInfoByKey[submoduleKey][kind] = value; + }); + + const result = {}; + Object.keys(submoduleInfoByKey).forEach((submoduleKey) => { + const {path, url} = submoduleInfoByKey[submoduleKey]; + if (path != null && url != null) { + result[path] = url; + } else { + console.warn(`Partial submodule at ${commitHash}: ${submoduleKey}`); + } + }); + return result; +} 
+ function findTrees(git: GitDriver, rootTrees: Set<Hash>): Tree[] { const result: Tree[] = []; const visited: Set<Hash> = new Set(); diff --git a/src/plugins/git/types.js b/src/plugins/git/types.js index c10d19d..ee3c140 100644 --- a/src/plugins/git/types.js +++ b/src/plugins/git/types.js @@ -12,6 +12,7 @@ export type Commit = {| +hash: Hash, +parentHashes: $ReadOnlyArray<Hash>, +treeHash: Hash, + +submoduleUrls: {[path: string]: string}, |}; export type Tree = {| +hash: Hash,