mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-16 22:46:50 +00:00
github: use plaintext owner and name in cache key (#1685)
Summary: This is mostly a QoL improvement for maintainers. GraphQL mirrors are stored in the SourceCred cache directory, but until now have been hard to tell apart. All IDs looked like long, similar hex strings; e.g.: ``` github_736f75726365637265642d746573742f6578616d706c652d676974687562.db github_736f75726365637265642f646f6373.db github_736f75726365637265642f736f7572636563726564.db ``` It’s too hard to tell that these are `sourcecred-test/example-github`, `sourcecred/docs`, and `sourcecred/sourcecred` in that order. (Yes, you can work it out if you try; that’s not good enough.) With dozens of caches loaded, finding the right one to poke at for debugging or progress-checking takes way too much scripting. The purpose of this abstruse encoding was portable filename safety, but we can actually achieve this with human-readable names like these: ``` github_sourcecred-test_example_github.db github_sourcecred_docs.db github_sourcecred_sourcecred.db ``` This suffices because (a) login names cannot contain underscores, so we can safely use that as a separator, and (b) GitHub disallows collisions on names that are equal ignoring case, so we can convert all names to lowercase without introducing collisions. This change orphans existing caches. Running `sourcecred clear --cache` will clean those up. Test Plan: Try to create a GitHub repository with the same name as a repository that you already have, but with inverted case; note that this is disallowed. Then, note that loading `sourcecred-test/example-github` still works and produces a database with nicely readable name. wchargin-branch: github-readable-db-names
This commit is contained in:
parent
09739512ab
commit
86c121e3c6
@ -9,14 +9,33 @@ export opaque type CacheId: string = string;
|
||||
|
||||
/**
|
||||
* Derives the CacheId for a RepoId.
|
||||
* Returned CacheId's will be:
|
||||
*
|
||||
* Returned `CacheId`s will be:
|
||||
* - Deterministic
|
||||
* - Unique for this plugin
|
||||
* - Lowercase
|
||||
* - Safe to use for filenames
|
||||
* - Distinct for semantically distinct inputs (input IDs that differ
|
||||
* only in case may map to the same output, because GitHub does not
|
||||
* permit collisions-modulo-case)
|
||||
*/
|
||||
export function cacheIdForRepoId(repoId: RepoId): CacheId {
|
||||
const repoString = repoIdToString(repoId);
|
||||
const repoStringHex = Buffer.from(repoString).toString("hex");
|
||||
return `github_${repoStringHex}`.toLowerCase();
|
||||
// GitHub owner (user/organization) and repository names may be in
|
||||
// mixed case, but GitHub prevents mixed-case collisions: e.g., if
|
||||
// `foo` is a user then `Foo` cannot also be a user, and likewise for
|
||||
// repositories. GitHub login names are DNS-safe (`[0-9A-Za-z-]` only)
|
||||
// and so are filename-safe. Repository names have a slightly larger
|
||||
// character set, including underscore, but are also filename-safe.
|
||||
// Because login may not contain an underscore, it thus suffices to
|
||||
// use an underscore as the delimiter. The resulting filename uses
|
||||
// only `[0-9A-Za-z_.-]` and so is valid on all major filesystems.
|
||||
const owner = repoId.owner.toLowerCase();
|
||||
const name = repoId.name.toLowerCase();
|
||||
if (owner.includes("_")) {
|
||||
throw new Error(
|
||||
"unexpected underscore in GitHub owner name would be ambiguous: " +
|
||||
repoIdToString(repoId)
|
||||
);
|
||||
}
|
||||
return `github_${owner}_${name}`;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
// @flow
|
||||
|
||||
import {makeRepoId, repoIdToString} from "./repoId";
|
||||
import {makeRepoId, type RepoId} from "./repoId";
|
||||
import {type CacheId, cacheIdForRepoId} from "./cacheId";
|
||||
|
||||
describe("plugins/github/cacheId", () => {
|
||||
@ -24,19 +24,6 @@ describe("plugins/github/cacheId", () => {
|
||||
const id = cacheIdForRepoId(sampleRepo);
|
||||
expect(id).toMatch(/^github_/);
|
||||
});
|
||||
it("should contain a hex encoding of 'repoIdToString'", () => {
|
||||
const id = cacheIdForRepoId(sampleRepo);
|
||||
const repoIdString = repoIdToString(sampleRepo);
|
||||
const expectedHex = Buffer.from(repoIdString).toString("hex");
|
||||
expect(id).toContain(expectedHex);
|
||||
});
|
||||
it("should treat repoId as case sensitive", () => {
|
||||
const lowercaseId = cacheIdForRepoId(makeRepoId("foo", "bar"));
|
||||
const upperOwnerId = cacheIdForRepoId(makeRepoId("FOO", "bar"));
|
||||
const upperNameId = cacheIdForRepoId(makeRepoId("foo", "BAR"));
|
||||
expect(upperOwnerId).not.toEqual(lowercaseId);
|
||||
expect(upperNameId).not.toEqual(lowercaseId);
|
||||
});
|
||||
it("should have lowercase output", () => {
|
||||
const lowercaseId = cacheIdForRepoId(makeRepoId("foo", "bar"));
|
||||
const upperOwnerId = cacheIdForRepoId(makeRepoId("FOO", "bar"));
|
||||
@ -46,24 +33,46 @@ describe("plugins/github/cacheId", () => {
|
||||
expect(upperNameId).not.toMatch(/[A-Z]/);
|
||||
});
|
||||
it("should deterministically match examples", () => {
|
||||
const ids = [
|
||||
cacheIdForRepoId(makeRepoId("sourcecred", "sourcecred")),
|
||||
cacheIdForRepoId(makeRepoId("sourcecred", "sourcecred.github.io")),
|
||||
cacheIdForRepoId(makeRepoId("foo", "Something-Good")),
|
||||
cacheIdForRepoId(makeRepoId("foo", "still_good")),
|
||||
cacheIdForRepoId(makeRepoId("fooolio", "foo-bar.bar-99_x")),
|
||||
cacheIdForRepoId(makeRepoId("FOOolio", "foo-bar.bar-99_x")),
|
||||
const expected = [
|
||||
{
|
||||
repoId: makeRepoId("sourcecred", "sourcecred"),
|
||||
cacheId: "github_sourcecred_sourcecred",
|
||||
},
|
||||
{
|
||||
repoId: makeRepoId("sourcecred", "sourcecred.github.io"),
|
||||
cacheId: "github_sourcecred_sourcecred.github.io",
|
||||
},
|
||||
{
|
||||
repoId: makeRepoId("foo", "Something-Good"),
|
||||
cacheId: "github_foo_something-good",
|
||||
},
|
||||
{
|
||||
repoId: makeRepoId("foo", "still_good"),
|
||||
cacheId: "github_foo_still_good",
|
||||
},
|
||||
{
|
||||
repoId: makeRepoId("fooolio", "foo-bar.bar-99_x"),
|
||||
cacheId: "github_fooolio_foo-bar.bar-99_x",
|
||||
},
|
||||
{
|
||||
repoId: makeRepoId("FOOolio", "foo-bar.bar-99_x"),
|
||||
cacheId: "github_fooolio_foo-bar.bar-99_x",
|
||||
},
|
||||
];
|
||||
expect(ids).toMatchInlineSnapshot(`
|
||||
Array [
|
||||
"github_736f75726365637265642f736f7572636563726564",
|
||||
"github_736f75726365637265642f736f75726365637265642e6769746875622e696f",
|
||||
"github_666f6f2f536f6d657468696e672d476f6f64",
|
||||
"github_666f6f2f7374696c6c5f676f6f64",
|
||||
"github_666f6f6f6c696f2f666f6f2d6261722e6261722d39395f78",
|
||||
"github_464f4f6f6c696f2f666f6f2d6261722e6261722d39395f78",
|
||||
]
|
||||
`);
|
||||
const output = expected.map(({repoId}) => ({
|
||||
repoId,
|
||||
cacheId: cacheIdForRepoId(repoId),
|
||||
}));
|
||||
expect(output).toEqual(expected);
|
||||
});
|
||||
it("should error if the owner name contains an undescore", () => {
|
||||
// GitHub doesn't allow such names because they're not valid DNS
|
||||
// labels. The `RepoId` type thus prevents constructing such
|
||||
// invalid instances; we fabricate one as a defensive check.
|
||||
const badRepoId: RepoId = ({owner: "h_mmm", name: "ok"}: any);
|
||||
expect(() => {
|
||||
cacheIdForRepoId(badRepoId);
|
||||
}).toThrow(/ambiguous/);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user