Compress the RelationalView by removing post bodies (#747)

Our serialized RelationalView can get quite large; in the case of
TensorFlow, it's over 190MB. This is a problem, as GitHub Pages has a
hard cap of 100MB on hosted files.

As a temporary workaround, this commit introduces a method,
`compressByRemovingBody`, which strips away the bodies of every post. In
the longer term, we'll need a solution that scales with larger
repositories, e.g. sharding the relational view into smaller pieces.

Test plan: Unit tests were added. I've manually confirmed that the
newly-generated views are smaller (2.1MB vs 3.3MB), and that the
frontend continues to function.
This commit is contained in:
Dandelion Mané 2018-09-02 00:16:09 -07:00 committed by GitHub
parent 7f81337d74
commit 931f07de13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 0 deletions

View File

@ -30,6 +30,7 @@ export async function loadGithubData(options: Options): Promise<void> {
for (const response of responses) {
view.addData(response);
}
view.compressByRemovingBody();
const blob: Uint8Array = pako.gzip(JSON.stringify(view));
const outputFilename = path.join(options.outputDirectory, "view.json.gz");
return fs.writeFile(outputFilename, blob);

View File

@ -69,6 +69,33 @@ export class RelationalView {
this._addReferences();
}
/**
 * Shrink the RelationalView in place by blanking the body of every
 * post: issues, pulls, comments, and reviews. Each stored entry is
 * swapped for a shallow copy whose `body` is the empty string, so the
 * entry objects that were stored before the call are not modified.
 *
 * This is a space-saving hack: the bodies are not currently used once
 * the _addReferences step has run.
 */
compressByRemovingBody() {
  // Overwriting the value of an existing key while walking a Map neither
  // adds nor removes entries, so every entry is visited exactly once.
  this._issues.forEach((issue, address) => {
    this._issues.set(address, {...issue, body: ""});
  });
  this._pulls.forEach((pull, address) => {
    this._pulls.set(address, {...pull, body: ""});
  });
  this._comments.forEach((comment, address) => {
    this._comments.set(address, {...comment, body: ""});
  });
  this._reviews.forEach((review, address) => {
    this._reviews.set(address, {...review, body: ""});
  });
}
*repos(): Iterator<Repo> {
for (const entry of this._repos.values()) {
yield new Repo(this, entry);

View File

@ -267,6 +267,39 @@ describe("plugins/github/relationalView", () => {
expect(rv1).toEqual(rv2);
});
describe("compressByRemovingBody", () => {
  // An entity wrapper obtained before compression must keep reporting its
  // original, non-empty body after the view has been compressed.
  it("doesn't mutate the original entries", () => {
    const relationalView = new R.RelationalView();
    relationalView.addData(exampleData());
    const [firstIssue] = Array.from(relationalView.issues());
    expect(firstIssue.body()).not.toEqual("");
    relationalView.compressByRemovingBody();
    expect(firstIssue.body()).not.toEqual("");
  });

  // Before compression at least one post has a body; afterwards no issue,
  // pull, comment, or review may report anything but the empty string.
  it("removes bodies from all posts", () => {
    const relationalView = new R.RelationalView();
    relationalView.addData(exampleData());
    // True when any post of any kind in the view has a non-empty body.
    const somePostsHaveBodies = () =>
      [
        relationalView.issues(),
        relationalView.pulls(),
        relationalView.comments(),
        relationalView.reviews(),
      ].some((posts) => Array.from(posts).some((post) => post.body() !== ""));
    expect(somePostsHaveBodies()).toBe(true);
    relationalView.compressByRemovingBody();
    expect(somePostsHaveBodies()).toBe(false);
  });
});
describe("to/fromJSON", () => {
it("to->from->to is identity", () => {
const json1 = view.toJSON();