diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c80696..620c672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Store GitHub data compressed at rest, reducing space usage by 6–8× (#750) - Improve weight sliders display (#736) - Separate bots from users in the UI (#720) - Add a feedback link to the prototype (#715) diff --git a/package.json b/package.json index e6763d6..e8ca6bb 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "lodash.sortby": "^4.7.0", "mkdirp": "^0.5.1", "object-assign": "4.1.1", + "pako": "^1.0.6", "promise": "8.0.1", "react": "^16.4.1", "react-dom": "^16.4.1", diff --git a/sharness/test_build_static_site.t b/sharness/test_build_static_site.t index c54c90e..1c5c19a 100755 --- a/sharness/test_build_static_site.t +++ b/sharness/test_build_static_site.t @@ -219,7 +219,7 @@ test_expect_success TWO_REPOS \ test_expect_success TWO_REPOS \ "TWO_REPOS: should have data for the two repositories" ' for repo in sourcecred/example-git sourcecred/example-github; do - for file in github/view.json; do + for file in github/view.json.gz; do test -s "${data_dir}/${repo}/${file}" || return done done @@ -253,7 +253,7 @@ test_expect_success NO_REPOS \ test_expect_success NO_REPOS \ "NO_REPOS: should not have repository data" ' for repo in sourcecred/example-git sourcecred/example-github; do - for file in git/graph.json github/view.json; do + for file in git/graph.json github/view.json.gz; do test_must_fail test -f "${data_dir}/${repo}/${file}" || return done done diff --git a/src/plugins/github/loadGithubData.js b/src/plugins/github/loadGithubData.js index 752d35e..f9626a5 100644 --- a/src/plugins/github/loadGithubData.js +++ b/src/plugins/github/loadGithubData.js @@ -2,6 +2,7 @@ import fs from "fs-extra"; import path from "path"; +import pako from "pako"; import fetchGithubRepo from "./fetchGithubRepo"; import {RelationalView} from "./relationalView"; @@ -29,7 +30,7 @@ export async function loadGithubData(options: Options): Promise { for (const response of responses) { view.addData(response); } - const blob = JSON.stringify(view); - const outputFilename = path.join(options.outputDirectory, "view.json"); + const blob: Uint8Array = pako.gzip(JSON.stringify(view)); + const outputFilename = path.join(options.outputDirectory, "view.json.gz"); return fs.writeFile(outputFilename, blob); } diff --git a/src/plugins/github/pluginAdapter.js b/src/plugins/github/pluginAdapter.js index 1a4bd08..db05beb 100644 --- a/src/plugins/github/pluginAdapter.js +++ b/src/plugins/github/pluginAdapter.js @@ -1,4 +1,6 @@ // @flow +import pako from "pako"; + import type { StaticPluginAdapter as IStaticPluginAdapter, DynamicPluginAdapter as IDynamicPluginAdapater, @@ -94,13 +96,15 @@ export class StaticPluginAdapter implements IStaticPluginAdapter { } async load(assets: Assets, repo: Repo): Promise { const url = assets.resolve( - `/api/v1/data/data/${repo.owner}/${repo.name}/github/view.json` + `/api/v1/data/data/${repo.owner}/${repo.name}/github/view.json.gz` ); const response = await fetch(url); if (!response.ok) { return Promise.reject(response); } - const json = await response.json(); + const arrayBuffer = await response.arrayBuffer(); + const blob = new Uint8Array(arrayBuffer); + const json = JSON.parse(pako.ungzip(blob, {to: "string"})); const view = RelationalView.fromJSON(json); const graph = createGraph(view); return new DynamicPluginAdapter(view, graph); diff --git a/yarn.lock b/yarn.lock index 36bc56a..18868fc 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5658,7 +5658,7 @@ p-try@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/p-try/-/p-try-1.0.0.tgz#cbc79cdbaf8fd4228e13f621f2b1a237c1b207b3" -pako@~1.0.5: +pako@^1.0.6, pako@~1.0.5: version "1.0.6" resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.6.tgz#0101211baa70c4bca4a0f63f2206e97b7dfaf258"