mirror of
https://github.com/status-im/sourcecred.git
synced 2025-02-03 08:13:59 +00:00
Store GitHub data gzipped at rest (#751)
Summary:
We store the relational view in `view.json.gz` instead of `view.json`,
taking advantage of the isomorphic `pako` library for gzip encoding and
decoding.

Sample space savings (note that post bodies are included; i.e., #747 has
not been applied):

```
   SAVE     OLD (B)     NEW (B) REPO
  89.7%       25326        2617 sourcecred/example-github
  82.9%     3257576      555948 sourcecred/sourcecred
  85.2%    11287621     1665884 ipfs/js-ipfs
  88.0%    20953425     2520358 gitcoinco/web
  84.4%    38196825     5951459 ipfs/go-ipfs
  84.9%   205770642    31101452 tensorflow/tensorflow
```

<details>
<summary>Script to generate space savings output</summary>

```shell
savings() {
    printf '% 7s % 11s % 11s %s\n' 'SAVE' 'OLD (B)' 'NEW (B)' 'REPO'
    for repo; do
        file="${SOURCECRED_DIRECTORY}/data/${repo}/github/view.json.gz"
        if ! [ -f "${file}" ]; then
            printf >&2 'warn: no such file %s\n' "${file}"
            continue
        fi
        script="$(sed -e 's/^ *//' <<EOF
            repo = '${repo}'
            pre_size = $(<"${file}" gzip -dc | wc -c)
            post_size = $(<"${file}" wc -c)
            percentage = '%0.1f%%' % (100 * (1 - post_size / pre_size))
            p = '% 7s % 11d % 11d %s' % (percentage, pre_size, post_size, repo)
            print(p)
EOF
        )"
        python3 -c "${script}"
    done
}
```

</details>

Closes #750.

Test Plan:
Comparing the raw old version with the decompressed new version shows
that they are identical:

```
$ <~/tmp/sourcecred/data/sourcecred/example-github/github/view.json \
> shasum -a 256 -
63853b9d3f918274aafacf5198787e18185a61b9c95faf640a1e61f5d11fa19f  -
$ <~/tmp/sourcecred/data/sourcecred/example-github/github/view.json.gz \
> gzip -dc | shasum -a 256
63853b9d3f918274aafacf5198787e18185a61b9c95faf640a1e61f5d11fa19f  -
```

Additionally, `yarn test --full` passes, and `yarn start` still loads
data and runs PageRank properly.

wchargin-branch: gzip-relational-view
This commit is contained in:
parent
f1a6b37524
commit
7f81337d74
@ -1,6 +1,7 @@
|
||||
# Changelog
|
||||
|
||||
## [Unreleased]
|
||||
- Store GitHub data compressed at rest, reducing space usage by 6–8× (#750)
|
||||
- Improve weight sliders display (#736)
|
||||
- Separate bots from users in the UI (#720)
|
||||
- Add a feedback link to the prototype (#715)
|
||||
|
@ -19,6 +19,7 @@
|
||||
"lodash.sortby": "^4.7.0",
|
||||
"mkdirp": "^0.5.1",
|
||||
"object-assign": "4.1.1",
|
||||
"pako": "^1.0.6",
|
||||
"promise": "8.0.1",
|
||||
"react": "^16.4.1",
|
||||
"react-dom": "^16.4.1",
|
||||
|
@ -219,7 +219,7 @@ test_expect_success TWO_REPOS \
|
||||
test_expect_success TWO_REPOS \
|
||||
"TWO_REPOS: should have data for the two repositories" '
|
||||
for repo in sourcecred/example-git sourcecred/example-github; do
|
||||
for file in github/view.json; do
|
||||
for file in github/view.json.gz; do
|
||||
test -s "${data_dir}/${repo}/${file}" || return
|
||||
done
|
||||
done
|
||||
@ -253,7 +253,7 @@ test_expect_success NO_REPOS \
|
||||
test_expect_success NO_REPOS \
|
||||
"NO_REPOS: should not have repository data" '
|
||||
for repo in sourcecred/example-git sourcecred/example-github; do
|
||||
for file in git/graph.json github/view.json; do
|
||||
for file in git/graph.json github/view.json.gz; do
|
||||
test_must_fail test -f "${data_dir}/${repo}/${file}" || return
|
||||
done
|
||||
done
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
import fs from "fs-extra";
|
||||
import path from "path";
|
||||
import pako from "pako";
|
||||
|
||||
import fetchGithubRepo from "./fetchGithubRepo";
|
||||
import {RelationalView} from "./relationalView";
|
||||
@ -29,7 +30,7 @@ export async function loadGithubData(options: Options): Promise<void> {
|
||||
for (const response of responses) {
|
||||
view.addData(response);
|
||||
}
|
||||
const blob = JSON.stringify(view);
|
||||
const outputFilename = path.join(options.outputDirectory, "view.json");
|
||||
const blob: Uint8Array = pako.gzip(JSON.stringify(view));
|
||||
const outputFilename = path.join(options.outputDirectory, "view.json.gz");
|
||||
return fs.writeFile(outputFilename, blob);
|
||||
}
|
||||
|
@ -1,4 +1,6 @@
|
||||
// @flow
|
||||
import pako from "pako";
|
||||
|
||||
import type {
|
||||
StaticPluginAdapter as IStaticPluginAdapter,
|
||||
DynamicPluginAdapter as IDynamicPluginAdapater,
|
||||
@ -94,13 +96,15 @@ export class StaticPluginAdapter implements IStaticPluginAdapter {
|
||||
}
|
||||
async load(assets: Assets, repo: Repo): Promise<IDynamicPluginAdapater> {
|
||||
const url = assets.resolve(
|
||||
`/api/v1/data/data/${repo.owner}/${repo.name}/github/view.json`
|
||||
`/api/v1/data/data/${repo.owner}/${repo.name}/github/view.json.gz`
|
||||
);
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
return Promise.reject(response);
|
||||
}
|
||||
const json = await response.json();
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
const blob = new Uint8Array(arrayBuffer);
|
||||
const json = JSON.parse(pako.ungzip(blob, {to: "string"}));
|
||||
const view = RelationalView.fromJSON(json);
|
||||
const graph = createGraph(view);
|
||||
return new DynamicPluginAdapter(view, graph);
|
||||
|
@ -5658,7 +5658,7 @@ p-try@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/p-try/-/p-try-1.0.0.tgz#cbc79cdbaf8fd4228e13f621f2b1a237c1b207b3"
|
||||
|
||||
pako@~1.0.5:
|
||||
pako@^1.0.6, pako@~1.0.5:
|
||||
version "1.0.6"
|
||||
resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.6.tgz#0101211baa70c4bca4a0f63f2206e97b7dfaf258"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user