sourcecred/sharness/test_build_static_site.t

263 lines
9.0 KiB
Perl
Raw Normal View History

2018-08-06 20:05:40 +00:00
#!/bin/sh
# Disable these lint rules globally:
# 2034 = unused variable (used by sharness)
# 2016 = parameter expansion in single quotes
# 1004 = backslash-newline in single quotes
# shellcheck disable=SC2034,SC2016,SC1004
:
# Description of this test file; the variable is consumed by sharness.
test_description='tests for scripts/build_static_site.sh'

# Ask git to skip the system-wide configuration and attributes files so
# that settings of the host machine do not leak into the tests.
GIT_CONFIG_NOSYSTEM=1
GIT_ATTR_NOSYSTEM=1
export GIT_CONFIG_NOSYSTEM GIT_ATTR_NOSYSTEM
# shellcheck disable=SC1091
. ./sharness.sh
# run [ARGS...]
# Invoke scripts/build_static_site.sh from the git repository that
# contains this test script (located via "$0"), forwarding ARGS
# unchanged. The body is a subshell, so "set -eu" and the "toplevel"
# variable do not leak into the caller.
#
# Exit status: that of the build script, or that of the failed
# repository lookup if the top-level directory cannot be determined.
run() (
    set -eu
    # "set -e" is ignored whenever this function runs as a non-final
    # member of an "&&" list, which is how the tests call it. Check the
    # lookup explicitly so that a failure cannot fall through to
    # executing "/scripts/build_static_site.sh" with an empty prefix.
    toplevel="$(git -C "$(dirname "$0")" rev-parse --show-toplevel)" \
        || exit "$?"
    "${toplevel}"/scripts/build_static_site.sh "$@"
)
#
# Start by checking a bunch of easy cases related to the argument
# parser, mostly about rejecting various ill-formed invocations.
# --help: expects the usage text in the captured stdout and an empty
# stderr.
test_expect_success "should print a help message" '
    run --help 2>err >msg &&
    test_must_be_empty err &&
    test_path_is_file msg &&
    grep -q -F -e "usage: build_static_site.sh" msg
'

# No arguments at all: the invocation must fail and name the missing
# target directory.
test_expect_success "should fail with no target" '
    test_must_fail run 2>err &&
    grep -q -F -e "target directory not specified" err
'

# --target given as the last word, with no value after it.
test_expect_success "should fail with missing target value" '
    test_must_fail run --target 2>err &&
    grep -q -F -e "missing value for --target" err
'

# --target given twice, each time with an existing directory.
test_expect_success "should fail with multiple targets" '
    mkdir one two &&
    test_must_fail run --target one --target two 2>err &&
    grep -q -F -e "--target specified multiple times" err
'
# An existing regular file is not an acceptable target: the invocation
# must fail and the file's contents must be left unchanged.
test_expect_success "should fail with a file as target" '
    printf "important\nstuff" >important_data &&
    test_must_fail run --target important_data 2>err &&
    grep -qF -- "target is not a directory" err &&
    printf "important\nstuff" | test_cmp - important_data
'

# A target path that descends through a regular file cannot be created;
# again the file must survive untouched.
test_expect_success "should fail with a target under a file" '
    printf "important\nstuff" >important_data &&
    test_must_fail run --target important_data/something 2>err &&
    grep -q -- "cannot create directory.*Not a directory" err &&
    printf "important\nstuff" | test_cmp - important_data
'

# A directory that already has contents (here only a dotfile) must be
# rejected, and the existing file must be preserved.
test_expect_success "should fail with a nonempty directory as target" '
    mkdir important_dir &&
    printf "redacted\n" >important_dir/.wallet.dat &&
    test_must_fail run --target important_dir 2>err &&
    grep -qF -- "target directory is nonempty: important_dir" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'
# Empty directory used as a well-formed --target by the tests below, so
# that only the --repo/--cname handling is under test. Each test also
# re-checks that important_dir/.wallet.dat still has its contents.
mkdir putative_output

test_expect_success "should fail with missing repo value" '
    test_must_fail run --target putative_output --repo 2>err &&
    grep -q -F -e "missing value for --repo" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with missing cname value" '
    test_must_fail run --target putative_output --cname 2>err &&
    grep -q -F -e "missing value for --cname" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with empty cname" '
    test_must_fail run --target putative_output --cname "" 2>err &&
    grep -q -F -e "empty value for --cname" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with multiple cname values" '
    test_must_fail run --target putative_output --cname a.com --cname b.com 2>err &&
    grep -q -F -e "--cname specified multiple times" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'
#
# Now, actually generate output in two cases: one with repositories, and
# one with no repositories. We can only do this if we have a token.
# Declare the HAVE_GITHUB_TOKEN prerequisite only when a non-empty
# SOURCECRED_GITHUB_TOKEN is present in the environment.
test -z "${SOURCECRED_GITHUB_TOKEN:-}" ||
    test_set_prereq HAVE_GITHUB_TOKEN
# run_build PREREQ_NAME DESCRIPTION [FLAGS...]
# Build the site with the given FLAGS, and create a prereq PREREQ_NAME
# to be used in any tests that depend on this build. The build test
# itself has the EXPENSIVE and HAVE_GITHUB_TOKEN prereqs.
#
# This function deliberately assigns global variables (plain sh has no
# "local"); the test bodies it registers, and tests registered after
# it returns, read them:
#   output_dir  build_output/output_PREREQ_NAME, passed as --target
#   api_dir     ${output_dir}/api/v1/data
#   data_dir    ${api_dir}/data
#   unsafe_arg  first argument containing a character outside
#               [A-Za-z0-9:/_.-], or empty if all arguments are safe
#   flags       command-line text spliced into the build test body;
#               empty when an unsafe argument was found
run_build() {
    prereq_name="$1"; shift
    description="$1"; shift
    output_dir="build_output/output_${prereq_name}"
    api_dir="${output_dir}/api/v1/data"
    data_dir="${api_dir}/data"
    unsafe_arg=
    for arg in "${output_dir}" "$@"; do
        # The arguments are pasted verbatim into shell source that is
        # later eval'd, so only a conservative character set is allowed.
        # A "case" pattern inspects the whole string, so embedded
        # newlines are caught as well.
        case "${arg}" in
            *[!A-Za-z0-9:/_.-]*)
                unsafe_arg="${arg}"
                break
                ;;
        esac
    done
    # Never splice unsafe text into the test body: the guard inside the
    # body only short-circuits the first command of the spliced text,
    # so anything after a ";" or newline would still be executed.
    if [ -n "${unsafe_arg}" ]; then
        flags=
    else
        flags="--target $output_dir $*"
    fi
    test_expect_success EXPENSIVE,HAVE_GITHUB_TOKEN \
        "${prereq_name}: ${description}" '
        if [ -n "${unsafe_arg}" ]; then
            printf >&2 "fatal: potentially unsafe argument: %s\n" "${unsafe_arg}" &&
            false
        fi &&
        run '"${flags}"' >out 2>err &&
        test_must_fail grep -vF \
            -e "Removing contents of build directory: " \
            -e "info: loading repository" \
            err &&
        test_path_is_dir "${output_dir}" &&
        test_path_is_dir "${api_dir}" &&
        test_set_prereq "${prereq_name}"
    '
    test_expect_success "${prereq_name}" \
        "${prereq_name}: should have no cache" '
        test_must_fail test_path_is_dir "${api_dir}/cache"
    '
    # Exactly one static/js/main.*.js bundle must exist. The glob is
    # expanded unquoted on purpose; when nothing matches, the pattern
    # stays literal and the -e check reports the missing bundle.
    test_expect_success "${prereq_name}" \
        "${prereq_name}: should have a bundle" '
        js_bundle_path= &&
        js_bundle_path_glob="${output_dir}"/static/js/main.*.js &&
        for main_js in ${js_bundle_path_glob}; do
            if ! [ -e "${main_js}" ]; then
                printf >&2 "fatal: no main bundle found\n" &&
                return 1
            elif [ -n "${js_bundle_path}" ]; then
                printf >&2 "fatal: multiple main bundles found:\n" &&
                printf >&2 "    %s\n" ${js_bundle_path_glob} &&
                return 1
            else
                js_bundle_path="${main_js}"
            fi
        done
    '
}
# test_pages PREREQ_NAME
# Test that the PREREQ_NAME build output includes a PNG favicon, a
# valid home page, a valid prototype page, and a valid Discord invite
# page (which should be a redirect without any script tag).
#
# Every registered test is gated on the PREREQ_NAME prereq. The test
# bodies read the global "output_dir", so it must still refer to the
# build being checked when this function is called.
test_pages() {
    prereq="$1"
    test_expect_success "${prereq}" "${prereq}: should have a favicon" '
        test_path_is_file "${output_dir}/favicon.png" &&
        file -b --mime-type "${output_dir}/favicon.png" >./favicon_filetype &&
        printf "image/png\n" | test_cmp - ./favicon_filetype &&
        rm ./favicon_filetype
    '
    test_expect_success "${prereq}" \
        "${prereq}: should have a home page and a prototype" '
        test_path_is_file "${output_dir}/index.html" &&
        grep -qF "<script src=" "${output_dir}/index.html" &&
        test_path_is_file "${output_dir}/prototype/index.html" &&
        grep -qF "<script src=" "${output_dir}/prototype/index.html"
    '
    # The invite page must contain the meta-refresh tag as a complete
    # line (grep -x) and must not load any script.
    test_expect_success "${prereq}" \
        "${prereq}: should have a discord-invite with redirect" '
        file="${output_dir}/discord-invite/index.html" &&
        test_path_is_file "${file}" &&
        test_must_fail grep -qF "<script src=" "${file}" &&
        url="https://discord.gg/tsBTgc9" &&
        needle="<meta http-equiv=\"refresh\" content=\"0;url=$url\" />" &&
        grep -qxF "${needle}" "${file}"
    '
}
# Build once with two repositories and a CNAME, then run the page
# checks against that output. The lone ";" terminates the command so
# that every flag line can end with a backslash.
run_build TWO_REPOS \
    "should build the site with two repositories and a CNAME" \
    --no-backend \
    --cname sourcecred.example.com \
    --repo sourcecred/example-git \
    --repo sourcecred/example-github \
    ;
test_pages TWO_REPOS
# The registry file must mention exactly two "name" keys. The count is
# taken per occurrence (grep -o), not per line. Some wc implementations
# pad the number with leading spaces, so those are stripped before the
# comparison.
test_expect_success TWO_REPOS \
    "TWO_REPOS: should have a registry with two repositories" '
    registry_file="${api_dir}/repositoryRegistry.json" &&
    test_path_is_file "${registry_file}" &&
    grep -oF "\"name\":" "${registry_file}" | wc -l | tr -d " " >actual_count &&
    printf "2\n" | test_cmp - actual_count
'
# The build output captured in "out" must contain a line that is exactly
# the expected REPO_REGISTRY value. The first grep is not quiet, so any
# REPO_REGISTRY lines are also written to the test's stdout.
test_expect_success TWO_REPOS \
    "TWO_REPOS: should have a repo registry loaded into env" '
    grep -F "REPO_REGISTRY" out &&
    grep -xF "REPO_REGISTRY: [{\"repoId\":{\"name\":\"example-git\",\"owner\":\"sourcecred\"}},{\"repoId\":{\"name\":\"example-github\",\"owner\":\"sourcecred\"}}]" out
'
# Every repository must have a non-empty (test -s) gzipped GitHub view
# under the data directory; the first missing or empty file fails the
# test.
test_expect_success TWO_REPOS \
    "TWO_REPOS: should have data for the two repositories" '
    for repo in sourcecred/example-git sourcecred/example-github; do
        for file in github/view.json.gz; do
            test -s "${data_dir}/${repo}/${file}" || return
        done
    done
'
# The CNAME file must hold exactly the requested host name, with no
# trailing newline (the expected text is printed without one).
test_expect_success TWO_REPOS \
    "TWO_REPOS: should have a correct CNAME record" '
    cname_file="${output_dir}/CNAME" &&
    test_path_is_file "${cname_file}" &&
    printf "%s" "sourcecred.example.com" | test_cmp - "${cname_file}"
'
# The NO_REPOS prereq is only set by a successful run_build with that
# name, and run_build is also what points output_dir/api_dir/data_dir at
# the matching output. Build with no --repo and no --cname flags first;
# without this build every NO_REPOS test would be skipped.
run_build NO_REPOS \
    "should build the site with no repositories and no CNAME" \
    --no-backend \
    ;
test_pages NO_REPOS
# With no repositories there must be no registry file of any kind at
# the registry path.
test_expect_success NO_REPOS \
    "NO_REPOS: should not have a repository registry" '
    registry_file="${api_dir}/repositoryRegistry.json" &&
    test_must_fail [ -e "${registry_file}" ]
'

# The captured build output must contain a line that is exactly the
# empty registry. The first grep is not quiet, so any REPO_REGISTRY
# lines are also written to the test's stdout.
test_expect_success NO_REPOS \
    "NO_REPOS: should have empty repo registry loaded into env" '
    grep -F -e "REPO_REGISTRY" out &&
    grep -x -F -e "REPO_REGISTRY: []" out
'
# None of the per-repository data files may exist as regular files;
# the first one found fails the test.
test_expect_success NO_REPOS \
    "NO_REPOS: should not have repository data" '
    for repo in sourcecred/example-git sourcecred/example-github; do
        for file in git/graph.json github/view.json.gz; do
            test_must_fail test -f "${data_dir}/${repo}/${file}" || return
        done
    done
'
# No --cname flag was passed for this build, so nothing may exist at
# the CNAME path.
test_expect_success NO_REPOS \
    "NO_REPOS: should have no CNAME record" '
    test_must_fail [ -e "${output_dir}/CNAME" ]
'

# Tell sharness that all tests have been registered.
test_done
# vim: ft=sh