sourcecred/sharness/test_build_static_site.t

289 lines
10 KiB
Perl
Raw Normal View History

2018-08-06 20:05:40 +00:00
#!/bin/sh
# Disable these lint rules globally:
# 2034 = unused variable (used by sharness)
# 2016 = parameter expansion in single quotes
# 1004 = backslash-newline in single quotes
# shellcheck disable=SC2034,SC2016,SC1004
:
# Not referenced directly in this file; consumed by sharness once it is
# sourced below (hence the SC2034 suppression at the top of the file).
test_description='tests for scripts/build_static_site.sh'
# Tell Git to skip the system-wide configuration and attributes files,
# presumably so the host's Git setup cannot influence these tests.
export GIT_CONFIG_NOSYSTEM=1
export GIT_ATTR_NOSYSTEM=1
# Load the sharness library from the current directory. It is expected to
# provide the test_* helpers used throughout this script (the library
# itself is not part of this file).
# shellcheck disable=SC1091
. ./sharness.sh
# run [ARGS...]
# Invoke scripts/build_static_site.sh from the Git work tree that contains
# this test script, forwarding ARGS unchanged. The body is a subshell, so
# the `set -eu` below never leaks into the calling test.
run() (
    set -eu
    repo_root="$(git -C "$(dirname "$0")" rev-parse --show-toplevel)"
    "${repo_root}/scripts/build_static_site.sh" "$@"
)
#
# Start by checking a bunch of easy cases related to the argument
# parser, mostly about rejecting various ill-formed invocations.

test_expect_success "should print a help message" '
    run --help >msg 2>err &&
    test_must_be_empty err &&
    test_path_is_file msg &&
    grep -qF "usage: build_static_site.sh" msg
'

test_expect_success "should fail with no target" '
    test_must_fail run 2>err &&
    grep -qF -- "target directory not specified" err
'

test_expect_success "should fail with missing target value" '
    test_must_fail run --target 2>err &&
    grep -qF -- "missing value for --target" err
'

test_expect_success "should fail with multiple targets" '
    mkdir one two &&
    test_must_fail run --target one --target two 2>err &&
    grep -qF -- "--target specified multiple times" err
'

# The next three cases each plant a decoy file and, after the expected
# failure, verify that its contents were left untouched.

test_expect_success "should fail with a file as target" '
    printf "important\nstuff" >important_data &&
    test_must_fail run --target important_data 2>err &&
    grep -qF -- "target is not a directory" err &&
    printf "important\nstuff" | test_cmp - important_data
'

test_expect_success "should fail with a target under a file" '
    printf "important\nstuff" >important_data &&
    test_must_fail run --target important_data/something 2>err &&
    grep -q -- "cannot create directory.*Not a directory" err &&
    printf "important\nstuff" | test_cmp - important_data
'

test_expect_success "should fail with a nonempty directory as target" '
    mkdir important_dir &&
    printf "redacted\n" >important_dir/.wallet.dat &&
    test_must_fail run --target important_dir 2>err &&
    grep -qF -- "target directory is nonempty: important_dir" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'
# Every case below passes a valid, empty target directory, so that any
# failure is attributable to the option under test. Each case also
# re-checks that the decoy file created by the nonempty-directory test
# is still intact.
mkdir putative_output

test_expect_success "should fail with missing repo value" '
    test_must_fail run --target putative_output --repo 2>err &&
    grep -qF -- "missing value for --repo" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with missing feedback-url value" '
    test_must_fail run --target putative_output --feedback-url 2>err &&
    grep -qF -- "missing value for --feedback-url" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with empty feedback-url" '
    test_must_fail run --target putative_output --feedback-url "" 2>err &&
    grep -qF -- "empty value for --feedback-url" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with multiple feedback-url values" '
    test_must_fail run --target putative_output \
        --feedback-url a.com --feedback-url b.com 2>err &&
    grep -qF -- "--feedback-url specified multiple times" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with missing cname value" '
    test_must_fail run --target putative_output --cname 2>err &&
    grep -qF -- "missing value for --cname" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with empty cname" '
    test_must_fail run --target putative_output --cname "" 2>err &&
    grep -qF -- "empty value for --cname" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'

test_expect_success "should fail with multiple cname values" '
    test_must_fail run --target putative_output \
        --cname a.com --cname b.com 2>err &&
    grep -qF -- "--cname specified multiple times" err &&
    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
'
#
# The remaining tests actually generate output, in two cases: one with
# repositories and one without. That is only possible with a GitHub
# token, so advertise the prereq only when a non-empty token is present.
[ -z "${SOURCECRED_GITHUB_TOKEN:-}" ] || test_set_prereq HAVE_GITHUB_TOKEN
# run_build PREREQ_NAME DESCRIPTION [FLAGS...]
# Build the site with the given FLAGS, and create a prereq PREREQ_NAME
# to be used in any tests that depend on this build. The build will
# itself have the EXPENSIVE and HAVE_GITHUB_TOKEN prereqs.
#
# Globals written (deliberately not local; later tests read them):
#   output_dir      build target, build_output/output_PREREQ_NAME
#   api_dir         ${output_dir}/api/v1/data
#   data_dir        ${api_dir}/data
#   js_bundle_path  path of the single main JS bundle, set by the
#                   "should have a bundle" test
#   prereq_name, description, flags, unsafe_arg, arg  (scratch)
#
# FLAGS are spliced textually into a test body that sharness evaluates,
# so every argument is vetted against a conservative character whitelist
# first. If any argument fails the check, nothing is spliced and the
# build test fails with a diagnostic.
run_build() {
    prereq_name="$1"; shift
    description="$1"; shift
    output_dir="build_output/output_${prereq_name}"
    api_dir="${output_dir}/api/v1/data"
    data_dir="${api_dir}/data"
    unsafe_arg=
    for arg in "${output_dir}" "$@"; do
        # A `case` pattern inspects the whole string, so embedded
        # newlines are rejected too (a line-oriented filter inside a
        # command substitution would let them through).
        case "${arg}" in
            *[!A-Za-z0-9:/_.-]*)
                unsafe_arg="${arg}"
                break
                ;;
        esac
    done
    if [ -n "${unsafe_arg}" ]; then
        # Never splice unvetted text into the evaluated test body: a `;`
        # or `&` in it would end the guard's `&&` chain and the rest of
        # the argument would run as a command.
        flags=
    else
        flags="--target ${output_dir} $*"
    fi
    test_expect_success EXPENSIVE,HAVE_GITHUB_TOKEN \
        "${prereq_name}: ${description}" '
        if [ -n "${unsafe_arg}" ]; then
            printf >&2 "fatal: potentially unsafe argument: %s\n" "${unsafe_arg}" &&
            false
        fi &&
        run '"${flags}"' 2>err &&
        test_must_fail grep -vF \
            -e "Removing contents of build directory: " \
            -e "warn: running \`yarn backend\`" \
            -e "warn: if this offends you" \
            -e "info: loading repository" \
            err &&
        test_path_is_dir "${output_dir}" &&
        test_path_is_dir "${api_dir}" &&
        test_set_prereq "${prereq_name}"
    '
    test_expect_success "${prereq_name}" \
        "${prereq_name}: should have no cache" '
        test_must_fail test_path_is_dir "${api_dir}/cache"
    '
    # Exactly one main bundle must exist. If the glob matches nothing,
    # the loop sees the literal pattern, which fails the -e check.
    test_expect_success "${prereq_name}" \
        "${prereq_name}: should have a bundle" '
        js_bundle_path= &&
        js_bundle_path_glob="${output_dir}"/static/js/main.*.js &&
        for main_js in ${js_bundle_path_glob}; do
            if ! [ -e "${main_js}" ]; then
                printf >&2 "fatal: no main bundle found\n" &&
                return 1
            elif [ -n "${js_bundle_path}" ]; then
                printf >&2 "fatal: multiple main bundles found:\n" &&
                printf >&2 " %s\n" ${js_bundle_path_glob} &&
                return 1
            else
                js_bundle_path="${main_js}"
            fi
        done
    '
}
# test_pages PREREQ_NAME
# Test that the PREREQ_NAME build output includes a favicon, a valid home
# page, a valid prototype page, and a valid Discord invite page (which
# should be a redirect).
#
# The test bodies read ${output_dir} when they are evaluated, so this
# must be called while that variable still refers to the build in
# question (run_build assigns it).
test_pages() {
    prereq="$1"
    test_expect_success "${prereq}" "${prereq}: should have a favicon" '
        test_path_is_file "${output_dir}/favicon.png" &&
        file -b --mime-type "${output_dir}/favicon.png" >./favicon_filetype &&
        printf "image/png\n" | test_cmp - ./favicon_filetype &&
        rm ./favicon_filetype
    '
    test_expect_success "${prereq}" \
        "${prereq}: should have a home page and a prototype" '
        test_path_is_file "${output_dir}/index.html" &&
        grep -qF "<script src=" "${output_dir}/index.html" &&
        test_path_is_file "${output_dir}/prototype/index.html" &&
        grep -qF "<script src=" "${output_dir}/prototype/index.html"
    '
    # The invite page must not load any script; it must consist of a
    # meta-refresh line pointing at the Discord invite URL.
    test_expect_success "${prereq}" \
        "${prereq}: should have a discord-invite with redirect" '
        file="${output_dir}/discord-invite/index.html" &&
        test_path_is_file "${file}" &&
        test_must_fail grep -qF "<script src=" "${file}" &&
        url="https://discord.gg/tsBTgc9" &&
        needle="<meta http-equiv=\"refresh\" content=\"0;url=$url\" />" &&
        grep -qxF "${needle}" "${file}"
    '
}
# First build: two repositories, a CNAME, and a feedback URL. The
# assertions below read output_dir, api_dir, data_dir, and
# js_bundle_path as left behind by this run_build call, so they must
# come before the next build.
run_build TWO_REPOS \
    "should build the site with two repositories and a CNAME" \
    --cname sourcecred.example.com \
    --feedback-url http://discuss.example.com/feedback/ \
    --repo sourcecred/example-git \
    --repo sourcecred/example-github \
    ;
test_pages TWO_REPOS

# `wc -l` pads its output with spaces on some platforms; strip them so
# that the byte-for-byte comparison is portable.
test_expect_success TWO_REPOS \
    "TWO_REPOS: should have a registry with two repositories" '
    registry_file="${api_dir}/repositoryRegistry.json" &&
    test_path_is_file "${registry_file}" &&
    grep -oF "\"name\":" "${registry_file}" | wc -l | tr -d " " >actual_count &&
    printf "2\n" | test_cmp - actual_count
'

test_expect_success TWO_REPOS \
    "TWO_REPOS: should have data for the two repositories" '
    for repo in sourcecred/example-git sourcecred/example-github; do
        for file in github/view.json.gz; do
            test -s "${data_dir}/${repo}/${file}" || return
        done
    done
'

test_expect_success TWO_REPOS \
    "TWO_REPOS: should include the feedback URL somewhere in the bundle" '
    grep -qF http://discuss.example.com/feedback/ "${js_bundle_path}"
'

# Note: the expected CNAME content has no trailing newline.
test_expect_success TWO_REPOS "TWO_REPOS: should have a correct CNAME record" '
    test_path_is_file "${output_dir}/CNAME" &&
    printf "sourcecred.example.com" | test_cmp - "${output_dir}/CNAME"
'
# This feedback URL is "pollution" in the source environment and should
# _not_ be passed down to the actual application.
SOURCECRED_FEEDBACK_URL=http://wat.com/wat \
    run_build NO_REPOS \
    "should build the site with no repositories and no CNAME" \
    # no arguments here
test_pages NO_REPOS

test_expect_success NO_REPOS \
    "NO_REPOS: should not have a repository registry" '
    registry_file="${api_dir}/repositoryRegistry.json" &&
    test_must_fail test -e "${registry_file}"
'

test_expect_success NO_REPOS \
    "NO_REPOS: should not have repository data" '
    for repo in sourcecred/example-git sourcecred/example-github; do
        for file in git/graph.json github/view.json.gz; do
            test_must_fail test -f "${data_dir}/${repo}/${file}" || return
        done
    done
'

test_expect_success NO_REPOS \
    "NO_REPOS: should not include a feedback URL from a polluted environment" '
    test_must_fail grep -qF http://wat.com/wat "${js_bundle_path}"
'

test_expect_success NO_REPOS "NO_REPOS: should have no CNAME record" '
    test_must_fail test -e "${output_dir}/CNAME"
'

test_done
# vim: ft=sh