Create script to build static site (#592)

Summary: Currently, we create the static site and deploy it all at once in `scripts/deploy.sh`. This commit creates a new script that only builds the static site. This has the advantage that it is easier/less scary to change that script (because it can be tested without worrying about deploying to a local test target), and that we can write automated tests for it. Test Plan: Run `yarn sharness`; note that it completes very quickly. Then, in a shell with your GitHub token exported, run `yarn sharness-full`. Expect all tests to pass. For a sanity check, you can run: ```shell outdir="$(mktemp -d --suffix .sourcecred-site)" ./scripts/build_static_site.sh --target "${outdir}" \ --cname sourcecred.io \ --repo sourcecred/example-git \ --repo sourcecred/example-github \ ; (cd "${outdir}" && python -m SimpleHTTPServer) ``` and ensure that <http://localhost:8000/> is as expected. One test case that is not covered is the following: _if_ the actual app somehow tries to emit a `CNAME` file at root, _and_ our script’s logic to catch this is broken, then we will not catch this failure. I’ve tested the logic manually by adding `>"${cname_file}"` after definition of that variable, but I don’t see a good way to test it automatically, without adding flags like `--but-actually-emit-cname-too` to the build. The compound probability of this happening is sufficiently low that this doesn’t bother me. wchargin-branch: build-static-site-script
2025-01-11 13:14:28 +00:00 · 2018-08-06 13:05:40 -07:00 · 2018-08-06 13:05:40 -07:00 · d7cb4c65fa
commit d7cb4c65fa
parent baa0cbff1b
2 changed files with 364 additions and 0 deletions
--- a/scripts/build_static_site.sh
+++ b/scripts/build_static_site.sh
@ -0,0 +1,136 @@
+#!/bin/bash
+set -eu
+
+# Print the command-line synopsis and per-option help to stdout.
+usage() {
+    # Synopsis and one-line summary; `printf` reuses the format for
+    # every argument, so each argument becomes one output line.
+    printf '%s\n' \
+        'usage: build_static_site.sh --target TARGET' \
+        '                            [--repo OWNER/NAME [...]]' \
+        '                            [--cname DOMAIN]' \
+        '                            [-h|--help]' \
+        '' \
+        'Build the static SourceCred website, including example data.' \
+        ''
+
+    # Each option: the flag on its own line, followed by one or more
+    # tab-indented description lines.
+    printf '%s\n' '--target TARGET'
+    printf '\t%s\n' \
+        'an empty directory into which to build the site'
+    printf '%s\n' '--repo OWNER/NAME'
+    printf '\t%s\n' \
+        'a GitHub repository (e.g., torvalds/linux) for which' \
+        'to include example data'
+    printf '%s\n' '--cname DOMAIN'
+    printf '\t%s\n' \
+        'configure DNS for a GitHub Pages site to point to' \
+        'the provided custom domain'
+    printf '%s\n' '-h|--help'
+    printf '\t%s\n' \
+        'show this message'
+}
+
+# Entry point: parse the command line, change to the root of the Git
+# repository that contains this script, and run the build.
+main() {
+    parse_args "$@"
+
+    # Resolve the repository root from the script's own location so
+    # that the caller's working directory does not matter.
+    local script_dir
+    script_dir="$(dirname "$0")"
+    toplevel="$(git -C "${script_dir}" rev-parse --show-toplevel)"
+    cd "${toplevel}"
+
+    # `cleanup` reads this variable, so give it a value before the
+    # trap is installed (the script runs under `set -u`).
+    sourcecred_data=
+    trap cleanup EXIT
+
+    build
+}
+
+# Parse command-line flags into globals:
+#   target - resolved path of the output directory (required; must be
+#            an existing, empty directory)
+#   repos  - array of OWNER/NAME strings, one per `--repo` (may be empty)
+#   cname  - value of `--cname`, or empty if the flag was not given
+# Any malformed invocation terminates the script via `die`;
+# `-h`/`--help` prints the usage text and exits 0.
+parse_args() {
+    target=
+    cname=
+    repos=( )
+
+    local flag
+    local entry_count
+    while [ $# -gt 0 ]; do
+        # Consume the flag itself; value-taking flags then find their
+        # value (if any) in "$1" and consume it before the next round.
+        flag="$1"
+        shift
+        case "${flag}" in
+            --target)
+                [ -z "${target}" ] || die '--target specified multiple times'
+                [ $# -gt 0 ] || die 'missing value for --target'
+                # `readlink -e` fails unless every path component exists.
+                target="$(readlink -e "$1")" || die "target does not exist: $1"
+                shift
+                ;;
+            --repo)
+                [ $# -gt 0 ] || die 'missing value for --repo'
+                repos+=( "$1" )
+                shift
+                ;;
+            --cname)
+                [ $# -gt 0 ] || die 'missing value for --cname'
+                [ -z "${cname}" ] || die '--cname specified multiple times'
+                [ -n "$1" ] || die 'empty value for --cname'
+                cname="$1"
+                shift
+                ;;
+            -h|--help)
+                usage
+                exit 0
+                ;;
+            *)
+                die "unknown argument: ${flag}"
+                ;;
+        esac
+    done
+
+    # Validate the target only after all flags have been seen.
+    [ -n "${target}" ] || die 'target directory not specified'
+    [ -d "${target}" ] || die "target is not a directory: ${target}"
+    # `ls -A` lists dotfiles too, so hidden entries count as content.
+    entry_count="$(command ls -A "${target}" | wc -l)"
+    [ "${entry_count}" = 0 ] || die "target directory is nonempty: ${target}"
+}
+
+build() {
+    sourcecred_data="$(mktemp -d --suffix ".sourcecred-data")"
+    export SOURCECRED_DIRECTORY="${sourcecred_data}"
+
+    yarn
+    # shellcheck disable=SC2016
+    printf >&2 'warn: running `yarn backend`, overwriting `bin/` in your repo\n'
+    printf >&2 'warn: if this offends you, please see: %s\n' \
+        'https://github.com/sourcecred/sourcecred/issues/580'
+    yarn backend
+    yarn build --output-path "${target}"
+
+    if [ "${#repos[@]}" -ne 0 ]; then
+        for repo in "${repos[@]}"; do
+            printf >&2 'info: loading repository: %s\n' "${repo}"
+            node ./bin/sourcecred.js load "${repo}"
+        done
+    fi
+
+    # Copy the SourceCred data into the appropriate API route. Using
+    # `mkdir` here will fail in the case where an `api/` folder exists,
+    # which is the correct behavior. (In this case, our site's
+    # architecture conflicts with the required static structure, and we
+    # must fail.)
+    mkdir "${target}/api/"
+    mkdir "${target}/api/v1/"
+    cp -r "${sourcecred_data}" "${target}/api/v1/data"
+
+    if [ -n "${cname:-}" ]; then
+        cname_file="${target}/CNAME"
+        if [ -e "${cname_file}" ]; then
+            die 'CNAME file exists in static site output'
+        fi
+        printf '%s' "${cname}" >"${cname_file}"  # no newline
+    fi
+}
+
+cleanup() {
+    if [ -d "${sourcecred_data}" ]; then rm -rf "${sourcecred_data}"; fi
+}
+
+# die MESSAGE...
+# Print each MESSAGE on its own `fatal: `-prefixed line on stderr, then
+# terminate the script with exit status 1.
+die() {
+    printf 'fatal: %s\n' "$@" >&2
+    exit 1
+}
+
+main "$@"
--- a/sharness/test_build_static_site.t
+++ b/sharness/test_build_static_site.t
@ -0,0 +1,228 @@
+#!/bin/sh
+
+# Disable these lint rules globally:
+#   2034 = unused variable (used by sharness)
+#   2016 = parameter expansion in single quotes
+#   1004 = backslash-newline in single quotes
+# shellcheck disable=SC2034,SC2016,SC1004
+:
+
+test_description='tests for scripts/build_static_site.sh'
+
+export GIT_CONFIG_NOSYSTEM=1
+export GIT_ATTR_NOSYSTEM=1
+
+# shellcheck disable=SC1091
+. ./sharness.sh
+
+# run [ARGS...]
+# Invoke scripts/build_static_site.sh, located relative to the root of
+# the Git repository containing this test file, with ARGS. The body is
+# a subshell, so `set -eu` and the helper variable stay contained.
+run() (
+    set -eu
+    repo_root="$(git -C "$(dirname "$0")" rev-parse --show-toplevel)"
+    "${repo_root}/scripts/build_static_site.sh" "$@"
+)
+
+#
+# Start by checking a bunch of easy cases related to the argument
+# parser, mostly about rejecting various ill-formed invocations.
+
+test_expect_success "should print a help message" '
+    run --help >msg 2>err &&
+    test_must_be_empty err &&
+    test_path_is_file msg &&
+    grep -qF "usage: build_static_site.sh" msg
+'
+
+test_expect_success "should fail with no target" '
+    test_must_fail run 2>err &&
+    grep -qF -- "target directory not specified" err
+'
+
+test_expect_success "should fail with missing target value" '
+    test_must_fail run --target 2>err &&
+    grep -qF -- "missing value for --target" err
+'
+
+test_expect_success "should fail with multiple targets" '
+    mkdir one two &&
+    test_must_fail run --target one --target two 2>err &&
+    grep -qF -- "--target specified multiple times" err
+'
+
+test_expect_success "should fail with nonexistent targets" '
+    test_must_fail run --target wat 2>err &&
+    grep -qF -- "target does not exist: wat" err
+'
+
+test_expect_success "should fail with nonexistent targets with subcomponents" '
+    # using "readlink -f", this behavior can be different.
+    test_must_fail run --target wat/wat 2>err &&
+    grep -qF -- "target does not exist: wat/wat" err
+'
+
+test_expect_success "should fail with a file as target" '
+    printf "important\nstuff" >important_data &&
+    test_must_fail run --target important_data 2>err &&
+    grep -qF -- "target is not a directory: ${PWD}/important_data" err &&
+    printf "important\nstuff" | test_cmp - important_data
+'
+
+test_expect_success "should fail with a nonempty directory as target" '
+    mkdir important_dir &&
+    printf "redacted\n" >important_dir/.wallet.dat &&
+    test_must_fail run --target important_dir 2>err &&
+    grep -qF -- "target directory is nonempty: ${PWD}/important_dir" err &&
+    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
+'
+
+mkdir putative_output
+
+test_expect_success "should fail with missing repo value" '
+    test_must_fail run --target putative_output --repo 2>err &&
+    grep -qF -- "missing value for --repo" err &&
+    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
+'
+
+test_expect_success "should fail with missing cname value" '
+    test_must_fail run --target putative_output --cname 2>err &&
+    grep -qF -- "missing value for --cname" err &&
+    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
+'
+
+test_expect_success "should fail with empty cname" '
+    test_must_fail run --target putative_output --cname "" 2>err &&
+    grep -qF -- "empty value for --cname" err &&
+    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
+'
+
+test_expect_success "should fail with multiple cname values" '
+    test_must_fail run --target putative_output \
+        --cname a.com --cname b.com 2>err &&
+    grep -qF -- "--cname specified multiple times" err &&
+    printf "redacted\n" | test_cmp - important_dir/.wallet.dat
+'
+
+#
+# Now, actually generate output in two cases: one with repositories, and
+# one with no repositories. We can only do this if we have a token.
+
+# Register the HAVE_GITHUB_TOKEN prereq only when a non-empty token is
+# present in the environment; tests requiring it are skipped otherwise.
+[ -z "${SOURCECRED_GITHUB_TOKEN:-}" ] || test_set_prereq HAVE_GITHUB_TOKEN
+
+# run_build PREREQ_NAME DESCRIPTION [FLAGS...]
+# Build the site with the given FLAGS, and create a prereq PREREQ_NAME
+# to be used in any tests that depend on this build. The build will
+# itself have the EXPENSIVE prereq.
+run_build() {
+    prereq_name="$1"; shift
+    description="$1"; shift
+    output_dir="output_${prereq_name}"
+    api_dir="${output_dir}/api/v1/data"
+    data_dir="${api_dir}/data"
+    for arg in "${output_dir}" "$@"; do
+        unusual_chars="$(printf '%s' "$arg" | sed -e 's#[A-Za-z0-9/_.-]##g')"
+        if [ -n "${unusual_chars}" ]; then
+            printf 'fatal: potentially unsafe argument: %s\n' "${arg}"
+            return
+        fi
+    done
+    flags="--target $output_dir $*"  # checked for sanity above
+    test_expect_success EXPENSIVE,HAVE_GITHUB_TOKEN \
+        "${prereq_name}: ${description}" '
+        mkdir "${output_dir}" &&
+        run '"${flags}"' 2>err &&
+        test_must_fail grep -vF \
+            -e "Removing build directory: " \
+            -e "warn: running `yarn backend`" \
+            -e "warn: if this offends you" \
+            -e "info: loading repository" \
+            err &&
+        test_path_is_dir "${output_dir}" &&
+        test_path_is_dir "${api_dir}" &&
+        test_set_prereq "${prereq_name}"
+    '
+}
+
+# test_pages PREREQ_NAME
+# Test that the PREREQ_NAME build output includes a valid home page, a
+# valid prototype page, and a valid Discord invite page (which should be
+# a redirect).
+#
+# Both tests are gated on the PREREQ_NAME prereq. The test bodies read
+# the global `output_dir`, which `run_build` sets, so call this after
+# the corresponding `run_build` and before the next one.
+#
+# The home and prototype pages must each contain a `<script src=` tag.
+# The Discord page must contain no such tag and must have the
+# meta-refresh redirect on a line by itself (`grep -x`).
+test_pages() {
+    prereq="$1"
+    test_expect_success "${prereq}" \
+        "${prereq}: should have a home page and a prototype" '
+        test_path_is_file "${output_dir}/index.html" &&
+        grep -qF "<script src=" "${output_dir}/index.html" &&
+        test_path_is_file "${output_dir}/prototype/index.html" &&
+        grep -qF "<script src=" "${output_dir}/prototype/index.html"
+    '
+    test_expect_success "${prereq}" \
+        "${prereq}: should have a discord-invite with redirect" '
+        file="${output_dir}/discord-invite/index.html" &&
+        test_path_is_file "${file}" &&
+        test_must_fail grep -qF "<script src=" "${file}" &&
+        url="https://discord.gg/tsBTgc9" &&
+        needle="<meta http-equiv=\"refresh\" content=\"0;url=$url\" />" &&
+        grep -qxF "${needle}" "${file}"
+    '
+}
+
+run_build TWO_REPOS \
+    "should build the site with two repositories and a CNAME" \
+    --cname sourcecred.example.com \
+    --repo sourcecred/example-git \
+    --repo sourcecred/example-github \
+    ;
+
+test_pages TWO_REPOS
+
+test_expect_success TWO_REPOS \
+    "TWO_REPOS: should have a registry with two repositories" '
+    registry_file="${api_dir}/repositoryRegistry.json" &&
+    test_path_is_file "${registry_file}" &&
+    grep -oF "\"name\":" "${registry_file}" | wc -l >actual_count &&
+    printf "2\n" | test_cmp - actual_count
+'
+
+test_expect_success TWO_REPOS \
+    "TWO_REPOS: should have data for the two repositories" '
+    for repo in sourcecred/example-git sourcecred/example-github; do
+        for file in git/graph.json github/view.json; do
+            test -s "${data_dir}/${repo}/${file}" || return
+        done
+    done
+'
+
+test_expect_success TWO_REPOS "TWO_REPOS: should have a correct CNAME record" '
+    test_path_is_file "${output_dir}/CNAME" &&
+    printf "sourcecred.example.com" | test_cmp - "${output_dir}/CNAME"
+'
+
+run_build NO_REPOS \
+    "should build the site with no repositories and no CNAME" \
+    # no arguments here
+
+test_pages NO_REPOS
+
+test_expect_success NO_REPOS \
+    "NO_REPOS: should not have a repository registry" '
+    registry_file="${api_dir}/repositoryRegistry.json" &&
+    test_must_fail test -e "${registry_file}"
+'
+
+test_expect_success NO_REPOS \
+    "NO_REPOS: should not have repository data" '
+    for repo in sourcecred/example-git sourcecred/example-github; do
+        for file in git/graph.json github/view.json; do
+            test_must_fail test -f "${data_dir}/${repo}/${file}" || return
+        done
+    done
+'
+
+test_expect_success NO_REPOS "NO_REPOS: should have no CNAME record" '
+    test_must_fail test -e "${output_dir}/CNAME"
+'
+
+test_done
+
+# vim: ft=sh