mirror of
https://github.com/logos-blockchain/lez-fuzzing.git
synced 2026-07-02 07:49:45 +00:00
447 lines
17 KiB
YAML
447 lines
17 KiB
YAML
name: Corpus update

# Hands-off weekly corpus maintenance.
#
# Each Sunday, every fuzz target is handled by two lanes that run in parallel
# (libFuzzer and AFL++):
#   Phase 1 — GROW:     fuzz for 30 min, seeded with the checked-in corpus,
#                       and keep every new input that is discovered.
#   Phase 2 — MINIMISE: re-minimise the target's *entire* corpus
#                       (cmin / afl-cmin) so dominated inputs are dropped and
#                       the tree never balloons.
#
# Minimisation is inherently per-target (each target has its own corpus dir
# and its own instrumented binary), so running Phase 2 straight after Phase 1
# in the same job is equivalent to a separate global minimise pass — without
# shipping the whole corpus between jobs.
#
# Each per-target result is uploaded as an artifact and a single `commit` job
# folds them into ONE pull request. Matrix jobs never push, so they cannot
# race on the branch. The PR is opened with a classic PAT (secret
# CORPUS_BOT_TOKEN).

on:
  schedule:
    - cron: '0 3 * * 0'  # Sundays, 03:00 UTC
  workflow_dispatch:
    inputs:
      duration:
        description: 'Seconds to fuzz per target in the grow phase'
        required: false
        default: '1800'
      minimize_only:
        description: 'Skip fuzzing; only minimise the existing corpus'
        type: boolean
        default: false
  # NOTE(review): a push trigger on a single feature branch looks like a
  # development hook — confirm it is still wanted before merging to main.
  push:
    branches:
      - feat-automatic-corpus-updates

env:
  RISC0_DEV_MODE: '1'
  CARGO_TERM_COLOR: always

# Read-only by default; the `commit` job widens its own permissions.
permissions:
  contents: read

jobs:
# ── Resolve the target matrix + run parameters ────────────────────────────────
  config:
    name: Resolve matrix & config
    runs-on: ubuntu-latest
    outputs:
      targets: ${{ steps.targets.outputs.targets }}
      duration: ${{ steps.cfg.outputs.duration }}
      minimize_only: ${{ steps.cfg.outputs.minimize_only }}
    steps:
      - uses: actions/checkout@v4
      - id: targets
        uses: ./.github/actions/resolve-targets
      # Resolve the grow duration (seconds) and the minimise-only switch.
      # Manual runs take them from the dispatch inputs; every other trigger
      # uses the weekly defaults.
      - id: cfg
        # Hand user-controlled values to the script through the environment
        # instead of splicing `${{ ... }}` into the shell source, so a crafted
        # input can never be executed as code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_DURATION: ${{ inputs.duration }}
          REQUESTED_MINIMIZE_ONLY: ${{ inputs.minimize_only }}
        run: |
          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
            DUR="$REQUESTED_DURATION"
            MIN="$REQUESTED_MINIMIZE_ONLY"
          else
            DUR="1800"   # scheduled weekly grow: 30 minutes per target
            MIN="false"
          fi
          [ -n "$DUR" ] || DUR="1800"
          [ -n "$MIN" ] || MIN="false"

          # The duration is later handed to `-max_total_time=` and `timeout`,
          # so it must be a plain positive integer. Zero is rejected as well:
          # both tools treat 0 as "no limit", which would fuzz until the job
          # itself times out.
          case "$DUR" in
            *[!0-9]*)
              echo "::error::duration must be a positive whole number of seconds, got '$DUR'"
              exit 1
              ;;
          esac
          if [ "$DUR" -le 0 ]; then
            echo "::error::duration must be greater than 0 seconds, got '$DUR'"
            exit 1
          fi

          # Downstream jobs only test for the literal string 'true'.
          [ "$MIN" = "true" ] || MIN="false"

          echo "duration=$DUR" >> "$GITHUB_OUTPUT"
          echo "minimize_only=$MIN" >> "$GITHUB_OUTPUT"
          echo "duration=${DUR}s minimize_only=${MIN}"

# ── libFuzzer lane: grow 30 min, then minimise ────────────────────────────────
  libfuzz:
    name: "libFuzzer — ${{ matrix.target }}"
    needs: config
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        target: ${{ fromJSON(needs.config.outputs.targets) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout-lez
      - name: Install logos-blockchain-circuits
        uses: ./logos-execution-zone/.github/actions/install-logos-blockchain-circuits
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: ./.github/actions/setup-libfuzzer

      - name: Build fuzz target
        run: cargo fuzz build ${{ matrix.target }}

      # Fuzz from the checked-in corpus; libFuzzer writes new inputs straight
      # into the corpus directory it is given. Skipped in minimise-only runs.
      - name: "Phase 1 — grow (fuzz ${{ needs.config.outputs.duration }}s)"
        if: ${{ needs.config.outputs.minimize_only != 'true' }}
        run: |
          target="${{ matrix.target }}"
          corpus_dir="corpus/libfuzz/$target"
          mkdir -p "$corpus_dir"
          count_before=$(ls "$corpus_dir" | wc -l)
          cargo fuzz run "$target" "$corpus_dir" -- \
            -max_total_time=${{ needs.config.outputs.duration }} -jobs=2 -workers=2
          count_after=$(ls "$corpus_dir" | wc -l)
          echo "grew corpus/libfuzz/$target: $count_before → $count_after inputs"

      # Always runs (also in minimise-only mode): shrink the whole corpus
      # directory in place.
      - name: "Phase 2 — minimise entire corpus (cmin)"
        run: |
          target="${{ matrix.target }}"
          corpus_dir="corpus/libfuzz/$target"
          mkdir -p "$corpus_dir"
          count_before=$(ls "$corpus_dir" | wc -l)
          cargo fuzz cmin "$target" "$corpus_dir"
          count_after=$(ls "$corpus_dir" | wc -l)
          echo "minimised corpus/libfuzz/$target: $count_before → $count_after inputs"

      # One artifact per target; the `commit` job collects them.
      - name: Upload corpus
        uses: actions/upload-artifact@v4
        with:
          name: libfuzz-corpus-${{ matrix.target }}
          path: corpus/libfuzz/${{ matrix.target }}/
          if-no-files-found: ignore

      # Only when an earlier step failed (e.g. the fuzzer found a crash).
      - name: Upload crash artifacts
        if: ${{ failure() }}
        uses: actions/upload-artifact@v4
        with:
          name: libfuzz-crash-${{ matrix.target }}
          path: fuzz/artifacts/${{ matrix.target }}/
          if-no-files-found: ignore

# ── AFL++ lane: grow 30 min, then minimise ────────────────────────────────────
  afl:
    name: "AFL++ — ${{ matrix.target }}"
    needs: config
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        target: ${{ fromJSON(needs.config.outputs.targets) }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout-lez
      - name: Install logos-blockchain-circuits
        uses: ./logos-execution-zone/.github/actions/install-logos-blockchain-circuits
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: ./.github/actions/setup-afl

      - name: Build AFL++ target
        run: |
          cargo afl build \
            --manifest-path fuzz/Cargo.toml \
            --no-default-features \
            --features fuzzer-afl \
            --release \
            --bin ${{ matrix.target }}

      # Seed AFL++ with the union of both checked-in corpora for this target.
      # `cp -n` keeps the first copy when both lanes hold the same file name.
      - name: Prepare seed corpus
        if: needs.config.outputs.minimize_only != 'true'
        run: |
          T="${{ matrix.target }}"
          SEEDS="afl-seeds/$T"
          mkdir -p "$SEEDS"
          for src in "corpus/libfuzz/$T" "corpus/afl/$T"; do
            [ -d "$src" ] || continue
            for f in "$src"/*; do
              [ -f "$f" ] || continue
              cp -n "$f" "$SEEDS/" 2>/dev/null || true
            done
          done
          # afl-fuzz needs at least one seed, so fall back to a placeholder.
          [ -n "$(ls -A "$SEEDS")" ] || echo -n "seed" > "$SEEDS/default_seed"
          echo "Seed inputs: $(ls "$SEEDS" | wc -l)"

      - name: "Phase 1 — grow (AFL++ ${{ needs.config.outputs.duration }}s)"
        if: needs.config.outputs.minimize_only != 'true'
        env:
          AFL_SKIP_CPUFREQ: "1"
          AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: "1"
        run: |
          T="${{ matrix.target }}"
          mkdir -p "afl-output/$T"
          set +e
          timeout ${{ needs.config.outputs.duration }} \
            afl-fuzz -i "afl-seeds/$T" -o "afl-output/$T" -- "fuzz/target/release/$T"
          rc=$?
          set -e
          # 124 = timeout(1) stopped afl-fuzz at the deadline (the expected end
          # of a grow run); 0 = clean exit; anything else is a real failure.
          [ $rc -eq 0 ] || [ $rc -eq 124 ] || exit $rc

      # Copy every queue entry into corpus/afl/<target>, named by its SHA-1 so
      # identical inputs collapse and the colon-laden AFL names disappear.
      - name: Sync new queue entries into corpus/afl
        if: needs.config.outputs.minimize_only != 'true'
        run: |
          T="${{ matrix.target }}"
          DEST="corpus/afl/$T"
          mkdir -p "$DEST"
          added=0
          for instance_dir in "afl-output/$T"/*/; do
            QUEUE="${instance_dir}queue"
            [ -d "$QUEUE" ] || continue
            for f in "$QUEUE"/id:*; do
              [ -f "$f" ] || continue
              HASH=$(sha1sum "$f" | cut -d' ' -f1)
              if [ ! -f "$DEST/$HASH" ]; then
                cp "$f" "$DEST/$HASH"
                added=$((added + 1))
              fi
            done
          done
          echo "grew corpus/afl/$T → $(ls "$DEST" | wc -l) inputs (+$added new)"

      - name: "Phase 2 — minimise entire corpus (afl-cmin)"
        env:
          AFL_SKIP_CPUFREQ: "1"
          AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: "1"
        run: |
          T="${{ matrix.target }}"
          SRC="corpus/afl/$T"
          if [ ! -d "$SRC" ] || [ -z "$(ls -A "$SRC" 2>/dev/null)" ]; then
            echo "corpus/afl/$T is empty — nothing to minimise."
            exit 0
          fi
          before=$(ls "$SRC" | wc -l)
          # afl-cmin can fail on pathological corpora; fall back to leaving SRC
          # as-is. The corpus is only replaced once afl-cmin has actually
          # produced files, so a failed or empty run can never wipe it, and a
          # copy error after that point fails the step instead of being hidden.
          if afl-cmin -i "$SRC" -o "afl-cmin/$T" -- "fuzz/target/release/$T" \
             && compgen -G "afl-cmin/$T/*" > /dev/null; then
            rm -rf "$SRC"
            mkdir -p "$SRC"
            cp "afl-cmin/$T"/* "$SRC"/
          else
            echo "afl-cmin failed or produced no output — keeping corpus/afl/$T unchanged."
          fi
          echo "minimised corpus/afl/$T: $before → $(ls "$SRC" | wc -l) inputs"

      # One artifact per target; the `commit` job collects them.
      - name: Upload corpus
        uses: actions/upload-artifact@v4
        with:
          name: afl-corpus-${{ matrix.target }}
          path: corpus/afl/${{ matrix.target }}/
          if-no-files-found: ignore

      - name: Package AFL findings on failure
        if: failure()
        run: |
          T="${{ matrix.target }}"
          # AFL filenames contain colons (forbidden by upload-artifact) — tar them.
          # If there is no output directory, create an empty archive instead.
          tar -czf "afl-findings-$T.tar.gz" -C afl-output "$T" 2>/dev/null \
            || tar -czf "afl-findings-$T.tar.gz" -T /dev/null
      - name: Upload AFL findings on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: afl-crash-${{ matrix.target }}
          path: afl-findings-${{ matrix.target }}.tar.gz
          if-no-files-found: ignore

# ── Aggregate every per-target corpus into ONE pull request ───────────────────
  commit:
    name: Open corpus update PR
    needs: [config, libfuzz, afl]
    # Run as long as config succeeded; individual matrix failures (fail-fast:false)
    # must not block the PR for the targets that did succeed.
    if: ${{ !cancelled() && needs.config.result == 'success' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          ref: main

      # Each artifact is extracted into corpus-artifacts/<artifact-name>/.
      - name: Download corpus artifacts
        uses: actions/download-artifact@v4
        with:
          path: corpus-artifacts
          pattern: "*-corpus-*"  # libfuzz-corpus-* and afl-corpus-* only
          merge-multiple: false
        continue-on-error: true

      - name: Apply corpus deltas to the working tree
        run: |
          shopt -s nullglob
          applied=0
          # Replace per-target dirs only for targets that produced an artifact, so a
          # crashed/skipped target never has its checked-in corpus deleted. Replacing
          # (rm + repopulate) lets cmin-driven deletions show up in the PR diff.
          for d in corpus-artifacts/libfuzz-corpus-*; do
            t="${d##*/libfuzz-corpus-}"
            rm -rf "corpus/libfuzz/$t"; mkdir -p "corpus/libfuzz/$t"
            cp "$d"/* "corpus/libfuzz/$t/" 2>/dev/null || true
            applied=$((applied + 1))
          done
          for d in corpus-artifacts/afl-corpus-*; do
            t="${d##*/afl-corpus-}"
            rm -rf "corpus/afl/$t"; mkdir -p "corpus/afl/$t"
            cp "$d"/* "corpus/afl/$t/" 2>/dev/null || true
            applied=$((applied + 1))
          done
          echo "Applied corpus for $applied target lane(s)."
          # The staging directory lives inside the work tree. Remove it now so
          # the whole-tree `git status` scan in the next step does not report
          # every downloaded file as an untracked change outside corpus/.
          rm -rf corpus-artifacts
          echo "Changed files: $(git status --porcelain corpus | wc -l)"

      # Builds the Markdown PR body: overall and per-target change counts plus
      # reviewer warnings for anything outside corpus/ or not named by SHA-1.
      # Exposes the file location as the `body_path` step output.
      - name: Summarise corpus changes for the PR body
        id: summary
        env:
          RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          RUN_ID: "${{ github.run_id }}"
          DURATION: "${{ needs.config.outputs.duration }}"
        run: |
          set -euo pipefail

          BODY="$RUNNER_TEMP/pr-body.md"
          UNUSUAL="$RUNNER_TEMP/unusual.txt"
          OUTSIDE="$RUNNER_TEMP/outside.txt"
          : > "$UNUSUAL"; : > "$OUTSIDE"

          # Scan the WHOLE working tree (not just corpus/) so anything touched
          # outside corpus/ is surfaced for the reviewer.
          mapfile -t changes < <(git status --porcelain --untracked-files=all)

          added=0; deleted=0; modified=0; other=0
          declare -A tgt_add tgt_del tgt_mod
          for line in "${changes[@]}"; do
            # Porcelain v1 layout: two status characters, a space, the path.
            x="${line:0:2}"
            path="${line:3}"
            # "old -> new" for renames; keep the destination path.
            case "$path" in *" -> "*) path="${path##* -> }";; esac
            # git C-quotes odd names — drop the surrounding quotes for display.
            path="${path%\"}"; path="${path#\"}"

            case "$x" in
              "??"|"A "|"AM") added=$((added+1)); cls=add ;;
              " D"|"D ")      deleted=$((deleted+1)); cls=del ;;
              " M"|"M "|"MM") modified=$((modified+1)); cls=mod ;;
              *)              other=$((other+1)); cls=other ;;
            esac

            case "$path" in
              corpus/*) ;;
              *) printf '%s %s\n' "$x" "$path" >> "$OUTSIDE" ;;
            esac

            base="${path##*/}"
            if [[ "$path" == corpus/* ]] && ! [[ "$base" =~ ^[0-9a-f]{40}$ ]]; then
              printf '%s %s\n' "$x" "$path" >> "$UNUSUAL"
            fi

            # Per-target tallies, keyed as "<lane>/<target>".
            if [[ "$path" =~ ^corpus/(libfuzz|afl)/([^/]+)/ ]]; then
              key="${BASH_REMATCH[1]}/${BASH_REMATCH[2]}"
              case "$cls" in
                add) tgt_add[$key]=$(( ${tgt_add[$key]:-0} + 1 )) ;;
                del) tgt_del[$key]=$(( ${tgt_del[$key]:-0} + 1 )) ;;
                mod) tgt_mod[$key]=$(( ${tgt_mod[$key]:-0} + 1 )) ;;
              esac
            fi
          done
          total=${#changes[@]}

          {
            echo "Automated weekly corpus update produced by"
            echo "\`.github/workflows/corpus-update.yml\` (run [#${RUN_ID}](${RUN_URL}))."
            echo
            echo "Per target, in parallel: **Phase 1** fuzzed ${DURATION}s (libFuzzer + AFL++),"
            echo "**Phase 2** re-minimised the entire corpus (\`cmin\` / \`afl-cmin\`)."
            echo
            echo "## Change statistics"
            echo
            echo "| Metric | Count |"
            echo "| --- | ---: |"
            echo "| Files changed | ${total} |"
            echo "| Added | ${added} |"
            echo "| Deleted | ${deleted} |"
            echo "| Modified | ${modified} |"
            if [ "$other" -gt 0 ]; then
              echo "| Other status | ${other} |"
            fi
            echo
          } > "$BODY"

          if [ "${#tgt_add[@]}" -gt 0 ] || [ "${#tgt_del[@]}" -gt 0 ] || [ "${#tgt_mod[@]}" -gt 0 ]; then
            {
              echo "### Per target"
              echo
              echo "| Corpus | Added | Deleted | Modified |"
              echo "| --- | ---: | ---: | ---: |"
              printf '%s\n' "${!tgt_add[@]}" "${!tgt_del[@]}" "${!tgt_mod[@]}" \
                | sort -u | while read -r key; do
                    [ -n "$key" ] || continue
                    echo "| \`$key\` | ${tgt_add[$key]:-0} | ${tgt_del[$key]:-0} | ${tgt_mod[$key]:-0} |"
                  done
              echo
            } >> "$BODY"
          fi

          # ── Reviewer flags ────────────────────────────────────────────────
          # Append a titled, fenced list of at most $cap entries from a file.
          emit_list() {  # title, file, intro
            local title="$1" file="$2" intro="$3" n cap=50
            n=$(wc -l < "$file" | tr -d ' ')
            {
              echo "### ⚠️ $title ($n)"
              echo
              echo "$intro"
              echo
              echo '```'
              head -n "$cap" "$file"
              if [ "$n" -gt "$cap" ]; then
                echo "... and $((n - cap)) more"
              fi
              echo '```'
              echo
            } >> "$BODY"
          }

          flagged=0
          if [ -s "$OUTSIDE" ]; then
            flagged=1
            emit_list "Files changed outside \`corpus/\`" "$OUTSIDE" \
              "A corpus update should only touch \`corpus/\` — review these carefully."
          fi
          if [ -s "$UNUSUAL" ]; then
            flagged=1
            emit_list "Corpus files with unusual names" "$UNUSUAL" \
              "Corpus inputs are normally named by their 40-char SHA-1. These are not:"
          fi
          if [ "$flagged" -eq 0 ]; then
            echo "✅ All changes are under \`corpus/\` and named by SHA-1 as expected." >> "$BODY"
            echo >> "$BODY"
          fi

          {
            echo "---"
            echo "Per-target corpora that crashed or were skipped are left untouched."
            echo "Review the diff, confirm CI is green, and merge."
          } >> "$BODY"

          echo "body_path=$BODY" >> "$GITHUB_OUTPUT"
          echo "::group::Generated PR body"; cat "$BODY"; echo "::endgroup::"

      # Four random lowercase letters, so every run gets its own branch name.
      - name: Generate unique branch suffix
        id: suffix
        run: echo "value=$(LC_ALL=C tr -dc 'a-z' </dev/urandom | head -c 4)" >> "$GITHUB_OUTPUT"

      # Only paths under corpus/libfuzz and corpus/afl are staged for the commit.
      - name: Create or update pull request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.CORPUS_BOT_TOKEN }}
          base: main
          branch: automation/corpus-update-${{ steps.suffix.outputs.value }}
          delete-branch: true
          add-paths: |
            corpus/libfuzz/**
            corpus/afl/**
          commit-message: "chore: weekly corpus update (grow + minimise)"
          title: "chore: automated weekly corpus update"
          labels: |
            automation
            corpus
          body-path: ${{ steps.summary.outputs.body_path }}