Merge 1c48fc7e8c75e8ae9aabe66726ae125faa84df15 into e0a11767893dd988b4b6bcd904a68afd3eb1093f

2026-07-05 14:59:39 +00:00 · 2026-07-03 13:31:05 +03:00 · 2026-07-03 13:31:05 +03:00 · 87a512b986
commit 87a512b986
parent e0a1176789 1c48fc7e8c
28 changed files with 43273 additions and 0 deletions
--- a/pq-bench-rpi5/.gitignore
+++ b/pq-bench-rpi5/.gitignore
@ -0,0 +1,41 @@
+# Build trees / vendored deps (rebuilt by setup/setup.sh)
+/vendor/
+/build/
+setup/versions.lock
+
+# Compiled harness binaries
+bench/kem_sig/bench_pq
+bench/tls/bench_tls
+*.o
+
+# Generated TLS material
+bench/tls/pki/
+
+# Per-run scratch dirs (intermediate artifacts; never committed)
+results/.work-*/
+
+# Local results are kept by users, not committed by default.
+# Comment the next line out if you DO want to commit your machine's results.
+results/*.json
+# Consolidated baseline-grade RPi5 run (KEM + sig + TLS in one pass): the Pi
+# validator reference baseline.
+!results/rasberrypi5-20260625T202356Z.json
+# Consolidated Apple M3 cross-platform run (apple-m3, KEM + sig + TLS in one
+# pass): is_baseline_grade=false by design, kept as the cross-platform reference.
+!results/mehmetmac-20260625T220618Z.json
+
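+# Merged dataset consumed by the dashboard (dashboard/data/merged.json) is
+# deliberately NOT ignored: it is tracked and ships the published baselines so a
+# fresh clone renders charts out-of-the-box. Contributors who re-merge overwrite
+# it locally (a modified file they can commit or discard).
+
+# Generated PNG exports (presumably written by analyze/plot.py; regenerable)
+analyze/png/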
+
+# Python venv (matplotlib lives here; system python stays clean)
+analyze/.venv/
+
+# Machine-local Claude Code settings (never shared)
+.claude/settings.local.json
+
+# Python / OS cruft
+__pycache__/
+*.pyc
+.DS_Store
--- a/pq-bench-rpi5/Dockerfile
+++ b/pq-bench-rpi5/Dockerfile
@ -0,0 +1,41 @@
+# =============================================================================
+# Reproducible BUILD of the PQ benchmark toolchain on Debian aarch64 (the same
+# OS family as Raspberry Pi OS / Ubuntu on the RPi5).
+#
+# This image is for BUILDING ONLY — it pins and compiles liboqs / OpenSSL /
+# oqs-provider reproducibly. It is NOT for running the benchmark.
+#
+#   docker build -t pq-bench-rpi5 .          # build + pin the toolchain
+#
+# Run the MEASUREMENT bare-metal on the host, never in the container. A
+# container cannot reliably control the CPU governor, pin to an isolated core,
+# or read the Pi's SoC thermal/throttle sensors (vcgencmd) — the three knobs the
+# reference-grade gate depends on — so an in-container run could never be
+# baseline-grade and would only add noise. Build here if you like; then:
+#
+#   ./run.sh                                 # on the host (see README)
+# =============================================================================
+FROM debian:bookworm-slim
+
+# Build-time only: keeps apt non-interactive for every RUN step below without
+# baking DEBIAN_FRONTEND into the environment of containers started from the
+# image (ENV would persist it at runtime; ARG does not).
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      build-essential cmake ninja-build git perl pkg-config \
+      python3 python3-venv ca-certificates \
+      libssl-dev cpufrequtils util-linux \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . /app
+
+# Build + pin the toolchain at image-build time so the image is self-contained.
+# (Comment this out to keep the image thin and run setup.sh at container start.)
+RUN ./setup/setup.sh all || (echo "setup failed — see log above" && exit 1)
+
+# Optional: matplotlib PNG export in an isolated venv.
+RUN python3 -m venv analyze/.venv \
+    && analyze/.venv/bin/pip install --no-cache-dir -r analyze/requirements.txt
+
+# `bash -lc` takes exactly ONE command string, so CMD (and any override passed to
+# `docker run`) must be a single array element / single quoted argument.
+ENTRYPOINT ["/bin/bash", "-lc"]
+# This image builds the toolchain; it does not run the benchmark. The default
+# command just says so — run the measurement bare-metal on the host (see README).
+CMD ["echo 'Toolchain built. Run the benchmark BARE-METAL on the host (./run.sh) — Docker is for reproducible builds only; a container cannot meet the baseline-grade gate.'"]
--- a/pq-bench-rpi5/README.md
+++ b/pq-bench-rpi5/README.md
@ -0,0 +1,424 @@
+# pq-bench-rpi5
+
+A reproducible, general-purpose **post-quantum cryptography benchmark** whose
+baseline target is the **Raspberry Pi 5** (Broadcom BCM2712, Cortex-A76,
+aarch64). Anyone can run it on their own Pi 5 and the results aggregate and
+compare apples-to-apples.
+
+**Framing — migration cost.** How much does moving from the cryptography Logos
+uses *today* (X25519 key exchange + Ed25519 signatures) to PQ candidates cost on
+validator-grade hardware? Every chart draws that classical baseline as the
+reference line, so the PQ "tax" is always visible.
+
+Phase 1 covers **PQ KEMs**, **PQ signatures**, and **PQ TLS 1.3 handshakes**.
+Hooks are left for a later SNARK/STARK phase (see `config.yaml`); it is not
+implemented yet.
+
+---
+
+## What gets measured
+
+| Layer | Metrics |
+|-------|---------|
+| **KEM** | keygen / encaps / decaps wall-clock (median, MAD, IQR, min, max, mean, stddev, ops/sec) · pk/sk/ct sizes · heap high-water |
+| **Signature** | keygen / sign / verify wall-clock (same stats) · pk/sig sizes |
+| **TLS 1.3** | full-handshake latency · handshakes/sec · bytes-on-wire · ClientHello size (+ fragmentation flag) — as a matrix of (KEM group × signature) |
+
+The **classical baseline** (X25519 / Ed25519 / X25519+Ed25519) is always
+included as the reference point — measured as a real primitive via OpenSSL, not
+hand-waved.
+
+---
+
+## Project layout
+
+```
+pq-bench-rpi5/
+  setup/         build + pin liboqs, OpenSSL 3.5+, oqs-provider (versions.env / versions.lock)
+  bench/kem_sig/ bench_pq.c     primitive KEM/sig harness (liboqs + OpenSSL EVP baselines)
+  bench/tls/     bench_tls.c    in-process TLS 1.3 handshake harness (OpenSSL API)
+                 run_tls.sh      PKI generation + (KEM × sig) matrix driver
+  bench/lib/     assemble.py / merge helpers / miniyaml.py (zero-dep YAML)
+  results/       results/<host>-<timestamp>.json  (one per run, full metadata)
+  analyze/       merge.py (combine machines) + plot.py (matplotlib PNGs, optional venv)
+  dashboard/     static HTML/JS (Chart.js) — no backend, GitHub-Pages deployable
+  run.sh         governor + taskset + thermal wrapper + orchestrator
+  config.yaml    candidate lists (extend here)
+  Dockerfile     reproducible Debian-aarch64 build
+```
+
+---
+
+## Quick start
+
+### On a Raspberry Pi 5 (the real measurement target)
+
+```bash
+git clone <this repo> && cd pq-bench-rpi5
+./setup/setup.sh                 # build + pin liboqs, OpenSSL 3.5+, oqs-provider
+sudo ./run.sh                    # sudo only to set the performance governor (see below)
+python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
+# open dashboard/index.html (or deploy dashboard/ to GitHub Pages)
+```
+
+**On `sudo`:** it is **optional, not a prerequisite.** The only thing it does is
+set the CPU governor to `performance` — none of the crypto needs root. `./run.sh`
+runs fine without it: it warns, skips the governor step, completes the run, and
+the results JSON is automatically stamped `is_baseline_grade=false` (governor
+demerit). So use `sudo` when you want a baseline-grade reference run; drop it for
+a quick local run you don't intend to submit.
+
+`./run.sh --smoke` runs tiny iteration counts as a fast pipeline check.
+`./run.sh --kemsig-only` / `--tls-only` scope the run. `--iters/--warmup/--reps`
+override the `config.yaml` knobs.
+
+### On macOS (development / smoke testing only)
+
+```bash
+brew install cmake openssl@3 git
+./setup/setup.sh
+./run.sh --smoke                 # produces valid JSON; stamped is_baseline_grade=false
+```
+
+> **macOS runs are cross-platform / smoke data, never baseline-grade — by
+> design, for three concrete reasons:**
+> 1. **Not a Raspberry Pi**, so it fails the gate's first condition outright.
+> 2. **No userspace cycle counter, and ~1 µs timer granularity.** macOS exposes
+>    no readable PMU cycle counter and its wall-clock quantizes to ~1 µs steps —
+>    a ~10% floor on the fastest ops (ML-KEM ~10 µs), negligible for anything
+>    ≥100 µs (McEliece, FrodoKEM). (See "Timing source" under "Measurement
+>    methodology" below.)
+> 3. **No Linux cpufreq governor, and core-pinning isn't guaranteed.** Two of the
+>    noise-control knobs the gate relies on — `performance` governor and a pinned
+>    core — aren't available, and the build flags aren't `cortex-a76` either.
+>
+> Every macOS results file records `is_baseline_grade=false` with the exact
+> reasons, and the dashboard hides such runs by default. They still produce
+> **useful cross-platform numbers** (the heavier McEliece/FrodoKEM ops are barely
+> affected by the timer floor) — they just can't meet the controlled reference
+> bar, hence smoke-only.
+
+### Docker (reproducible build — build only, never run)
+
+Docker is for reproducibly **building** the pinned toolchain (liboqs / OpenSSL /
+oqs-provider), not for running the benchmark:
+
+```bash
+docker build -t pq-bench-rpi5 .   # builds + pins the toolchain inside the image
+```
+
+**Run the measurement bare-metal on the host.** A container can't reliably set
+the CPU governor, pin to an isolated core, or read the Pi's thermal/throttle
+sensors — the noise-control knobs the reference-grade gate relies on — so an
+in-container run could never be baseline-grade and would only add jitter. Build
+in Docker if you like; then run `./run.sh` on the host.
+
+---
+
+## Measurement methodology (why the numbers are credible)
+
+`run.sh` is the wrapper that makes a number defensible:
+
+- **CPU governor → `performance`** (Linux; needs `sudo`). Recorded before/after.
+  If it can't be set (e.g. not root) the run **continues anyway**: it warns,
+  proceeds, and the missing governor becomes an `is_baseline_grade=false`
+  demerit. `sudo` is only ever for this step — never for the crypto.
+- **Core pinning via `taskset -c 3`.** This is a **single-operation latency**
+  benchmark (one keygen, one encaps, one sign — timed in isolation), not a
+  parallel-throughput one, so pinning the whole sweep to one core keeps that
+  core's cache warm and removes cross-core migration scheduling noise, which
+  tightens the median and MAD. The Pi 5 has 4 cores (0–3); core **3** is chosen
+  because core 0 typically absorbs the most OS/IRQ/RPS work. The pinned core and
+  exact `taskset` command are recorded.
+  - *Planned (separate axis):* a multi-core **throughput/scaling** mode — run an
+    op across 1..N cores and report ops/sec plus scaling efficiency per
+    algorithm. Some schemes (SLH-DSA, and later STARK proving) parallelize far
+    better than others, so it's a worthwhile dimension — but kept **separate**
+    from these per-op latency numbers, not mixed into them.
+- **Thermal/clock trace.** A background sampler logs ARM clock
+  (`vcgencmd measure_clock arm`) and SoC temperature (`vcgencmd measure_temp`)
+  ~once a second for the whole run. The full trace is embedded in the results
+  JSON, and **thermal throttling** (`vcgencmd get_throttled`, plus a clock-droop
+  heuristic) is detected and flagged — a throttled run is not baseline-grade.
+- **Warmup + N timed iterations, multiple repetitions.** Primary metric is
+  wall-clock nanoseconds via `clock_gettime(CLOCK_MONOTONIC)`. We report
+  **median, MAD, IQR, min, max, mean, stddev, ops/sec**, plus per-repetition
+  medians — not just a mean.
+- **Timing source — two clocks, honestly recorded.** There are two ways to time
+  an op:
+  1. **Cycle-based** via the ARM hardware cycle counter (`PMCCNTR_EL0`) — the
+     most precise, but on Linux **userspace can't read it by default**: the
+     register traps unless a kernel module enables the userspace PMU (e.g.
+     `enable_arm_pmu`).
+  2. **Time-based** wall-clock via `clock_gettime(CLOCK_MONOTONIC)` — always
+     available, and accurate enough for the millisecond/microsecond ranges here.
+
+  The harness probes the cycle counter and, when it isn't available, **falls
+  back to wall-clock and records exactly that** in the JSON
+  (`run.cycles_available=false` + the reason). **On a stock machine the cycle
+  counter is not available, so runs use the wall-clock timer by default** — and
+  both published runs reflect this: the RPi5 baseline and the macOS run *both*
+  have `cycles_available=false` (both wall-clock). The remaining difference
+  between them is wall-clock **granularity**, not clock *type*: the Pi's
+  wall-clock lands on fractional microseconds, while macOS quantizes to ~1 µs
+  steps — a ~10% resolution floor on the fastest ops (ML-KEM keygen ~10 µs),
+  negligible for anything ≥100 µs (McEliece, FrodoKEM).
+- **CPU features / Keccak acceleration.** NEON, SHA2, SHA3, SHA512, AES, PMULL
+  are detected (`/proc/cpuinfo` on Linux, `sysctl` on macOS). **Note:** the
+  Cortex-A76 has the SHA2/AES extensions but **not** the ARMv8.2 SHA3
+  extension, so on the Pi 5 Keccak runs on NEON/scalar code — the results record
+  both the hardware capability and whether liboqs was compiled with SHA3
+  instructions, so this is explicit rather than assumed.
+
+### The AArch64-optimized backend
+
+liboqs is built with `OQS_DIST_BUILD=OFF` and the pinned flags so the optimized
+aarch64 ML-KEM backend (`mlkem-native`) and Falcon/Keccak asm are compiled in.
+`setup/setup.sh` extracts the proof from the generated `oqsconfig.h` (e.g.
+`OQS_ENABLE_KEM_ml_kem_768_aarch64 1`) into `versions.lock`, which is stamped
+into every results file under `toolchain.liboqs_opt_defines`.
+
+---
+
+## Methodology & trustworthiness (verify it yourself)
+
+Every claim below points at the exact code so you can read it, not take our word.
+All `bench_pq.c` references are `bench/kem_sig/bench_pq.c`.
+
+1. **Correctness gate — broken crypto emits *zero* numbers.** Before any timing,
+   each algorithm runs a full round-trip and asserts it: for KEM,
+   keygen→encaps→decaps then `memcmp(ss_encaps, ss_decaps)`
+   (`bench_pq.c:357-363`); for signatures, keygen→sign→`verify` must succeed
+   (`bench_pq.c:428-434`). On any failure, `die()` prints to **stderr** and
+   `exit(3)` (`bench_pq.c:303-307`) — and the JSON is only printed *after* all
+   measurement (`bench_pq.c:372-381`), so a failed gate yields **no stdout at
+   all**. The gate runs once, *outside* the timed loop. A runtime guard
+   (`must_measure`, `bench_pq.c:311-315`) also aborts if a timed op ever fails
+   mid-run. *Verify it:* flip one byte of the decaps shared secret right before
+   `bench_pq.c:362`, rebuild, run — the process exits `3` with empty stdout.
+
+2. **No dead-code elimination — the `volatile` sink.** At `-O3` the compiler may
+   delete work whose result is never observed. Each timed op folds an output
+   byte into a file-scope `volatile uint64_t g_sink` (`bench_pq.c:300`; uses at
+   `:333,:336,:339,:407,:410,:486`), forcing the store to be materialized so the
+   crypto call **cannot** be optimized away. Without it the loop could time
+   nothing and report meaningless near-zero numbers.
+
+3. **What is timed — only the op, never setup.** The timed region brackets a
+   single `fn(ctx)` call between two `now_ns()` reads (`bench_pq.c:274-281`);
+   per-rep warmup runs *outside* it (`bench_pq.c:272-273`). Inputs are canonical
+   and pre-validated, so e.g. KEM decaps (`bench_pq.c:337-339`) times one
+   `OQS_KEM_decaps` and nothing else. For the X25519 baseline, keygen is timed
+   separately (`bench_pq.c:507`), a stable key is re-primed *outside* timing
+   (`bench_pq.c:509`), then derive is timed alone (`bench_pq.c:510`) — setup is
+   never folded into a measured number.
+
+4. **Per-op auto-calibration with clamps.** `calibrate_op` (`bench_pq.c:209-250`)
+   runs a doubling probe (`:223-230`, also cache warmup) to estimate per-op cost
+   `est_ns` (`:231`), then picks iterations to hit `target_time_ms` of real work
+   (`:234-235`), clamped to `[min_samples, max_iters]` (`:236-237`). So a fast
+   18 µs keygen and a 0.74 s SLH-DSA sign each get the iteration count *they*
+   need: slow ops floor at `min_samples` (30), fast ops ceil at `max_iters`
+   (20000). The chosen `timed_iters` and `calib_est_ns` are recorded per op.
+
+5. **Robust statistics — median + MAD.** `compute_stats` (`bench_pq.c:111-146`)
+   reports median, MAD, IQR, q1/q3, min, max, mean, stddev, ops/sec, plus
+   per-repetition medians (`print_stats_json`, `bench_pq.c:184-203`). The
+   headline metric is the **median**, with **MAD** as spread: timing
+   distributions are right-skewed with a hard floor (true cost) and a long tail
+   of OS-scheduling/interrupt contamination that drags mean/stddev but not
+   median/MAD. Mean and stddev are kept in the JSON so the skew is visible. The
+   clock is `clock_gettime(CLOCK_MONOTONIC)` (`bench_pq.c:44-48`); userspace PMU
+   cycles are probed and honestly reported absent when they trap
+   (`probe_pmu`, `bench_pq.c:66-86`).
+
+6. **`is_baseline_grade` demerit gate.** Computed in
+   `bench/lib/assemble.py:155-168` as a demerit accumulator — the flag is `true`
+   only if *every* condition holds: real Pi (`:157`), `performance` governor
+   (`:160`), core-pinned (`:162`), `cortex-a76` build flags (`:164`), and no
+   thermal throttling (`:166`). Throttling is read from `vcgencmd get_throttled`
+   bits 2/18 plus a clock-droop heuristic (`assemble.py:91-98,:110-113`). Any
+   failure appends a human-readable reason and flips the flag to `false`; the
+   dashboard and `plot.py` default to baseline-grade runs only.
+
+---
+
+## Reproducibility & provenance
+
+- **Pinned versions** live in `setup/versions.env` (liboqs `0.15.0`, OpenSSL
+  `3.5.0`/`≥3.5`, oqs-provider `0.9.0`). After cloning, `setup.sh` records the
+  **actually resolved git commits** and the **exact build flags + compiler
+  version** into `setup/versions.lock`.
+- **Every results JSON carries full environment metadata**: RPi model, RAM,
+  kernel, OS, governor, the clock/temp trace during the run, compiler version,
+  liboqs/oqs-provider/OpenSSL versions+commits, build flags, and the candidate
+  list. A macOS smoke file and an RPi5 baseline file can never be confused.
+- **Identical flags for every candidate:** `-O3 -mcpu=cortex-a76` on the Pi.
+  Document your `gcc`/`clang` version — it is auto-captured in `versions.lock`
+  (`CC_VERSION`).
+
+### `is_baseline_grade`
+
+A **reference-measurement quality gate**, not a deployment requirement. It marks
+whether a run was produced under controlled, reproducible *reference* conditions,
+so the numbers are comparable across algorithms and across machines. It is `true`
+**only** when all hold: real Raspberry Pi · `performance` governor · core-pinned ·
+`cortex-a76` build flags · no thermal throttling. Otherwise it is `false` with a
+list of reasons.
+
+- **What it is:** a label that says "this run is clean enough to sit in the
+  cross-algorithm / cross-machine reference comparison." The dashboard and
+  `plot.py` default to baseline-grade runs only, so noisy runs don't distort the
+  picture.
+- **What it is *not*:** a claim about how nodes must be configured in production.
+  Real deployments are heterogeneous (different SoCs, governors, thermals) —
+  that's a separate question this flag does not speak to.
+- A run that doesn't meet the gate **isn't wrong** — it's just flagged
+  `is_baseline_grade=false` with the reasons and kept out of the reference set.
+  The macOS cross-platform runs are exactly this: useful, honest numbers that
+  simply aren't reference-grade.
+
+---
+
+## Candidates (edit `config.yaml`)
+
+- **KEM:** ML-KEM-512/768/1024; hybrids X25519MLKEM768, SecP256r1MLKEM768
+  (hybrids are benchmarked in the TLS layer; at the primitive layer liboqs
+  exposes them only as TLS groups, so they show as `enabled:false` there).
+  Code-based + conservative-LWE backups: Classic McEliece
+  348864/460896/460896f/6688128/6960119/8192128 (tiny ciphertext, slow keygen)
+  and FrodoKEM 640/976/1344-AES (unstructured LWE). Baseline: **X25519**.
+- **Signatures:** ML-DSA-44/65/87; SLH-DSA (SPHINCS+) variants;
+  Falcon/FN-DSA-512/1024. Baseline: **Ed25519**.
+- **TLS:** matrix of configured KEM groups × signature algorithms, always
+  including the classical **X25519 + Ed25519** pair.
+
+Classic McEliece and FrodoKEM are now measured (above). **HQC** is not — it is
+not enabled in the linked liboqs 0.15.0 build (disabled upstream after the
+IND-CCA2 implementation issue), so it is intentionally omitted rather than
+listed-and-disabled; re-add it once linked against a liboqs that re-enables it.
+Add further algorithms by uncommenting/adding entries — the harness skips
+anything your liboqs build doesn't enable (and says so).
+
+---
+
+## Output & analysis
+
+- `results/<hostname>-<timestamp>.json` — one self-describing file per run.
+- `analyze/merge.py results/*.json -o dashboard/data/merged.json` — merge runs
+  from many machines into one dataset (keeps each run distinct; never mixes
+  baseline with smoke).
+- `analyze/plot.py` — matplotlib PNGs for papers (optional; install into
+  `analyze/.venv` via `analyze/requirements.txt` to keep system python clean —
+  it gracefully skips if matplotlib is absent).
+- `dashboard/` — static, no-backend dashboard: grouped bars by security level,
+  size-vs-speed scatter, TLS handshakes/sec, and ClientHello size — each with
+  the classical baseline drawn as a reference line. Deploy the folder to GitHub
+  Pages, or open `index.html` via any static server.
+
+---
+
+## Contributing your RPi5 results
+
+The whole point is a **shared, aggregated baseline**: the more Raspberry Pi 5
+results we collect under identical conditions, the more confident we can be in
+the migration-cost picture. If you have a Pi 5, please contribute a run — it
+takes one command and a pull request.
+
+### 1. Run under baseline conditions
+
+For your numbers to count as baseline-grade, the run must satisfy the
+`is_baseline_grade` gate (real Pi 5 · `performance` governor · core-pinned ·
+`cortex-a76` flags · no thermal throttling). To give it the best shot:
+
+- **Use a Raspberry Pi 5** with active cooling (the official Active Cooler or a
+  fan). PQ signing (esp. SLH-DSA) runs the core hot for a while; without cooling
+  you *will* throttle and the run is flagged non-baseline.
+- **Use the official 27 W USB-C PSU.** Under-voltage also trips the throttle flag.
+- **Run on a quiet machine** (close other workloads) so core 3 stays clean.
+- **Don't edit `config.yaml`'s candidate list** if you want your run to be
+  directly comparable to others. (Extending it is fine — just say so in your PR;
+  extra algorithms simply add columns.)
+
+```bash
+git clone <this repo> && cd pq-bench-rpi5
+./setup/setup.sh                 # build + pin liboqs / OpenSSL 3.5+ / oqs-provider
+sudo ./run.sh                    # sudo lets it set the performance governor
+```
+
+A full run takes a while (SLH-DSA signing dominates). To check the pipeline
+first without committing to the full run, use `sudo ./run.sh --smoke` — but only
+a **full** run (not `--smoke`) counts as a submission.
+
+### 2. Confirm it's baseline-grade
+
+When the run finishes, the summary prints `baseline-grade (RPi5): True`. Verify
+in the JSON too:
+
+```bash
+f=$(ls -t results/*.json | head -1)
+python3 -c "import json;d=json.load(open('$f'));print('baseline_grade:',d['is_baseline_grade']);\
+print('reasons:',d['baseline_grade_reasons']);\
+print('throttled:',d['thermal_trace']['throttling_detected']);\
+print('aarch64 ML-KEM backend:', 'ml_kem_768_aarch64 1' in d['toolchain']['liboqs_opt_defines'])"
+```
+
+You want `baseline_grade: True`, `reasons: []`, `throttled: False`, and the
+backend line `True`. If `is_baseline_grade` is false, the printed reasons tell
+you what to fix (usually cooling/PSU/governor) — fix and re-run.
+
+### 3. Submit it
+
+Your `results/<hostname>-<timestamp>.json` is fully self-describing (host model,
+kernel, OS, governor, clock/temp trace, compiler + liboqs/oqs-provider/OpenSSL
+commits, build flags). It contains your **hostname** and Pi model and nothing
+else identifying — if you'd rather not share the hostname, set a name first with
+`HOSTNAME=mypi5 sudo ./run.sh`, or just rename the file before submitting.
+
+`results/*.json` is git-ignored by default (so you never accidentally commit
+local experiments), so add yours explicitly:
+
+```bash
+git checkout -b results/<your-handle>-pi5
+git add -f results/<hostname>-<timestamp>.json
+git commit -m "results: RPi5 baseline from <your-handle>"
+# push to your fork and open a PR
+```
+
+**PR checklist** (maintainers will look for these):
+
+- [ ] `is_baseline_grade: true` with empty `baseline_grade_reasons`
+- [ ] `thermal_trace.throttling_detected: false`
+- [ ] `host.is_rpi: true` and `host.rpi_model` mentions "Raspberry Pi 5"
+- [ ] `run.governor_after: performance` and `run.pinned: true`
+- [ ] `toolchain.cflags_target: cortex-a76`
+- [ ] full run (not `--smoke`): `run.timed_iters` is the `config.yaml` value, not 25
+- [ ] unmodified candidate list (or extensions noted in the PR description)
+
+Once merged, your file joins `results/`; anyone can regenerate the aggregated
+dataset and dashboard with
+`python3 analyze/merge.py results/*.json -o dashboard/data/merged.json`. The
+dashboard's run selector will then include your Pi alongside everyone else's.
+
+> Prefer not to use GitHub? Open an issue and attach the JSON file instead — a
+> maintainer will add it.
+
+---
+
+## Limitations
+
+- **macOS is smoke-only** (see above): coarse timer, no governor/pinning,
+  fallback flags.
+- **Userspace cycle counts** require a kernel PMU module; default is time-based.
+- **Heap/stack memory** is best-effort (`mallinfo2` on glibc; reported
+  unavailable elsewhere); pk/sk/ct/sig **sizes** are authoritative.
+- **TLS handshakes are in-process over memory BIOs** — this isolates crypto
+  cost cleanly (no socket/scheduler noise) but is not a network RTT model;
+  ClientHello fragmentation is flagged against a typical 1400-byte MSS.
+- **Docker is build-only.** The benchmark is not run in a container — a
+  container can't reliably control the governor, core pinning, or throttle
+  detection, so measurement runs bare-metal on the host (see the Docker section).
+
+## Future phase (not implemented)
+
+`config.yaml` reserves a `zk:` section for SNARK/STARK proving/verification
+benchmarks; the results schema and dashboard are structured to absorb it later.
--- a/pq-bench-rpi5/RUNNING-ON-YOUR-RPI5.md
+++ b/pq-bench-rpi5/RUNNING-ON-YOUR-RPI5.md
@ -0,0 +1,86 @@
+# Running pq-bench-rpi5 on Your Own Raspberry Pi 5
+
+This benchmark measures post-quantum KEMs, signatures, and TLS 1.3 handshakes
+against the classical baseline Logos uses today (X25519 / Ed25519), so every
+chart shows the **migration cost** of moving to PQ on validator-grade hardware.
+
+There's no manual tuning: the benchmark **auto-calibrates the iteration count
+per operation** to your Pi's speed, so results stay comparable across machines.
+
+## Prerequisites
+
+- **Raspberry Pi 5** (Cortex-A76, aarch64), ideally the 8GB model, with
+  **active cooling** so it doesn't thermal-throttle mid-run.
+- **Raspberry Pi OS / Debian Trixie or newer** — system OpenSSL 3.5+ so PQ TLS
+  works without a source build.
+- **Internet access** and **sudo**.
+
+## Step 1 — Clone (public repo, no auth)
+
+```sh
+git clone <REPO_URL>
+cd pq-bench-rpi5
+```
+
+## Step 2 — Build the toolchain
+
+```sh
+./setup/setup.sh all
+```
+
+Takes 5–15 min: installs dependencies and builds liboqs + oqs-provider. Run it
+inside `tmux` so it survives an SSH disconnect.
+
+## Step 3 — Run
+
+```sh
+sudo ./run.sh
+```
+
+`sudo` is only needed to set the CPU governor to `performance`; without it the
+run still completes but is stamped `is_baseline_grade=false` (see the README).
+The run takes ~4–5 min, with no iteration counts to set.
+
+Output lands in `results/<hostname>-<timestamp>.json`, stamped with full
+provenance (Pi model, RAM, kernel, governor, thermal trace, library versions)
+and an `is_baseline_grade` flag.
+
+## Step 4 — View results
+
+```sh
+cd dashboard
+python3 -m http.server 8765
+# then open http://<pi-ip>:8765
+```
+
+The charts show KEM, signature, and TLS results with the classical X25519 /
+Ed25519 baseline drawn as a reference line.
+
+## Step 5 — Contribute (optional)
+
+Share your `results/*.json` (open a PR or send it over). To merge results from
+multiple machines:
+
+```sh
+python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
+```
+
+The dashboard then shows every Pi side by side.
+
+## What the results tell you
+
+PQ is not so much *slower* as *bigger*. Lattice schemes (ML-KEM, ML-DSA) run
+close to classical in speed but have much larger keys and signatures, while the
+hash-based SLH-DSA (SPHINCS+) is an outlier in both signing time and signature
+size. On TLS, the classical baseline fits in a single packet, while PQ and
+hybrid handshakes grow past it and fragment.
+
+## Notes and limitations
+
+- Measures **liboqs** (C / assembly) implementations — a pure-Rust backend is a
+  separate, optional axis.
+- Userspace PMU cycle counts are usually unavailable, so the primary metric is
+  **wall-clock time + ops/sec**.
+- SNARK / STARK benchmarking is **out of scope** for this phase (`config.yaml`
+  reserves a hook for it).
+- The candidate list lives in `config.yaml` — use the exact liboqs algorithm
+  names.
--- a/pq-bench-rpi5/analyze/merge.py
+++ b/pq-bench-rpi5/analyze/merge.py
@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""merge.py — merge many results/<host>-<ts>.json files into one dataset the
+dashboard (and PNG export) consume.
+
+  python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
+
+The merged file keeps every run as a separate record (so multiple machines /
+repetitions can be compared) plus a flat index for quick charting. It never
+collapses RPi5 baseline-grade runs together with non-baseline (e.g. macOS smoke)
+runs — each record carries its own `is_baseline_grade` flag and host, and the
+dashboard filters on it by default.
+"""
+from __future__ import annotations
+import argparse
+import glob
+import json
+import os
+import sys
+
+MERGED_SCHEMA = "1.0.0"
+
+
+def load_runs(paths):
+    runs = []
+    for p in paths:
+        try:
+            with open(p) as f:
+                d = json.load(f)
+            d["_source_file"] = os.path.basename(p)
+            runs.append(d)
+        except (json.JSONDecodeError, OSError) as e:
+            print(f"[merge] skipping {p}: {e}", file=sys.stderr)
+    return runs
+
+
+def run_id(run):
+    h = run.get("host", {})
+    return f"{h.get('hostname','?')}/{h.get('cpu_brand','?')}@{run.get('generated_utc','?')}"
+
+
+def flatten(runs):
+    """Produce flat per-(run, algorithm, operation) rows for easy charting."""
+    kem_rows, sig_rows, tls_rows = [], [], []
+    for run in runs:
+        rid = run_id(run)
+        host = run.get("host", {})
+        meta = {
+            "run_id": rid,
+            "hostname": host.get("hostname"),
+            "cpu_brand": host.get("cpu_brand"),
+            "is_rpi": host.get("is_rpi"),
+            "is_baseline_grade": run.get("is_baseline_grade"),
+            "source_file": run.get("_source_file"),
+        }
+        for k in run.get("kem", []):
+            if not k.get("enabled"):
+                continue
+            for op, st in (k.get("operations") or {}).items():
+                kem_rows.append({**meta,
+                    "alg": k["alg"], "backend": k.get("backend"),
+                    "classical": bool(k.get("classical")),
+                    "nist_level": k.get("claimed_nist_level"),
+                    "operation": op,
+                    "median_ns": st.get("median"), "mad_ns": st.get("mad"),
+                    "iqr_ns": st.get("iqr"), "min_ns": st.get("min"),
+                    "stddev_ns": st.get("stddev"), "ops_per_sec": st.get("ops_per_sec"),
+                    "sizes": k.get("sizes")})
+        for s in run.get("sig", []):
+            if not s.get("enabled"):
+                continue
+            for op, st in (s.get("operations") or {}).items():
+                sig_rows.append({**meta,
+                    "alg": s["alg"], "backend": s.get("backend"),
+                    "classical": bool(s.get("classical")),
+                    "nist_level": s.get("claimed_nist_level"),
+                    "operation": op,
+                    "median_ns": st.get("median"), "mad_ns": st.get("mad"),
+                    "iqr_ns": st.get("iqr"), "min_ns": st.get("min"),
+                    "stddev_ns": st.get("stddev"), "ops_per_sec": st.get("ops_per_sec"),
+                    "sizes": s.get("sizes")})
+        tls = run.get("tls") or {}
+        for cell in (tls.get("matrix") or []):
+            if not cell.get("enabled"):
+                continue
+            tls_rows.append({**meta,
+                "label": cell.get("label"), "group": cell.get("group"),
+                "is_baseline_pair": cell.get("label") == (tls.get("baseline") or {}).get("label"),
+                "handshakes_per_sec": cell.get("handshakes_per_sec"),
+                "median_ns": (cell.get("handshake_latency_ns") or {}).get("median"),
+                "bytes_total": (cell.get("bytes_on_wire") or {}).get("total"),
+                "client_hello_bytes": cell.get("client_hello_bytes"),
+                "client_hello_fragmented": cell.get("client_hello_fragmented")})
+    return kem_rows, sig_rows, tls_rows
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("inputs", nargs="+", help="results JSON files or globs")
+    ap.add_argument("-o", "--out", default="dashboard/data/merged.json")
+    args = ap.parse_args()
+
+    paths = []
+    for pat in args.inputs:
+        paths.extend(sorted(glob.glob(pat)) if any(c in pat for c in "*?[") else [pat])
+    paths = [p for p in paths if os.path.isfile(p)]
+    if not paths:
+        sys.exit("no input files matched")
+
+    runs = load_runs(paths)
+    kem_rows, sig_rows, tls_rows = flatten(runs)
+
+    merged = {
+        "merged_schema": MERGED_SCHEMA,
+        "n_runs": len(runs),
+        "runs": [{
+            "run_id": run_id(r),
+            "host": r.get("host"),
+            "is_baseline_grade": r.get("is_baseline_grade"),
+            "baseline_grade_reasons": r.get("baseline_grade_reasons", []),
+            "toolchain": r.get("toolchain"),
+            "cpu_features": r.get("cpu_features"),
+            "run": r.get("run"),
+            "thermal_summary": {
+                "temp_c": (r.get("thermal_trace") or {}).get("temp_c"),
+                "throttling_detected": (r.get("thermal_trace") or {}).get("throttling_detected"),
+            },
+            "generated_utc": r.get("generated_utc"),
+            "source_file": r.get("_source_file"),
+        } for r in runs],
+        "kem": kem_rows,
+        "sig": sig_rows,
+        "tls": tls_rows,
+    }
+
+    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
+    with open(args.out, "w") as f:
+        json.dump(merged, f, indent=2)
+    n_base = sum(1 for r in runs if r.get("is_baseline_grade"))
+    print(f"merged {len(runs)} run(s) -> {args.out}  "
+          f"({n_base} baseline-grade, {len(runs)-n_base} smoke/other)")
+    print(f"  kem rows: {len(kem_rows)}  sig rows: {len(sig_rows)}  tls rows: {len(tls_rows)}")
+
+
+if __name__ == "__main__":
+    main()
--- a/pq-bench-rpi5/analyze/plot.py
+++ b/pq-bench-rpi5/analyze/plot.py
@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""plot.py — export publication-ready PNGs from a merged dataset.
+
+  python3 analyze/plot.py dashboard/data/merged.json -o analyze/png
+
+matplotlib is an OPTIONAL dependency. If it is not installed this prints a clear
+hint and exits 0 (the HTML dashboard remains the primary, dependency-free view).
+
+By default it plots only baseline-grade (RPi5) runs so paper figures are never
+polluted with macOS smoke data; pass --include-smoke to override. The classical
+Logos baseline (X25519 / Ed25519) is drawn as a reference line on every chart.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+
+try:
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+except ImportError:
+    print("matplotlib not installed — skipping PNG export.\n"
+          "  enable it in a project venv (keeps your system python clean):\n"
+          "    python3 -m venv analyze/.venv\n"
+          "    analyze/.venv/bin/pip install -r analyze/requirements.txt\n"
+          "    analyze/.venv/bin/python analyze/plot.py dashboard/data/merged.json\n"
+          "  (the HTML dashboard works without it)", file=sys.stderr)
+    sys.exit(0)
+
+
+def median_ms(ns):
+    return (ns or 0) / 1e6
+
+
+def pick_runs(merged, include_smoke):
+    ids = set()
+    for r in merged.get("runs", []):
+        if include_smoke or r.get("is_baseline_grade"):
+            ids.add(r["run_id"])
+    return ids
+
+
+def grouped_bar_by_level(rows, op, title, outpath):
+    """Grouped bars of median latency per algorithm, grouped by NIST level."""
+    data = [r for r in rows if r["operation"] == op]
+    if not data:
+        return
+    # one bar per algorithm; baseline highlighted
+    data.sort(key=lambda r: (r.get("nist_level") or 0, r["median_ns"]))
+    labels = [r["alg"] for r in data]
+    vals = [median_ms(r["median_ns"]) for r in data]
+    colors = ["#888" if r.get("classical") else "#3b6" for r in data]
+
+    fig, ax = plt.subplots(figsize=(max(6, len(labels) * 0.55), 4))
+    ax.bar(range(len(labels)), vals, color=colors)
+    # baseline reference line
+    base = next((r for r in data if r.get("classical")), None)
+    if base:
+        ax.axhline(median_ms(base["median_ns"]), color="#c33", ls="--", lw=1,
+                   label=f"classical baseline ({base['alg']})")
+        ax.legend(fontsize=8)
+    ax.set_xticks(range(len(labels)))
+    ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
+    ax.set_ylabel("median latency (ms)")
+    ax.set_title(title)
+    ax.grid(axis="y", alpha=0.3)
+    fig.tight_layout()
+    fig.savefig(outpath, dpi=150)
+    plt.close(fig)
+    print("wrote", outpath)
+
+
+def size_speed_scatter(rows, op, size_key, title, outpath):
+    data = [r for r in rows if r["operation"] == op and r.get("sizes")]
+    pts = []
+    for r in data:
+        sz = (r["sizes"] or {}).get(size_key)
+        if sz:
+            pts.append((sz, median_ms(r["median_ns"]), r["alg"], r.get("classical")))
+    if not pts:
+        return
+    fig, ax = plt.subplots(figsize=(7, 5))
+    for sz, lat, alg, classical in pts:
+        ax.scatter(sz, lat, c="#c33" if classical else "#3b6",
+                   s=60, edgecolors="k", linewidths=0.4, zorder=3)
+        ax.annotate(alg, (sz, lat), fontsize=7, xytext=(4, 3),
+                    textcoords="offset points")
+    ax.set_xlabel(f"{size_key} size (bytes)")
+    ax.set_ylabel("median latency (ms)")
+    ax.set_title(title)
+    ax.grid(alpha=0.3)
+    fig.tight_layout()
+    fig.savefig(outpath, dpi=150)
+    plt.close(fig)
+    print("wrote", outpath)
+
+
+def tls_bar(tls_rows, outpath):
+    if not tls_rows:
+        return
+    tls_rows = sorted(tls_rows, key=lambda r: -(r.get("handshakes_per_sec") or 0))
+    labels = [r["label"] for r in tls_rows]
+    vals = [r.get("handshakes_per_sec") or 0 for r in tls_rows]
+    colors = ["#c33" if r.get("is_baseline_pair") else "#36c" for r in tls_rows]
+    fig, ax = plt.subplots(figsize=(max(6, len(labels) * 0.5), 4.5))
+    ax.barh(range(len(labels)), vals, color=colors)
+    base = next((r for r in tls_rows if r.get("is_baseline_pair")), None)
+    if base:
+        ax.axvline(base.get("handshakes_per_sec") or 0, color="#c33", ls="--", lw=1,
+                   label=f"classical baseline ({base['label']})")
+        ax.legend(fontsize=8)
+    ax.set_yticks(range(len(labels)))
+    ax.set_yticklabels(labels, fontsize=7)
+    ax.invert_yaxis()
+    ax.set_xlabel("handshakes / sec")
+    ax.set_title("TLS 1.3 handshake throughput (higher is better)")
+    ax.grid(axis="x", alpha=0.3)
+    fig.tight_layout()
+    fig.savefig(outpath, dpi=150)
+    plt.close(fig)
+    print("wrote", outpath)
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("merged")
+    ap.add_argument("-o", "--outdir", default="analyze/png")
+    ap.add_argument("--include-smoke", action="store_true")
+    args = ap.parse_args()
+
+    with open(args.merged) as f:
+        merged = json.load(f)
+    ids = pick_runs(merged, args.include_smoke)
+    if not ids:
+        print("no baseline-grade runs to plot (use --include-smoke for macOS/dev data)",
+              file=sys.stderr)
+        return
+
+    def keep(rows):
+        return [r for r in rows if r["run_id"] in ids]
+
+    kem, sig, tls = keep(merged["kem"]), keep(merged["sig"]), keep(merged["tls"])
+    os.makedirs(args.outdir, exist_ok=True)
+    O = args.outdir
+
+    grouped_bar_by_level(kem, "keygen", "KEM keygen latency by algorithm", f"{O}/kem_keygen.png")
+    grouped_bar_by_level(kem, "encaps", "KEM encapsulation latency", f"{O}/kem_encaps.png")
+    grouped_bar_by_level(kem, "decaps", "KEM decapsulation latency", f"{O}/kem_decaps.png")
+    grouped_bar_by_level(sig, "sign", "Signature signing latency", f"{O}/sig_sign.png")
+    grouped_bar_by_level(sig, "verify", "Signature verification latency", f"{O}/sig_verify.png")
+    size_speed_scatter(kem, "encaps", "public_key",
+                       "KEM: public-key size vs encaps latency", f"{O}/kem_size_speed.png")
+    size_speed_scatter(sig, "sign", "signature",
+                       "Signature: signature size vs sign latency", f"{O}/sig_size_speed.png")
+    tls_bar(tls, f"{O}/tls_throughput.png")
+    print(f"PNGs in {O}/ ({'incl. smoke' if args.include_smoke else 'baseline-grade only'})")
+
+
+if __name__ == "__main__":
+    main()
--- a/pq-bench-rpi5/analyze/requirements.txt
+++ b/pq-bench-rpi5/analyze/requirements.txt
@ -0,0 +1 @@
+matplotlib>=3.7
--- a/pq-bench-rpi5/bench/kem_sig/Makefile
+++ b/pq-bench-rpi5/bench/kem_sig/Makefile
@ -0,0 +1,26 @@
+# Build the primitive KEM/sig harness against the pinned liboqs + system OpenSSL.
+#
+# Paths/flags default to the vendored toolchain but are overridable so run.sh can
+# pass the exact values from setup/versions.lock (single source of truth):
+#   make LIBOQS_PREFIX=... OPENSSL_PREFIX=... BENCH_CFLAGS="-O3 -mcpu=cortex-a76"
+
+# Project root: two directories above the directory holding this Makefile.
+ROOT          := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))../..)
+LIBOQS_PREFIX ?= $(ROOT)/vendor/install
+# OpenSSL: prefer Homebrew openssl@3 on macOS, else system.
+# (If `brew` is missing or fails, the `|| echo /usr` fallback supplies /usr.)
+OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3 2>/dev/null || echo /usr)
+BENCH_CFLAGS  ?= -O3
+
+CC     ?= cc
+CFLAGS := $(BENCH_CFLAGS) -std=c11 -Wall -Wextra \
+          -I$(LIBOQS_PREFIX)/include -I$(OPENSSL_PREFIX)/include
+# Both library directories are also recorded as rpaths so the dynamic loader
+# searches them when the binary runs.
+LDFLAGS := -L$(LIBOQS_PREFIX)/lib -L$(OPENSSL_PREFIX)/lib \
+           -Wl,-rpath,$(LIBOQS_PREFIX)/lib -Wl,-rpath,$(OPENSSL_PREFIX)/lib
+LDLIBS  := -loqs -lcrypto -lm
+
+# Default target: single-file build of the harness.
+bench_pq: bench_pq.c
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDLIBS)
+
+clean:
+	rm -f bench_pq
+
+.PHONY: clean
--- a/pq-bench-rpi5/bench/kem_sig/bench_pq.c
+++ b/pq-bench-rpi5/bench/kem_sig/bench_pq.c
@ -0,0 +1,656 @@
+/* ===========================================================================
+ * bench_pq.c — primitive-level KEM / signature benchmark harness.
+ *
+ * One algorithm per invocation (fresh process keeps cache state clean):
+ *   bench_pq --kind kem --alg ML-KEM-768 --warmup 1000 --iters 10000 --reps 5
+ *   bench_pq --kind sig --alg ML-DSA-65  ...
+ *
+ * Emits a single JSON object describing the algorithm to stdout. The orchestrator
+ * (run.sh / assemble.py) wraps these with environment metadata.
+ *
+ * Two backends, selected by algorithm name:
+ *   - liboqs            : all PQ candidates (ML-KEM, ML-DSA, Falcon, SLH-DSA, ...)
+ *   - OpenSSL EVP       : the classical Logos baselines X25519 (KEM-analog) and
+ *                         Ed25519 (signature), which liboqs does not implement.
+ * This lets the classical reference be drawn on the same primitive charts.
+ *
+ * Metrics per operation: full per-iteration wall-clock nanosecond distribution
+ * -> median, MAD, IQR, min, max, mean, stddev, ops/sec, plus per-repetition
+ * medians. Optional userspace PMU cycle counts when available. Heap high-water
+ * via mallinfo2 on glibc (the RPi5 target); honestly reported unavailable
+ * elsewhere (e.g. the macOS smoke box).
+ * ===========================================================================*/
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+#include <time.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include <oqs/oqs.h>
+
+#include <openssl/evp.h>
+#include <openssl/err.h>
+
+#if defined(__linux__) && defined(__GLIBC__)
+#include <malloc.h>
+#define HAVE_MALLINFO2 1
+#endif
+
+/* ---- timing ------------------------------------------------------------- */
+static inline uint64_t now_ns(void) {
+    /* CLOCK_MONOTONIC reading folded into a single nanosecond count. */
+    struct timespec mono;
+    clock_gettime(CLOCK_MONOTONIC, &mono);
+    const uint64_t sec_as_ns = (uint64_t)mono.tv_sec * 1000000000ull;
+    return sec_as_ns + (uint64_t)mono.tv_nsec;
+}
+
+/* ---- userspace PMU cycle counter probe (aarch64) ------------------------ */
+/* Jump target used to escape from the SIGILL handler during the PMU probe. */
+static sigjmp_buf g_sigill_jmp;
+/* Outcome of probe_pmu(): 1 if the counter read completed, 0 if it trapped. */
+static volatile sig_atomic_t g_pmu_ok = 0;
+/* SIGILL handler: jump back to the sigsetjmp point, making it return 1. */
+static void sigill_handler(int sig) { (void)sig; siglongjmp(g_sigill_jmp, 1); }
+
+/* Read the cycle counter. On aarch64 this executes `mrs` on PMCCNTR_EL0; on any
+ * other architecture it returns 0. probe_pmu() calls it under a SIGILL handler
+ * to find out whether the read is permitted. */
+static inline uint64_t read_cycles(void) {
+#if defined(__aarch64__)
+    uint64_t v;
+    __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(v));
+    return v;
+#else
+    return 0;
+#endif
+}
+
+/* Probe whether the cycle counter can be read from userspace.
+ * Returns 1 if it can, 0 otherwise; in every case *reason is set to a static
+ * string describing the outcome. */
+static int probe_pmu(const char **reason) {
+#if defined(__aarch64__)
+    /* Temporarily install sigill_handler; the previous disposition is saved in
+     * `old` and restored once the probe is done. */
+    struct sigaction sa, old;
+    memset(&sa, 0, sizeof sa);
+    sa.sa_handler = sigill_handler;
+    sigaction(SIGILL, &sa, &old);
+    /* sigsetjmp returns 0 on the direct call; a non-zero return means
+     * sigill_handler jumped back here, i.e. the counter read raised SIGILL. */
+    if (sigsetjmp(g_sigill_jmp, 1) == 0) {
+        (void)read_cycles();
+        g_pmu_ok = 1;
+    } else {
+        g_pmu_ok = 0;
+    }
+    sigaction(SIGILL, &old, NULL);
+    if (g_pmu_ok) { *reason = "PMCCNTR_EL0 readable from userspace"; return 1; }
+    *reason = "PMCCNTR_EL0 traps (kernel module not loaded; needs e.g. enable_arm_pmu)";
+    return 0;
+#else
+    *reason = "not aarch64";
+    return 0;
+#endif
+}
+
+/* ---- statistics --------------------------------------------------------- */
+/* Summary statistics over one set of timing samples (filled by compute_stats). */
+typedef struct {
+    /* order statistics and moments, in the unit of the input samples */
+    double median, mad, iqr, q1, q3, min, max, mean, stddev;
+    double ops_per_sec;   /* 1e9 / median, or 0 when the median is not positive */
+    uint64_t n;           /* number of samples summarized */
+} stats_t;
+
+static int cmp_u64(const void *a, const void *b) {
+    /* qsort comparator: ascending order over uint64_t elements. */
+    const uint64_t lhs = *(const uint64_t *)a;
+    const uint64_t rhs = *(const uint64_t *)b;
+    if (lhs < rhs) return -1;
+    if (lhs > rhs) return 1;
+    return 0;
+}
+
+/* Percentile p (a fraction, e.g. 0.5 for the median) of an ascending-sorted
+ * array, linearly interpolated between the two neighbouring elements.
+ * Returns 0 for an empty array and the sole element when n == 1. */
+static double pct_sorted(const uint64_t *s, uint64_t n, double p) {
+    if (n < 2) return n ? (double)s[0] : 0;
+    const double pos = p * (double)(n - 1);
+    const uint64_t below = (uint64_t)pos;
+    if (below + 1 >= n) return (double)s[n - 1];
+    const double lower = (double)s[below];
+    const double upper = (double)s[below + 1];
+    return lower + (pos - (double)below) * (upper - lower);
+}
+
+/* Summarize n timing samples.
+ *
+ * Sorts `samples` in place (ascending) as a side effect. Returns a zeroed
+ * stats_t (with n set) when n == 0. stddev is the sample standard deviation
+ * (n - 1 divisor) and 0 for a single sample. mad stays 0 if its scratch buffer
+ * cannot be allocated. ops_per_sec is 1e9 / median. */
+static stats_t compute_stats(uint64_t *samples, uint64_t n) {
+    stats_t st; memset(&st, 0, sizeof st);
+    st.n = n;
+    if (n == 0) return st;
+    qsort(samples, n, sizeof(uint64_t), cmp_u64);
+    st.min = (double)samples[0];
+    st.max = (double)samples[n - 1];
+    st.median = pct_sorted(samples, n, 0.5);
+    st.q1 = pct_sorted(samples, n, 0.25);
+    st.q3 = pct_sorted(samples, n, 0.75);
+    st.iqr = st.q3 - st.q1;
+
+    double sum = 0;
+    for (uint64_t i = 0; i < n; i++) sum += (double)samples[i];
+    st.mean = sum / (double)n;
+    double ss = 0;
+    for (uint64_t i = 0; i < n; i++) {
+        double d = (double)samples[i] - st.median * 0.0 - st.mean;
+        ss += d * d;
+    }
+    st.stddev = (n > 1) ? sqrt(ss / (double)(n - 1)) : 0;
+
+    /* MAD = median(|x - median|); needs a second sorted buffer.
+     * With an even sample count the median falls halfway between two integers,
+     * so every deviation ends in .5 and truncating it to an integer would bias
+     * the MAD low. Store the deviations doubled (always whole numbers), take
+     * their median, and halve the result. */
+    uint64_t *dev2 = malloc(n * sizeof(uint64_t));
+    if (dev2) {
+        for (uint64_t i = 0; i < n; i++)
+            dev2[i] = (uint64_t)(2.0 * fabs((double)samples[i] - st.median) + 0.5);
+        qsort(dev2, n, sizeof(uint64_t), cmp_u64);
+        st.mad = pct_sorted(dev2, n, 0.5) / 2.0;
+        free(dev2);
+    }
+    st.ops_per_sec = st.median > 0 ? 1e9 / st.median : 0;
+    return st;
+}
+
+/* ---- measurement configuration + generic loop --------------------------- */
+/* One benchmarkable operation: runs the op once on the caller-supplied context
+ * and returns 0 on success, non-zero on failure (calibration and measurement
+ * stop at the first non-zero return). */
+typedef int (*op_fn)(void *ctx);
+
+/* How each op is sized. Two modes, decided per invocation:
+ *   - fixed-count   (fixed_iters > 0): exactly fixed_iters timed iters per rep,
+ *                   with the configured fixed-mode warmup. This is the explicit
+ *                   `--iters N` override / fixed-count fallback.
+ *   - auto-calibrate(fixed_iters == 0): each op is timed long enough to put
+ *                   ~target_ns of aggregate timed work on it, so an 18 us keygen
+ *                   and a 750 ms SLH-DSA sign both yield a stable median. The
+ *                   chosen count is clamped to [min_samples, max_iters]:
+ *                     fast ops hit max_iters, slow ops hit min_samples.
+ * Auto mode estimates per-op cost with a short doubling calibration (which also
+ * serves as cache warmup), then derives the timed count + a per-rep re-warm. */
+typedef struct {
+    uint64_t fixed_iters;   /* >0 => fixed-count mode; 0 => auto-calibrate */
+    uint64_t target_ns;     /* auto: per-op aggregate timed-work target */
+    uint64_t min_samples;   /* auto: floor on timed iters per rep */
+    uint64_t max_iters;     /* auto: ceiling on timed iters per rep */
+    uint64_t warmup;        /* fixed-mode warmup count per rep */
+    uint64_t reps;          /* number of repetitions (warmup + timed pass each) */
+} bench_cfg;
+
+/* Result of measure_op(): the raw samples plus the parameters actually used. */
+typedef struct {
+    uint64_t *all;        /* all samples across reps */
+    uint64_t  all_n;      /* number of valid entries in `all` */
+    double    per_rep_median[64];   /* median of each rep; at most 64 are kept */
+    int       n_rep_med;            /* number of valid per_rep_median entries */
+    uint64_t  timed_iters;   /* timed iters per rep actually used */
+    uint64_t  warmup_iters;  /* warmup iters per rep actually used */
+    uint64_t  reps;
+    int       calibrated;    /* 1 if auto-calibrated, 0 if fixed-count */
+    double    est_ns;        /* per-op cost estimate from calibration (auto) */
+} measure_out;
+
+/* Write one JSON member `"name":{...}` to f, combining the run parameters from
+ * *m with the statistics in st. No trailing comma or newline is written, so the
+ * caller supplies the separators. `calib_est_ns` appears only for calibrated
+ * runs. */
+static void print_stats_json(FILE *f, const char *name, stats_t st,
+                             const measure_out *m) {
+    fprintf(f, "\"%s\":{", name);
+    fprintf(f, "\"unit\":\"ns\",\"warmup_iters\":%llu,\"timed_iters\":%llu,\"repetitions\":%llu,",
+            (unsigned long long)m->warmup_iters, (unsigned long long)m->timed_iters,
+            (unsigned long long)m->reps);
+    fprintf(f, "\"calibrated\":%s,", m->calibrated ? "true" : "false");
+    if (m->calibrated)
+        fprintf(f, "\"calib_est_ns\":%.2f,", m->est_ns);
+    fprintf(f, "\"samples\":%llu,", (unsigned long long)st.n);
+    fprintf(f, "\"median\":%.2f,\"mad\":%.2f,\"iqr\":%.2f,\"q1\":%.2f,\"q3\":%.2f,",
+            st.median, st.mad, st.iqr, st.q1, st.q3);
+    fprintf(f, "\"min\":%.2f,\"max\":%.2f,\"mean\":%.2f,\"stddev\":%.2f,",
+            st.min, st.max, st.mean, st.stddev);
+    fprintf(f, "\"ops_per_sec\":%.2f,", st.ops_per_sec);
+    /* comma-separated array: the separator is emitted before every element
+     * except the first */
+    fprintf(f, "\"per_rep_median\":[");
+    for (int i = 0; i < m->n_rep_med; i++)
+        fprintf(f, "%s%.2f", i ? "," : "", m->per_rep_median[i]);
+    fprintf(f, "]}");
+}
+
+/* Pick the timed-iter count (and per-rep warmup) for one op under cfg.
+ * In auto mode this runs a doubling calibration loop on fn (which warms caches)
+ * to estimate per-op cost, then solves for the count that hits target_ns.
+ * In fixed-count mode fn is not called at all and the estimate is reported as 0.
+ * Returns 0 on success, -2 if the op ever fails during calibration. */
+static int calibrate_op(op_fn fn, void *ctx, const bench_cfg *cfg,
+                        uint64_t *timed_out, uint64_t *warm_out,
+                        double *est_ns_out, int *calibrated_out) {
+    if (cfg->fixed_iters > 0) {
+        *timed_out = cfg->fixed_iters;
+        *warm_out = cfg->warmup;
+        *est_ns_out = 0.0;
+        *calibrated_out = 0;
+        return 0;
+    }
+    /* doubling calibration: run batches 1,2,4,... until ~CALIB_BUDGET elapses,
+     * capped at max_iters so a sub-microsecond op can't spin forever. Both
+     * limits are checked before a batch starts, so the final batch may carry
+     * the totals past them. */
+    const uint64_t CALIB_BUDGET_NS = 30ull * 1000 * 1000;   /* 30 ms */
+    uint64_t cops = 0, cel = 0, batch = 1;
+    while (cel < CALIB_BUDGET_NS && cops < cfg->max_iters) {
+        uint64_t t0 = now_ns();
+        for (uint64_t i = 0; i < batch; i++)
+            if (fn(ctx) != 0) return -2;
+        cel += now_ns() - t0;
+        cops += batch;
+        batch *= 2;
+    }
+    /* mean cost per op over the whole calibration; floored at 1 ns so the
+     * divisions below stay finite */
+    double est_ns = cops ? (double)cel / (double)cops : 1.0;
+    if (est_ns < 1.0) est_ns = 1.0;
+
+    double want = (double)cfg->target_ns / est_ns;
+    uint64_t n = (uint64_t)(want + 0.5);
+    if (n < cfg->min_samples) n = cfg->min_samples;   /* slow ops floor here  */
+    if (n > cfg->max_iters)   n = cfg->max_iters;     /* fast ops ceil here   */
+
+    /* per-rep re-warm ~= 20% of the timed budget, at least 1, capped. */
+    double w = ((double)cfg->target_ns * 0.2) / est_ns;
+    uint64_t warm = (uint64_t)(w + 0.5);
+    if (warm < 1) warm = 1;
+    if (warm > cfg->max_iters) warm = cfg->max_iters;
+
+    *timed_out = n;
+    *warm_out = warm;
+    *est_ns_out = est_ns;
+    *calibrated_out = 1;
+    return 0;
+}
+
+/* Measure one operation under cfg and collect its per-iteration timings.
+ *
+ * calibrate_op() picks the timed and warmup counts; then cfg->reps repetitions
+ * run, each doing the warmup iterations untimed followed by the timed
+ * iterations, every one timed individually with now_ns().
+ *
+ * On success returns 0 and fills *out; out->all is heap-allocated and must be
+ * free()d by the caller. Per-rep medians are recorded for as many reps as
+ * out->per_rep_median can hold.
+ * Returns -1 if the sample buffers cannot be allocated (or their size would
+ * not fit in size_t) and -2 if fn ever returns non-zero. On any failure
+ * out->all is NULL and out->all_n is 0. */
+static int measure_op(op_fn fn, void *ctx, const bench_cfg *cfg, measure_out *out) {
+    uint64_t iters, warmup;
+    double est_ns; int calibrated;
+    uint64_t *rep_buf = NULL;
+
+    out->all = NULL;
+    out->all_n = 0;
+    if (calibrate_op(fn, ctx, cfg, &iters, &warmup, &est_ns, &calibrated) != 0)
+        return -2;
+
+    /* Refuse sample counts whose byte size would overflow the malloc argument. */
+    const uint64_t max_elems = SIZE_MAX / sizeof(uint64_t);
+    if (iters > max_elems || (cfg->reps != 0 && iters > max_elems / cfg->reps))
+        return -1;
+
+    out->all = malloc((size_t)iters * (size_t)cfg->reps * sizeof(uint64_t));
+    if (!out->all) return -1;
+    out->n_rep_med = 0;
+    out->timed_iters = iters;
+    out->warmup_iters = warmup;
+    out->reps = cfg->reps;
+    out->calibrated = calibrated;
+    out->est_ns = est_ns;
+    rep_buf = malloc((size_t)iters * sizeof(uint64_t));
+    if (!rep_buf) goto fail_alloc;
+
+    const int rep_med_cap =
+        (int)(sizeof out->per_rep_median / sizeof out->per_rep_median[0]);
+
+    for (uint64_t r = 0; r < cfg->reps; r++) {
+        /* untimed re-warm before every repetition */
+        for (uint64_t i = 0; i < warmup; i++)
+            if (fn(ctx) != 0) goto fail_op;
+        for (uint64_t i = 0; i < iters; i++) {
+            uint64_t t0 = now_ns();
+            int rc = fn(ctx);
+            uint64_t dt = now_ns() - t0;
+            if (rc != 0) goto fail_op;
+            rep_buf[i] = dt;
+            out->all[out->all_n++] = dt;
+        }
+        /* Per-rep median. rep_buf is scratch that the next rep overwrites and
+         * out->all holds its own copy of every sample, so sort it in place:
+         * no extra allocation between reps and no way to silently lose a
+         * median to a failed malloc. */
+        if (out->n_rep_med < rep_med_cap) {
+            qsort(rep_buf, iters, sizeof(uint64_t), cmp_u64);
+            out->per_rep_median[out->n_rep_med++] = pct_sorted(rep_buf, iters, 0.5);
+        }
+    }
+    free(rep_buf);
+    return 0;
+
+fail_op:
+    free(rep_buf);
+    free(out->all);
+    out->all = NULL;
+    out->all_n = 0;
+    return -2;
+
+fail_alloc:
+    free(out->all);
+    out->all = NULL;
+    out->all_n = 0;
+    return -1;
+}
+
+/* ---- anti-DCE sink + fatal helpers -------------------------------------- */
+/* File-scope volatile: the compiler must materialize every store, so it cannot
+ * dead-code-eliminate the crypto outputs we feed into it. Every timed op sinks
+ * one output byte here. */
+static volatile uint64_t g_sink = 0;
+
+/* Abort the whole run: a broken build must NEVER silently emit timing numbers.
+ * Prints a "FATAL: <alg>: <what>" line to stderr and exits with status 3, so
+ * this function does not return. */
+static void die(const char *alg, const char *what) {
+    fprintf(stderr, "[bench_pq] FATAL: %s: %s — aborting so no numbers are emitted\n",
+            alg, what);
+    exit(3);
+}
+
+/* Run measure_op() for one operation and treat any non-zero result (e.g. a
+ * verify that stopped succeeding) as fatal: the process aborts via
+ * die(alg, op), so callers never read freed or partial sample buffers. */
+static void must_measure(const char *alg, const char *op, op_fn fn, void *ctx,
+                         const bench_cfg *cfg, measure_out *out) {
+    const int rc = measure_op(fn, ctx, cfg, out);
+    if (rc == 0)
+        return;
+    die(alg, op);
+}
+
+/* Length in bytes of the fixed message used by the signature sign/verify ops. */
+#define MSGLEN 32
+
+/* ======================= liboqs KEM ====================================== */
+/* Timed ops consume canonical, pre-validated inputs; keygen/encaps write to
+ * scratch buffers so they never clobber the matched (pk,sk,ct) that the other
+ * timed ops depend on. Each op sinks an output byte into g_sink. */
+/* Context passed to the KEM op callbacks through their `void *` argument. */
+typedef struct {
+    OQS_KEM *kem;         /* liboqs KEM object the ops are run against        */
+    uint8_t *pk, *sk;     /* canonical matched keypair (encaps/decaps inputs) */
+    uint8_t *ct;          /* canonical ciphertext (decaps input)             */
+    uint8_t *pk_s, *sk_s; /* scratch: keygen outputs                          */
+    uint8_t *ct_s, *ss_s; /* scratch: encaps outputs                          */
+    uint8_t *ss_d;        /* scratch: decaps output                           */
+} kem_ctx;
+/* Timed op: generate a keypair into the scratch buffers. Returns 0 on success. */
+static int kem_keygen(void *c) {
+    kem_ctx *k = c;
+    if (OQS_KEM_keypair(k->kem, k->pk_s, k->sk_s) == OQS_SUCCESS) {
+        g_sink += k->pk_s[0];   /* keep the output observable */
+        return 0;
+    }
+    return 1;
+}
+/* Timed op: encapsulate to the canonical public key, writing the ciphertext
+ * and shared secret to scratch. Returns 0 on success. */
+static int kem_encaps(void *c) {
+    kem_ctx *k = c;
+    if (OQS_KEM_encaps(k->kem, k->ct_s, k->ss_s, k->pk) == OQS_SUCCESS) {
+        const uint64_t mixed = (uint64_t)k->ss_s[0] ^ k->ct_s[0];
+        g_sink += mixed;        /* keep both outputs observable */
+        return 0;
+    }
+    return 1;
+}
+/* Timed op: decapsulate the canonical ciphertext with the canonical secret
+ * key into scratch. Returns 0 on success. */
+static int kem_decaps(void *c) {
+    kem_ctx *k = c;
+    if (OQS_KEM_decaps(k->kem, k->ss_d, k->ct, k->sk) == OQS_SUCCESS) {
+        g_sink += k->ss_d[0];   /* keep the output observable */
+        return 0;
+    }
+    return 1;
+}
+
+/* Benchmark one liboqs KEM and print a single JSON object on stdout.
+ * Validates a keygen -> encaps -> decaps round trip once, times the three
+ * operations, then emits sizes and per-operation statistics. If liboqs does
+ * not provide `alg`, an "enabled":false object is printed instead.
+ * Returns 0 in both cases; every failure aborts the process through die(). */
+static int run_kem(const char *alg, const bench_cfg *cfg) {
+    OQS_KEM *kem = OQS_KEM_new(alg);
+    if (!kem) {
+        printf("{\"alg\":\"%s\",\"kind\":\"kem\",\"backend\":\"liboqs\",\"enabled\":false,"
+               "\"reason\":\"not enabled in this liboqs build\"}\n", alg);
+        return 0;
+    }
+    /* canonical buffers (pk, sk, ct) plus separate scratch for every output */
+    kem_ctx x; memset(&x, 0, sizeof x); x.kem = kem;
+    x.pk   = malloc(kem->length_public_key);  x.sk   = malloc(kem->length_secret_key);
+    x.ct   = malloc(kem->length_ciphertext);
+    x.pk_s = malloc(kem->length_public_key);  x.sk_s = malloc(kem->length_secret_key);
+    x.ct_s = malloc(kem->length_ciphertext);
+    x.ss_s = malloc(kem->length_shared_secret);
+    x.ss_d = malloc(kem->length_shared_secret);
+    if (!x.pk||!x.sk||!x.ct||!x.pk_s||!x.sk_s||!x.ct_s||!x.ss_s||!x.ss_d) die(alg,"out of memory");
+
+    /* ---- correctness check (ONCE, outside the timed loop) ----
+     * keygen -> encaps -> decaps, then assert the shared secrets match. */
+    if (OQS_KEM_keypair(kem, x.pk, x.sk) != OQS_SUCCESS) die(alg, "keygen failed");
+    if (OQS_KEM_encaps(kem, x.ct, x.ss_s, x.pk) != OQS_SUCCESS) die(alg, "encaps failed");
+    if (OQS_KEM_decaps(kem, x.ss_d, x.ct, x.sk) != OQS_SUCCESS) die(alg, "decaps failed");
+    if (memcmp(x.ss_s, x.ss_d, kem->length_shared_secret) != 0)
+        die(alg, "KEM shared-secret mismatch (ss_encaps != ss_decaps)");
+
+    /* timed phases run on the canonical, validated (pk,sk,ct); any op failure
+     * during timing aborts via must_measure. */
+    measure_out kg={0}, en={0}, de={0};
+    must_measure(alg,"keygen",kem_keygen,&x,cfg,&kg);
+    must_measure(alg,"encaps",kem_encaps,&x,cfg,&en);
+    must_measure(alg,"decaps",kem_decaps,&x,cfg,&de);
+
+    /* nothing is printed until all three measurements have succeeded */
+    printf("{\"alg\":\"%s\",\"kind\":\"kem\",\"backend\":\"liboqs\",\"enabled\":true,", alg);
+    printf("\"claimed_nist_level\":%d,", kem->claimed_nist_level);
+    printf("\"sizes\":{\"public_key\":%zu,\"secret_key\":%zu,\"ciphertext\":%zu,\"shared_secret\":%zu},",
+           kem->length_public_key, kem->length_secret_key, kem->length_ciphertext, kem->length_shared_secret);
+    printf("\"operations\":{");
+    stats_t s;
+    s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
+    s=compute_stats(en.all,en.all_n); print_stats_json(stdout,"encaps",s,&en); printf(",");
+    s=compute_stats(de.all,de.all_n); print_stats_json(stdout,"decaps",s,&de);
+    printf("}}\n");
+
+    free(kg.all); free(en.all); free(de.all);
+    free(x.pk); free(x.sk); free(x.ct);
+    free(x.pk_s); free(x.sk_s); free(x.ct_s); free(x.ss_s); free(x.ss_d);
+    OQS_KEM_free(kem);
+    return 0;
+}
+
+/* ======================= liboqs SIG ====================================== */
+/* sign writes a scratch signature; verify reads the canonical (sg,sglen) over
+ * the canonical msg with the canonical pk — all pre-validated. */
+/* Context passed to the signature op callbacks through their `void *` argument. */
+typedef struct {
+    OQS_SIG *sig;          /* liboqs signature object the ops run against */
+    uint8_t *pk, *sk;      /* canonical keypair (sign/verify inputs) */
+    uint8_t *msg;          /* canonical message                      */
+    uint8_t *sg; size_t sglen;     /* canonical signature (verify input) */
+    uint8_t *pk_s, *sk_s;  /* scratch: keygen outputs                */
+    uint8_t *sg_s; size_t sg_s_len;/* scratch: sign output           */
+} sig_ctx;
+/* Timed op: generate a keypair into the scratch buffers. Returns 0 on success. */
+static int sig_keygen(void *c) {
+    sig_ctx *s = c;
+    if (OQS_SIG_keypair(s->sig, s->pk_s, s->sk_s) == OQS_SUCCESS) {
+        g_sink += s->pk_s[0];   /* keep the output observable */
+        return 0;
+    }
+    return 1;
+}
+/* Timed op: sign the canonical message with the canonical secret key, writing
+ * the signature to scratch. Returns 0 on success. */
+static int sig_sign(void *c) {
+    sig_ctx *s = c;
+    /* reset the length slot to length_signature before every call */
+    s->sg_s_len = s->sig->length_signature;
+    if (OQS_SIG_sign(s->sig, s->sg_s, &s->sg_s_len, s->msg, MSGLEN, s->sk) == OQS_SUCCESS) {
+        g_sink += s->sg_s[0];   /* keep the output observable */
+        return 0;
+    }
+    return 1;
+}
+/* Timed op: verify the canonical signature over the canonical message with the
+ * canonical public key. Returns 0 only when verification succeeds. */
+static int sig_verify(void *c) {
+    sig_ctx *s = c;
+    if (OQS_SIG_verify(s->sig, s->msg, MSGLEN, s->sg, s->sglen, s->pk) == OQS_SUCCESS) {
+        g_sink += 1;
+        return 0;
+    }
+    return 1;
+}
+
+/* Benchmark one liboqs signature algorithm and print one JSON line on stdout.
+ *   alg: algorithm name handed to OQS_SIG_new().
+ *   cfg: measurement configuration, forwarded unchanged to must_measure().
+ * Returns 0 both when the benchmark ran and when OQS_SIG_new() returns NULL
+ * (in that case an "enabled":false record is printed instead).
+ * Allocation and self-check failures are reported through die().
+ * NOTE(review): die() is defined outside this view; the code below assumes it
+ * does not return — confirm. */
+static int run_sig(const char *alg, const bench_cfg *cfg) {
+    OQS_SIG *sig = OQS_SIG_new(alg);
+    if (!sig) {
+        printf("{\"alg\":\"%s\",\"kind\":\"sig\",\"backend\":\"liboqs\",\"enabled\":false,"
+               "\"reason\":\"not enabled in this liboqs build\"}\n", alg);
+        return 0;
+    }
+    /* Canonical buffers (pk, sk, msg, sg) and scratch buffers (pk_s, sk_s,
+     * sg_s), all sized from the algorithm's declared lengths. */
+    sig_ctx x; memset(&x,0,sizeof x); x.sig=sig;
+    x.pk   = malloc(sig->length_public_key);  x.sk   = malloc(sig->length_secret_key);
+    x.msg  = malloc(MSGLEN);
+    x.sg   = malloc(sig->length_signature);
+    x.pk_s = malloc(sig->length_public_key);  x.sk_s = malloc(sig->length_secret_key);
+    x.sg_s = malloc(sig->length_signature);
+    if (!x.pk||!x.sk||!x.msg||!x.sg||!x.pk_s||!x.sk_s||!x.sg_s) die(alg,"out of memory");
+    /* fixed 0xA5 fill: the message content is the same on every run */
+    memset(x.msg, 0xA5, MSGLEN);
+
+    /* ---- correctness check (ONCE, outside the timed loop) ----
+     * keygen -> sign -> verify; the verify MUST succeed on a valid signature.
+     * This also produces the canonical pk/sk/sg that the timed sign and verify
+     * callbacks read. */
+    if (OQS_SIG_keypair(sig, x.pk, x.sk) != OQS_SUCCESS) die(alg, "keygen failed");
+    x.sglen = sig->length_signature;
+    if (OQS_SIG_sign(sig, x.sg, &x.sglen, x.msg, MSGLEN, x.sk) != OQS_SUCCESS) die(alg, "sign failed");
+    if (OQS_SIG_verify(sig, x.msg, MSGLEN, x.sg, x.sglen, x.pk) != OQS_SUCCESS)
+        die(alg, "signature verify failed on a valid signature (broken build)");
+
+    /* One measurement record per operation. */
+    measure_out kg={0}, sg={0}, vf={0};
+    must_measure(alg,"keygen",sig_keygen,&x,cfg,&kg);
+    must_measure(alg,"sign",  sig_sign,  &x,cfg,&sg);
+    must_measure(alg,"verify",sig_verify,&x,cfg,&vf);
+
+    /* Emit the result record as one JSON object on a single line. */
+    printf("{\"alg\":\"%s\",\"kind\":\"sig\",\"backend\":\"liboqs\",\"enabled\":true,", alg);
+    printf("\"claimed_nist_level\":%d,", sig->claimed_nist_level);
+    printf("\"sizes\":{\"public_key\":%zu,\"secret_key\":%zu,\"signature\":%zu},",
+           sig->length_public_key, sig->length_secret_key, sig->length_signature);
+    printf("\"operations\":{");
+    stats_t s;
+    s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
+    s=compute_stats(sg.all,sg.all_n); print_stats_json(stdout,"sign",s,&sg); printf(",");
+    s=compute_stats(vf.all,vf.all_n); print_stats_json(stdout,"verify",s,&vf);
+    printf("}}\n");
+
+    /* Release sample arrays, key/message/signature buffers and the handle. */
+    free(kg.all); free(sg.all); free(vf.all);
+    free(x.pk); free(x.sk); free(x.msg); free(x.sg);
+    free(x.pk_s); free(x.sk_s); free(x.sg_s);
+    OQS_SIG_free(sig);
+    return 0;
+}
+
+/* ======================= OpenSSL classical baselines ===================== */
+/* X25519 as a KEM-analog: keygen + ECDH derive (one shared-secret derivation).
+ * `self` is regenerated by every x25519_keygen() call; `peer` is the other
+ * side of the derive. */
+typedef struct {
+    EVP_PKEY *self;
+    EVP_PKEY *peer;
+} x25519_ctx;
+/* Timed op: replace ctx->self with a freshly generated X25519 key.
+ * Any previous key is freed first. Returns 0 on success, 1 on failure. */
+static int x25519_keygen(void *c)
+{
+    x25519_ctx *ctx = c;
+    EVP_PKEY_CTX *gen;
+    int rc = 1;
+
+    if (ctx->self) {
+        EVP_PKEY_free(ctx->self);
+        ctx->self = NULL;
+    }
+    gen = EVP_PKEY_CTX_new_id(EVP_PKEY_X25519, NULL);
+    if (!gen)
+        return 1;
+    if (EVP_PKEY_keygen_init(gen) > 0 && EVP_PKEY_keygen(gen, &ctx->self) > 0)
+        rc = 0;
+    EVP_PKEY_CTX_free(gen);
+    return rc;
+}
+/* Derive the shared secret of private key `a` and peer key `b` into out[32].
+ * Returns 1 on success and 0 on any OpenSSL failure (note: the opposite
+ * convention from the timed callbacks, which return 0 on success). */
+static int x25519_derive_into(EVP_PKEY *a, EVP_PKEY *b, unsigned char out[32])
+{
+    size_t out_len = 32;
+    int ok = 0;
+    EVP_PKEY_CTX *dctx = EVP_PKEY_CTX_new(a, NULL);
+
+    if (dctx == NULL)
+        return 0;
+    if (EVP_PKEY_derive_init(dctx) > 0
+        && EVP_PKEY_derive_set_peer(dctx, b) > 0
+        && EVP_PKEY_derive(dctx, out, &out_len) > 0)
+        ok = 1;
+    EVP_PKEY_CTX_free(dctx);
+    return ok;
+}
+/* Timed op: one ECDH derivation between ctx->self and ctx->peer.
+ * Returns 0 on success, 1 on failure. */
+static int x25519_derive(void *c)
+{
+    x25519_ctx *ctx = c;
+    unsigned char shared[32];
+    int derived = x25519_derive_into(ctx->self, ctx->peer, shared);
+
+    if (!derived)
+        return 1;
+    g_sink += shared[0];           /* sink the derived shared secret */
+    return 0;
+}
+
+/* Benchmark the OpenSSL X25519 baseline (keygen + derive) and print one JSON
+ * line on stdout. `cfg` is forwarded unchanged to must_measure().
+ * Always returns 0; every failure path goes through die().
+ * NOTE(review): die() is defined outside this view; the code below assumes it
+ * does not return — confirm. */
+static int run_x25519(const bench_cfg *cfg) {
+    x25519_ctx x={0};
+    if (x25519_keygen(&x) != 0) die("X25519","keygen failed");   /* self */
+    /* a fixed peer key for derive */
+    EVP_PKEY_CTX *p = EVP_PKEY_CTX_new_id(EVP_PKEY_X25519,NULL);
+    if(!p || EVP_PKEY_keygen_init(p)<=0 || EVP_PKEY_keygen(p,&x.peer)<=0) die("X25519","peer keygen failed");
+    EVP_PKEY_CTX_free(p);
+
+    /* ---- correctness check (ONCE, outside timing): ECDH must agree ----
+     * Both directions must yield the same 32-byte secret. */
+    {
+        unsigned char sa[32], sb[32];
+        if (!x25519_derive_into(x.self, x.peer, sa)) die("X25519","derive(self,peer) failed");
+        if (!x25519_derive_into(x.peer, x.self, sb)) die("X25519","derive(peer,self) failed");
+        if (memcmp(sa, sb, 32) != 0) die("X25519","ECDH shared-secret mismatch");
+    }
+
+    measure_out kg={0}, dv={0};
+    must_measure("X25519","keygen",x25519_keygen,&x,cfg,&kg);
+    /* keygen frees+replaces self each call; re-make a stable self for derive */
+    if (x25519_keygen(&x) != 0) die("X25519","keygen failed");
+    must_measure("X25519","derive",x25519_derive,&x,cfg,&dv);
+
+    /* Sizes and level are hard-coded constants here; "ciphertext" is emitted
+     * as JSON null. */
+    printf("{\"alg\":\"X25519\",\"kind\":\"kem\",\"backend\":\"openssl\",\"classical\":true,\"enabled\":true,");
+    printf("\"claimed_nist_level\":1,");
+    printf("\"sizes\":{\"public_key\":32,\"secret_key\":32,\"ciphertext\":null,\"shared_secret\":32},");
+    printf("\"operations\":{");
+    stats_t s;
+    s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
+    s=compute_stats(dv.all,dv.all_n); print_stats_json(stdout,"derive",s,&dv);
+    printf("}}\n");
+    free(kg.all); free(dv.all);
+    if(x.self)EVP_PKEY_free(x.self); if(x.peer)EVP_PKEY_free(x.peer);
+    return 0;
+}
+
+/* Ed25519 signature baseline: one key plus fixed-size message and signature
+ * buffers shared by the keygen/sign/verify callbacks. */
+typedef struct {
+    EVP_PKEY     *key;       /* current key; replaced by every ed_keygen() */
+    unsigned char msg[32];   /* message that is signed and verified        */
+    unsigned char sig[64];   /* signature written by ed_sign()             */
+    size_t        siglen;    /* length of the last signature               */
+} ed_ctx;
+/* Timed op: replace ctx->key with a freshly generated Ed25519 key.
+ * Any previous key is freed first. Returns 0 on success, 1 on failure. */
+static int ed_keygen(void *c)
+{
+    ed_ctx *ctx = c;
+    EVP_PKEY_CTX *gen;
+    int rc = 1;
+
+    if (ctx->key) {
+        EVP_PKEY_free(ctx->key);
+        ctx->key = NULL;
+    }
+    gen = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
+    if (!gen)
+        return 1;
+    if (EVP_PKEY_keygen_init(gen) > 0 && EVP_PKEY_keygen(gen, &ctx->key) > 0)
+        rc = 0;
+    EVP_PKEY_CTX_free(gen);
+    return rc;
+}
+/* Timed op: sign ctx->msg with ctx->key into ctx->sig / ctx->siglen.
+ * A new EVP_MD_CTX is created and freed on every call.
+ * Returns 0 on success, 1 on failure. */
+static int ed_sign(void *c)
+{
+    ed_ctx *ctx = c;
+    int ok = 0;
+    EVP_MD_CTX *md = EVP_MD_CTX_new();
+
+    if (md == NULL)
+        return 1;
+    ctx->siglen = sizeof ctx->sig;
+    if (EVP_DigestSignInit(md, NULL, NULL, NULL, ctx->key) > 0
+        && EVP_DigestSign(md, ctx->sig, &ctx->siglen, ctx->msg, sizeof ctx->msg) > 0)
+        ok = 1;
+    EVP_MD_CTX_free(md);
+    if (!ok)
+        return 1;
+    g_sink += ctx->sig[0];         /* sink the signature */
+    return 0;
+}
+/* Timed op: verify ctx->sig (ctx->siglen bytes) over ctx->msg with ctx->key.
+ * A new EVP_MD_CTX is created and freed on every call.
+ * Returns 0 when verification succeeds, 1 otherwise. */
+static int ed_verify(void *c)
+{
+    ed_ctx *ctx = c;
+    int ok = 0;
+    EVP_MD_CTX *md = EVP_MD_CTX_new();
+
+    if (md == NULL)
+        return 1;
+    if (EVP_DigestVerifyInit(md, NULL, NULL, NULL, ctx->key) > 0
+        && EVP_DigestVerify(md, ctx->sig, ctx->siglen, ctx->msg, sizeof ctx->msg) > 0)
+        ok = 1;
+    EVP_MD_CTX_free(md);
+    if (!ok)
+        return 1;
+    g_sink += 1;                   /* sink the (successful) verify result */
+    return 0;
+}
+
+/* Benchmark the OpenSSL Ed25519 baseline (keygen, sign, verify) and print one
+ * JSON line on stdout. `cfg` is forwarded unchanged to must_measure().
+ * Always returns 0; every failure path goes through die().
+ * NOTE(review): die() is defined outside this view; the code below assumes it
+ * does not return — confirm. */
+static int run_ed25519(const bench_cfg *cfg) {
+    /* zeroed context, then a fixed 0xA5-filled 32-byte message */
+    ed_ctx x; memset(&x,0,sizeof x); memset(x.msg,0xA5,sizeof x.msg);
+
+    /* ---- correctness check (ONCE, outside timing): verify MUST succeed ---- */
+    if (ed_keygen(&x) != 0) die("Ed25519","keygen failed");
+    if (ed_sign(&x)   != 0) die("Ed25519","sign failed");
+    if (ed_verify(&x) != 0) die("Ed25519","verify failed on a valid signature (broken build)");
+
+    measure_out kg={0}, sg={0}, vf={0};
+    must_measure("Ed25519","keygen",ed_keygen,&x,cfg,&kg);
+    /* The timed keygen replaced x.key on every call, so x.sig no longer
+     * matches it: make one stable key and a matching signature before timing
+     * sign and verify. */
+    if (ed_keygen(&x)!=0 || ed_sign(&x)!=0) die("Ed25519","re-priming key+sig failed");
+    must_measure("Ed25519","sign",  ed_sign,  &x,cfg,&sg);
+    must_measure("Ed25519","verify",ed_verify,&x,cfg,&vf);
+
+    /* Sizes and level are hard-coded constants here. */
+    printf("{\"alg\":\"Ed25519\",\"kind\":\"sig\",\"backend\":\"openssl\",\"classical\":true,\"enabled\":true,");
+    printf("\"claimed_nist_level\":1,");
+    printf("\"sizes\":{\"public_key\":32,\"secret_key\":32,\"signature\":64},");
+    printf("\"operations\":{");
+    stats_t s;
+    s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
+    s=compute_stats(sg.all,sg.all_n); print_stats_json(stdout,"sign",s,&sg); printf(",");
+    s=compute_stats(vf.all,vf.all_n); print_stats_json(stdout,"verify",s,&vf);
+    printf("}}\n");
+    free(kg.all); free(sg.all); free(vf.all);
+    if(x.key)EVP_PKEY_free(x.key);
+    return 0;
+}
+
+/* ---- main --------------------------------------------------------------- */
+/* Write the command-line help text to stderr. */
+static void usage(void)
+{
+    static const char help_text[] =
+      "usage: bench_pq --kind kem|sig --alg NAME [options]\n"
+      "  auto-calibration (default): each op is timed to ~--target-time-ms of\n"
+      "    aggregate work, clamped to [--min-samples, --max-iters].\n"
+      "  --target-time-ms N   per-op timed-work target (default 250)\n"
+      "  --min-samples N      floor on timed iters per rep (default 30)\n"
+      "  --max-iters N        ceiling on timed iters per rep (default 20000)\n"
+      "  --reps N             independent repetitions (default 5)\n"
+      "  --iters N            FIXED-count fallback: exactly N timed iters/rep\n"
+      "                       (disables calibration; pairs with --warmup)\n"
+      "  --warmup N           warmup iters/rep in fixed-count mode (default 1000)\n";
+
+    /* the text contains no conversion specifiers, so fputs emits it verbatim */
+    fputs(help_text, stderr);
+}
+
+/* Parse a strictly-decimal, non-negative integer argument into *out.
+ * Returns 1 on success; returns 0 and leaves *out untouched for an empty
+ * string, any non-digit character (sign, whitespace, trailing junk) or a
+ * value that does not fit in uint64_t. */
+static int bench_parse_u64(const char *s, uint64_t *out) {
+    uint64_t v = 0;
+    if (!s || !*s) return 0;
+    for (; *s; s++) {
+        if (*s < '0' || *s > '9') return 0;
+        uint64_t d = (uint64_t)(*s - '0');
+        if (v > (UINT64_MAX - d) / 10) return 0;   /* v*10+d would overflow */
+        v = v * 10 + d;
+    }
+    *out = v;
+    return 1;
+}
+
+/* Entry point: parse the command line, build the measurement configuration
+ * and dispatch to the matching benchmark runner.
+ *
+ * Exit status: the runner's return value, or 2 (after printing usage) when
+ * --kind/--alg is missing, --kind is neither "kem" nor "sig", an option is
+ * unknown or lacks its value, or a numeric option is not a plain decimal
+ * number. Malformed numbers are rejected instead of silently becoming 0 or
+ * wrapping around. */
+int main(int argc, char **argv) {
+    const char *kind=NULL, *alg=NULL;
+    /* fixed-count fallback knobs */
+    uint64_t warmup=1000, iters=0, reps=5;   /* iters=0 => auto-calibrate */
+    /* auto-calibration knobs */
+    uint64_t target_time_ms=250, min_samples=30, max_iters=20000;
+
+    /* every numeric option and the variable it fills */
+    const struct { const char *flag; uint64_t *dst; } num_opts[] = {
+        { "--warmup",         &warmup         },
+        { "--iters",          &iters          },
+        { "--reps",           &reps           },
+        { "--target-time-ms", &target_time_ms },
+        { "--min-samples",    &min_samples    },
+        { "--max-iters",      &max_iters      },
+    };
+    const size_t n_num_opts = sizeof num_opts / sizeof num_opts[0];
+
+    for (int i=1;i<argc;i++){
+        const char *flag = argv[i];
+        /* every supported option takes exactly one value */
+        if (i+1 >= argc) { usage(); return 2; }
+        const char *val = argv[++i];
+        if (!strcmp(flag,"--kind")) { kind = val; continue; }
+        if (!strcmp(flag,"--alg"))  { alg  = val; continue; }
+        size_t k = 0;
+        while (k < n_num_opts && strcmp(flag, num_opts[k].flag) != 0) k++;
+        if (k == n_num_opts || !bench_parse_u64(val, num_opts[k].dst)) { usage(); return 2; }
+    }
+    if(!kind||!alg){ usage(); return 2; }
+
+    /* reject an unknown --kind before any library initialisation */
+    const int is_kem = !strcmp(kind,"kem");
+    const int is_sig = !strcmp(kind,"sig");
+    if (!is_kem && !is_sig) { usage(); return 2; }
+
+    if(reps < 1) reps = 1;
+    if(min_samples < 1) min_samples = 1;
+    if(max_iters < min_samples) max_iters = min_samples;
+    /* keep the ms -> ns conversion below from wrapping around */
+    if(target_time_ms > UINT64_MAX / 1000000ull) target_time_ms = UINT64_MAX / 1000000ull;
+
+    bench_cfg cfg = {
+        .fixed_iters = iters,                       /* >0 => fixed-count mode */
+        .target_ns   = target_time_ms * 1000000ull,
+        .min_samples = min_samples,
+        .max_iters   = max_iters,
+        .warmup      = warmup,
+        .reps        = reps,
+    };
+    if (iters>0)
+        fprintf(stderr,"[bench_pq] mode=fixed-count reps=%llu warmup=%llu iters=%llu\n",
+                (unsigned long long)reps,(unsigned long long)warmup,(unsigned long long)iters);
+    else
+        fprintf(stderr,"[bench_pq] mode=auto-calibrate reps=%llu target=%llums min_samples=%llu max_iters=%llu\n",
+                (unsigned long long)reps,(unsigned long long)target_time_ms,
+                (unsigned long long)min_samples,(unsigned long long)max_iters);
+
+    /* PMU cycle availability probe, reported once on stderr. The reason
+     * pointer is guarded because passing NULL to %s is undefined behaviour. */
+    const char *pmu_reason=NULL;
+    int pmu_ok = probe_pmu(&pmu_reason);
+    fprintf(stderr,"[bench_pq] cycles_available=%d (%s)\n", pmu_ok,
+            pmu_reason ? pmu_reason : "no reason given");
+
+    OQS_init();
+    int rc;
+    if (is_kem) {
+        /* the classical baselines are served by OpenSSL, the rest by liboqs */
+        if(!strcmp(alg,"X25519")) rc=run_x25519(&cfg);
+        else rc=run_kem(alg,&cfg);
+    } else {
+        if(!strcmp(alg,"Ed25519")) rc=run_ed25519(&cfg);
+        else rc=run_sig(alg,&cfg);
+    }
+    OQS_destroy();
+    return rc;
+}
--- a/pq-bench-rpi5/bench/lib/assemble.py
+++ b/pq-bench-rpi5/bench/lib/assemble.py
@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""assemble.py — merge harness outputs + thermal trace + environment metadata
+into one results JSON with full provenance.
+
+Inputs (all paths):
+  --meta      meta.env       KEY=VALUE run/host facts collected by run.sh
+  --lock      versions.lock  toolchain provenance from setup.sh
+  --features  cpu_features.json  CPU/crypto-extension detection (from run.sh)
+  --kemsig    kemsig.jsonl   one JSON object per algorithm from bench_pq
+  --tls       tls.json       output of the TLS harness (optional)
+  --thermal   thermal.csv    epoch_s,arm_clock_hz,temp_c,throttled_hex samples
+  --out       results/<host>-<ts>.json
+
+The single most important output field is `is_baseline_grade`: true ONLY on a
+real RPi5 with performance governor, core pinning, A76-targeted flags, and no
+thermal throttling. Everything else (notably macOS smoke runs) is false, with
+reasons recorded — so smoke output can never be mistaken for the baseline.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import statistics
+import sys
+
+SCHEMA_VERSION = "1.0.0"
+
+
+def parse_envfile(path: str) -> dict:
+    """Parse KEY=VALUE / KEY="value" lines (shared format of meta.env + versions.lock)."""
+    out = {}
+    if not path or not os.path.exists(path):
+        return out
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            k, v = line.split("=", 1)
+            v = v.strip()
+            if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'":
+                v = v[1:-1]
+            out[k.strip()] = v
+    return out
+
+
+def load_jsonl(path: str) -> list:
+    items = []
+    if not path or not os.path.exists(path):
+        return items
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                try:
+                    items.append(json.loads(line))
+                except json.JSONDecodeError as e:
+                    print(f"[assemble] skipping bad JSONL line: {e}", file=sys.stderr)
+    return items
+
+
+def load_json(path: str):
+    if path and os.path.exists(path):
+        with open(path) as f:
+            return json.load(f)
+    return None
+
+
+def parse_thermal(path: str) -> dict:
+    """Reduce the raw CSV trace to a compact embedded record + summary."""
+    cols = ["epoch_s", "arm_clock_hz", "temp_c", "throttled_hex"]
+    samples, temps, clocks = [], [], []
+    throttling_detected = False
+    if path and os.path.exists(path):
+        with open(path) as f:
+            for line in f:
+                parts = line.strip().split(",")
+                if len(parts) != 4:
+                    continue
+                ep, clk, temp, thr = parts
+                samples.append([
+                    int(ep) if ep else None,
+                    int(clk) if clk else None,
+                    float(temp) if temp else None,
+                    thr or None,
+                ])
+                if temp:
+                    temps.append(float(temp))
+                if clk:
+                    clocks.append(int(clk))
+                if thr:
+                    try:
+                        v = int(thr, 16)
+                        # bit2 = throttling now, bit18 = throttling has occurred
+                        if v & 0x4 or v & 0x40000:
+                            throttling_detected = True
+                    except ValueError:
+                        pass
+
+    def summarize(vals):
+        if not vals:
+            return None
+        return {
+            "min": min(vals), "max": max(vals),
+            "mean": round(statistics.fmean(vals), 3),
+            "samples": len(vals),
+        }
+
+    clock_summary = summarize(clocks)
+    # Detect frequency droop as a secondary throttling signal.
+    if clock_summary and clocks:
+        spread = (max(clocks) - min(clocks)) / max(clocks)
+        clock_summary["spread_frac"] = round(spread, 4)
+
+    return {
+        "columns": cols,
+        "samples": samples,
+        "temp_c": summarize(temps),
+        "arm_clock_hz": clock_summary,
+        "throttling_detected": throttling_detected,
+    }
+
+
+def to_int(s, default=None):
+    try:
+        return int(s)
+    except (TypeError, ValueError):
+        return default
+
+
+def main():
+    """Command-line entry point: merge all run inputs into one results JSON.
+
+    Reads the files named by --meta/--lock/--features/--kemsig/--tls/--thermal,
+    computes `is_baseline_grade` together with the reasons it is false, writes
+    the merged document to --out (creating its directory if needed) and prints
+    the output path on stdout. --config is accepted but not read here.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--meta", required=True)
+    ap.add_argument("--lock", default="")
+    ap.add_argument("--features", default="")
+    ap.add_argument("--kemsig", default="")
+    ap.add_argument("--tls", default="")
+    ap.add_argument("--thermal", default="")
+    ap.add_argument("--config", default="")
+    ap.add_argument("--out", required=True)
+    args = ap.parse_args()
+
+    # Every loader tolerates a missing/empty path and returns an empty value.
+    meta = parse_envfile(args.meta)
+    lock = parse_envfile(args.lock)
+    features = load_json(args.features) or {}
+    kemsig = load_jsonl(args.kemsig)
+    tls = load_json(args.tls)
+    thermal = parse_thermal(args.thermal)
+
+    is_rpi = meta.get("IS_RPI") == "1"
+    # Prefer the governor observed after the run; fall back to the one before.
+    governor = meta.get("GOVERNOR_AFTER") or meta.get("GOVERNOR_BEFORE") or "unknown"
+    pinned = meta.get("PINNED") == "1"
+    cflags_target = lock.get("CFLAGS_TARGET", "unknown")
+
+    # ---- the anti-confusion gate -----------------------------------------
+    # Each failed condition adds a human-readable reason; the run is
+    # baseline-grade only when no reason was collected.
+    baseline_reasons = []
+    if not is_rpi:
+        baseline_reasons.append(
+            f"host is not a Raspberry Pi (model='{meta.get('RPI_MODEL','')}', os={meta.get('OS')})")
+    if governor != "performance":
+        baseline_reasons.append(f"CPU governor is '{governor}', not 'performance'")
+    if not pinned:
+        baseline_reasons.append("benchmark was not pinned to a dedicated core (no taskset)")
+    if cflags_target != "cortex-a76":
+        baseline_reasons.append(f"build flags targeted '{cflags_target}', not cortex-a76")
+    if thermal.get("throttling_detected"):
+        baseline_reasons.append("thermal throttling was detected during the run")
+    is_baseline_grade = len(baseline_reasons) == 0
+
+    # WARNINGS in meta.env is a "||"-separated list; empty entries are dropped.
+    warnings = []
+    raw_warn = meta.get("WARNINGS", "")
+    if raw_warn:
+        warnings.extend([w for w in raw_warn.split("||") if w])
+    if not is_baseline_grade:
+        warnings.append("NOT RPi5-baseline-grade: " + "; ".join(baseline_reasons))
+
+    result = {
+        "schema_version": SCHEMA_VERSION,
+        "tool_version": meta.get("TOOL_VERSION", "0.1.0"),
+        "generated_utc": meta.get("TS_END_UTC", ""),
+        "is_baseline_grade": is_baseline_grade,
+        "baseline_grade_reasons": baseline_reasons,
+        "host": {
+            "hostname": meta.get("HOSTNAME", ""),
+            "os": meta.get("OS", ""),
+            "os_pretty": meta.get("OS_PRETTY", ""),
+            "arch": meta.get("ARCH", ""),
+            "kernel": meta.get("KERNEL", ""),
+            "is_rpi": is_rpi,
+            "rpi_model": meta.get("RPI_MODEL", ""),
+            "cpu_brand": meta.get("CPU_BRAND", ""),
+            "ncpu": to_int(meta.get("NCPU")),
+            "ram_bytes": to_int(meta.get("RAM_BYTES")),
+        },
+        "cpu_features": features,
+        "run": {
+            "timestamp_start_utc": meta.get("TS_START_UTC", ""),
+            "timestamp_end_utc": meta.get("TS_END_UTC", ""),
+            "duration_s": to_int(meta.get("DURATION_S")),
+            "governor_requested": meta.get("GOVERNOR_REQUESTED", ""),
+            "governor_before": meta.get("GOVERNOR_BEFORE", ""),
+            "governor_after": meta.get("GOVERNOR_AFTER", ""),
+            "bench_core": to_int(meta.get("BENCH_CORE")),
+            "pinned": pinned,
+            "taskset_cmd": meta.get("TASKSET_CMD", ""),
+            # Per-op sizing. In auto-calibration mode warmup_iters/timed_iters are
+            # chosen per operation (see each entry under kem/sig "operations");
+            # the run-level target/min/max below describe how they were derived.
+            "calibration_mode": meta.get("CALIB_MODE", "auto"),
+            "target_time_ms": to_int(meta.get("TARGET_TIME_MS")),
+            "min_samples": to_int(meta.get("MIN_SAMPLES")),
+            "max_iters": to_int(meta.get("MAX_ITERS")),
+            "warmup_iters": to_int(meta.get("WARMUP")),
+            "timed_iters": to_int(meta.get("ITERS")),
+            "repetitions": to_int(meta.get("REPS")),
+            "cycles_mode": meta.get("CYCLES_MODE", ""),
+            "cycles_available": meta.get("CYCLES_AVAILABLE") == "1",
+            "cycles_reason": meta.get("CYCLES_REASON", ""),
+        },
+        "toolchain": {
+            "cc_version": lock.get("CC_VERSION", ""),
+            "bench_cflags": lock.get("BENCH_CFLAGS", ""),
+            "cflags_target": cflags_target,
+            "liboqs_ref": lock.get("LIBOQS_REF", ""),
+            "liboqs_commit": lock.get("LIBOQS_COMMIT", ""),
+            "liboqs_opt_defines": lock.get("LIBOQS_OPT_DEFINES", ""),
+            "openssl": lock.get("OPENSSL_COMMIT", ""),
+            "oqsprovider_ref": lock.get("OQSPROVIDER_REF", ""),
+            "oqsprovider_commit": lock.get("OQSPROVIDER_COMMIT", ""),
+        },
+        "thermal_trace": thermal,
+        "warnings": warnings,
+        # bench_pq records are split by their "kind" field.
+        "kem": [r for r in kemsig if r.get("kind") == "kem"],
+        "sig": [r for r in kemsig if r.get("kind") == "sig"],
+        "tls": tls,
+    }
+
+    # dirname is "" for a bare filename; fall back to the current directory.
+    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
+    with open(args.out, "w") as f:
+        json.dump(result, f, indent=2)
+    print(args.out)
+
+
+if __name__ == "__main__":
+    main()
--- a/pq-bench-rpi5/bench/lib/list_algs.py
+++ b/pq-bench-rpi5/bench/lib/list_algs.py
@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""list_algs.py — expand config.yaml into shell-consumable lines for run.sh.
+
+Modes:
+  measurement   -> KEY=VALUE lines (auto-calib: target/min_samples/max_iters/
+                   reps/cycles_mode; plus warmup/iters fixed-count fallback)
+  kemsig        -> one "kind<TAB>alg<TAB>is_classical" line per algorithm,
+                   baselines first (so charts always have the reference point)
+  tls           -> emits the TLS matrix as JSON on one line
+
+Uses the dependency-free miniyaml parser so no PyYAML is required.
+"""
+import os
+import sys
+import json
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+import miniyaml  # noqa: E402
+
+
+def main():
+    mode = sys.argv[1] if len(sys.argv) > 1 else "kemsig"
+    cfg_path = sys.argv[2] if len(sys.argv) > 2 else "config.yaml"
+    cfg = miniyaml.load_file(cfg_path)
+
+    if mode == "measurement":
+        m = cfg.get("measurement", {}) or {}
+        # auto-calibration knobs (default path)
+        print(f"TARGET_TIME_MS={m.get('target_time_ms', 250)}")
+        print(f"MIN_SAMPLES={m.get('min_samples', 30)}")
+        print(f"MAX_ITERS={m.get('max_iters', 20000)}")
+        print(f"REPS={m.get('repetitions', 5)}")
+        print(f"CYCLES_MODE={m.get('cycles_mode', 'auto')}")
+        # fixed-count fallback knobs (used only with ./run.sh --iters)
+        print(f"WARMUP={m.get('warmup_iters', 1000)}")
+        print(f"ITERS={m.get('timed_iters', 10000)}")
+
+    elif mode == "kemsig":
+        for kind in ("kem", "sig"):
+            section = cfg.get(kind, {}) or {}
+            for grp in ("baseline", "candidates"):
+                for item in (section.get(grp) or []):
+                    if isinstance(item, dict):
+                        name = item.get("name")
+                        classical = "1" if item.get("classical") else "0"
+                    else:
+                        name, classical = item, "0"
+                    if name:
+                        print(f"{kind}\t{name}\t{classical}")
+
+    elif mode == "tls":
+        print(json.dumps(cfg.get("tls", {})))
+
+    else:
+        sys.exit(f"unknown mode: {mode}")
+
+
+if __name__ == "__main__":
+    main()
--- a/pq-bench-rpi5/bench/lib/miniyaml.py
+++ b/pq-bench-rpi5/bench/lib/miniyaml.py
@ -0,0 +1,151 @@
+"""miniyaml — a dependency-free parser for the restricted YAML subset used by
+config.yaml.
+
+We deliberately avoid a PyYAML runtime dependency so the core pipeline runs on a
+stock Python 3 (the RPi5 / Mac smoke box both have only stdlib by default).
+
+Supported subset (sufficient for config.yaml):
+  - nested mappings via indentation (2 spaces per level by convention)
+  - lists of scalars:           "- value"
+  - lists of mappings:          "- key: value" then indented "key: value"
+  - scalars: int, float, bool (true/false), null/~, quoted or bare strings
+  - "# comment" to end of line (outside quotes)
+
+It is NOT a general YAML implementation. If a user needs full YAML they can
+install PyYAML and set PQB_USE_PYYAML=1.
+"""
+from __future__ import annotations
+import os
+import re
+
+
+def _scalar(tok: str):
+    s = tok.strip()
+    if s == "" or s in ("~", "null", "Null", "NULL"):
+        return None
+    if (s[0] == '"' and s[-1] == '"') or (s[0] == "'" and s[-1] == "'"):
+        return s[1:-1]
+    low = s.lower()
+    if low in ("true", "yes"):
+        return True
+    if low in ("false", "no"):
+        return False
+    if re.fullmatch(r"[+-]?\d+", s):
+        return int(s)
+    if re.fullmatch(r"[+-]?\d*\.\d+([eE][+-]?\d+)?", s):
+        return float(s)
+    return s
+
+
+def _strip_comment(line: str) -> str:
+    out, q = [], None
+    for ch in line:
+        if q:
+            out.append(ch)
+            if ch == q:
+                q = None
+        elif ch in ("'", '"'):
+            q = ch
+            out.append(ch)
+        elif ch == "#":
+            break
+        else:
+            out.append(ch)
+    return "".join(out).rstrip()
+
+
+def _indent(line: str) -> int:
+    return len(line) - len(line.lstrip(" "))
+
+
+def loads(text: str):
+    """Parse the restricted YAML subset in `text` into dicts, lists and scalars.
+
+    The text is reduced to (indent, stripped content, full line) tuples, with
+    comments removed and blank lines dropped, and then consumed by three
+    mutually recursive helpers that share the cursor `pos[0]`. Returns {}
+    when the document has no content.
+
+    NOTE(review): a list written at the same indent as its parent key
+    ("key:" followed by "- item" in the same column) is not attached to the
+    key, and parsing appears to stop at that line — confirm config.yaml
+    always indents list items.
+    """
+    # Tokenize into (indent, content) ignoring blank/comment-only lines.
+    lines = []
+    for raw in text.splitlines():
+        c = _strip_comment(raw)
+        if c.strip() == "":
+            continue
+        lines.append((_indent(c), c.strip(), c))
+    # one-element list so the nested helpers can advance a shared cursor
+    pos = [0]
+
+    def parse_block(min_indent: int):
+        # Dispatch on the current line: "- " starts a list, anything else a
+        # map. The block's indent is read from that line; `min_indent` is not
+        # consulted.
+        if pos[0] >= len(lines):
+            return None
+        indent = lines[pos[0]][0]
+        if lines[pos[0]][1].startswith("- "):
+            return parse_list(indent)
+        return parse_map(indent)
+
+    def parse_map(indent: int):
+        # Collect "key: value" lines at exactly `indent`; stop at a shallower
+        # line or at a list item.
+        obj = {}
+        while pos[0] < len(lines):
+            ind, stripped, _ = lines[pos[0]]
+            if ind < indent or stripped.startswith("- "):
+                break
+            if ind > indent:  # malformed; skip
+                pos[0] += 1
+                continue
+            m = re.match(r"^([^:]+):\s*(.*)$", stripped)
+            if not m:
+                # not a "key: value" line; ignore it
+                pos[0] += 1
+                continue
+            key, val = m.group(1).strip(), m.group(2)
+            pos[0] += 1
+            if val == "":
+                # nested block or empty
+                if pos[0] < len(lines) and lines[pos[0]][0] > indent:
+                    obj[key] = parse_block(indent + 1)
+                else:
+                    obj[key] = None
+            else:
+                obj[key] = _scalar(val)
+        return obj
+
+    def parse_list(indent: int):
+        # Collect "- item" lines at exactly `indent`.
+        arr = []
+        while pos[0] < len(lines):
+            ind, stripped, _ = lines[pos[0]]
+            if ind < indent or not stripped.startswith("- "):
+                break
+            if ind > indent:
+                break
+            item = stripped[2:].strip()
+            pos[0] += 1
+            if ":" in item and not (item[0] in "'\""):
+                # list of mappings — first pair is inline, rest are indented deeper
+                # NOTE(review): an unquoted item that starts with ':' does not
+                # match the regex below and `m` would be None — confirm such
+                # items cannot occur in config.yaml.
+                sub = {}
+                m = re.match(r"^([^:]+):\s*(.*)$", item)
+                key, val = m.group(1).strip(), m.group(2)
+                sub[key] = _scalar(val) if val != "" else None
+                child_indent = indent + 2
+                # Continuation keys: every deeper non-list line is folded into
+                # this same mapping; values are scalars only (no nesting).
+                while pos[0] < len(lines) and lines[pos[0]][0] >= child_indent \
+                        and not lines[pos[0]][1].startswith("- "):
+                    ind2, strip2, _ = lines[pos[0]]
+                    m2 = re.match(r"^([^:]+):\s*(.*)$", strip2)
+                    if not m2:
+                        pos[0] += 1
+                        continue
+                    k2, v2 = m2.group(1).strip(), m2.group(2)
+                    pos[0] += 1
+                    sub[k2] = _scalar(v2) if v2 != "" else None
+                arr.append(sub)
+            else:
+                arr.append(_scalar(item))
+        return arr
+
+    return parse_block(0) or {}
+
+
+def load_file(path: str):
+    if os.environ.get("PQB_USE_PYYAML") == "1":
+        import yaml  # type: ignore
+        with open(path) as f:
+            return yaml.safe_load(f)
+    with open(path) as f:
+        return loads(f.read())
+
+
+if __name__ == "__main__":
+    import json
+    import sys
+    print(json.dumps(load_file(sys.argv[1]), indent=2))
--- a/pq-bench-rpi5/bench/tls/Makefile
+++ b/pq-bench-rpi5/bench/tls/Makefile
@ -0,0 +1,17 @@
+# Build the TLS handshake harness against the OpenSSL that has oqs-provider.
+#   make OPENSSL_PREFIX=...   (defaults to Homebrew openssl@3, else system)
+
+# Resolved via `brew --prefix openssl@3`; falls back to /usr when that command
+# fails (for example when Homebrew is not installed).
+OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3 2>/dev/null || echo /usr)
+
+CC     ?= cc
+# Headers and libraries both come from OPENSSL_PREFIX; the rpath makes the
+# binary load libssl/libcrypto from that same prefix at run time.
+CFLAGS := -O3 -std=c11 -Wall -Wextra -I$(OPENSSL_PREFIX)/include
+LDFLAGS := -L$(OPENSSL_PREFIX)/lib -Wl,-rpath,$(OPENSSL_PREFIX)/lib
+LDLIBS  := -lssl -lcrypto -lm
+
+# First target in the file, so a plain `make` builds it.
+bench_tls: bench_tls.c
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDLIBS)
+
+# Remove the built binary.
+clean:
+	rm -f bench_tls
+
+.PHONY: clean
--- a/pq-bench-rpi5/bench/tls/bench_tls.c
+++ b/pq-bench-rpi5/bench/tls/bench_tls.c
@ -0,0 +1,194 @@
+/* ===========================================================================
+ * bench_tls.c — TLS 1.3 handshake benchmark via the OpenSSL API.
+ *
+ * Performs full TLS 1.3 handshakes entirely in-process over a pair of memory
+ * BIOs (no sockets, no CLI scraping). This gives:
+ *   - clean per-handshake wall-clock latency (no socket/scheduler noise)
+ *   - exact bytes-on-the-wire each direction
+ *   - the precise ClientHello flight size (with a fragmentation note vs MSS)
+ *   - real server-cert signature verification cost (client verifies the chain),
+ *     which is the whole point of sweeping PQ signature algorithms.
+ *
+ *   bench_tls --group X25519MLKEM768 --ca ca.pem \
+ *             --cert server.pem --key server.key --connections 1000 \
+ *             --label "X25519MLKEM768+mldsa65"
+ *
+ * Emits one JSON object. PQ groups/sigs require oqs-provider to be loadable
+ * (point OPENSSL_MODULES at its directory); if it cannot load, we say so.
+ * ===========================================================================*/
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+#include <time.h>
+
+#include <openssl/ssl.h>
+#include <openssl/bio.h>
+#include <openssl/err.h>
+#include <openssl/provider.h>
+
+#define TYPICAL_MSS 1400   /* note fragmentation when ClientHello exceeds this */
+
+/* Current CLOCK_MONOTONIC reading, expressed in nanoseconds. */
+static inline uint64_t now_ns(void) {
+    struct timespec stamp;
+    clock_gettime(CLOCK_MONOTONIC, &stamp);
+    const uint64_t whole_sec_ns = (uint64_t)stamp.tv_sec * 1000000000ull;
+    const uint64_t frac_ns = (uint64_t)stamp.tv_nsec;
+    return whole_sec_ns + frac_ns;
+}
+
+/* qsort comparator ordering uint64_t values ascending: yields -1, 0 or 1. */
+static int cmp_u64(const void *a, const void *b) {
+    const uint64_t lhs = *(const uint64_t *)a;
+    const uint64_t rhs = *(const uint64_t *)b;
+    if (lhs < rhs) return -1;
+    if (lhs > rhs) return 1;
+    return 0;
+}
+/* Linearly interpolated percentile of an ascending-sorted sample array.
+ *   s  sorted samples;  n  number of samples;  p  fraction of the way through
+ *   the array (the caller passes 0.5 for the median, 0.95 for p95).
+ * Returns 0 for an empty array, the only sample when n == 1, and the last
+ * sample when the interpolation index lands on the final element. */
+static double pct(const uint64_t *s, uint64_t n, double p) {
+    if (n == 0) return 0;
+    if (n == 1) return (double)s[0];
+
+    const double pos = p * (double)(n - 1);
+    const uint64_t below = (uint64_t)pos;
+    const double frac = pos - below;
+    if (below + 1 >= n) return (double)s[n - 1];
+
+    const double left = (double)s[below];
+    const double right = (double)s[below + 1];
+    return left + frac * (right - left);
+}
+
+/* Shuttle all pending bytes from src's mem BIO into dst's mem BIO.
+ * Returns the number of bytes actually accepted by dst. If dst rejects or
+ * short-writes a chunk the transfer stops there, so the return value never
+ * counts bytes that did not arrive. */
+static size_t pump(BIO *src, BIO *dst) {
+    char buf[16384]; size_t total = 0; int n;
+    while ((n = BIO_read(src, buf, (int)sizeof buf)) > 0) {
+        int w = BIO_write(dst, buf, n);
+        if (w > 0) total += (size_t)w;
+        if (w != n) break;   /* write failed or was short: stop instead of spinning */
+    }
+    return total;
+}
+
+/* One full handshake. Records bytes each way and ClientHello size (first flight).
+ * The three out-parameters are zeroed on entry, so they are defined on every
+ * return path.
+ * Returns 0 on success, -1 if an SSL/BIO object could not be allocated, and -2
+ * if the handshake did not complete (fatal TLS error, stall, or the 64-round
+ * limit was reached). */
+static int one_handshake(SSL_CTX *cctx, SSL_CTX *sctx,
+                         size_t *c2s_bytes, size_t *s2c_bytes, size_t *chello) {
+    *c2s_bytes = *s2c_bytes = *chello = 0;
+
+    /* SSL_get_error() consults the thread's error queue, so entries left behind
+     * by an earlier failed handshake must not leak into this one. */
+    ERR_clear_error();
+
+    SSL *cli = SSL_new(cctx), *srv = SSL_new(sctx);
+    BIO *cli_rb = BIO_new(BIO_s_mem()), *cli_wb = BIO_new(BIO_s_mem());
+    BIO *srv_rb = BIO_new(BIO_s_mem()), *srv_wb = BIO_new(BIO_s_mem());
+    if (!cli || !srv || !cli_rb || !cli_wb || !srv_rb || !srv_wb) {
+        /* Nothing has been handed to the SSL objects yet, so release each piece
+         * individually (the *_free functions accept NULL). */
+        BIO_free(cli_rb); BIO_free(cli_wb); BIO_free(srv_rb); BIO_free(srv_wb);
+        SSL_free(cli); SSL_free(srv);
+        return -1;
+    }
+    SSL_set_bio(cli, cli_rb, cli_wb);   /* takes ownership */
+    SSL_set_bio(srv, srv_rb, srv_wb);
+    SSL_set_connect_state(cli);
+    SSL_set_accept_state(srv);
+
+    int first_flight = 1, rc = 0;
+    for (int i = 0; i < 64; i++) {
+        /* Client step: query the error immediately after the call it refers to. */
+        int r_c = SSL_do_handshake(cli);
+        int e_c = (r_c == 1) ? SSL_ERROR_NONE : SSL_get_error(cli, r_c);
+        size_t moved_c = pump(cli_wb, srv_rb);
+        if (first_flight && moved_c > 0) { *chello = moved_c; first_flight = 0; }
+        *c2s_bytes += moved_c;
+
+        /* Server step. */
+        int r_s = SSL_do_handshake(srv);
+        int e_s = (r_s == 1) ? SSL_ERROR_NONE : SSL_get_error(srv, r_s);
+        size_t moved_s = pump(srv_wb, cli_rb);
+        *s2c_bytes += moved_s;
+
+        if (SSL_is_init_finished(cli) && SSL_is_init_finished(srv)) break;
+        /* A fatal error on either side cannot recover: stop right away. */
+        if (e_c == SSL_ERROR_SSL || e_c == SSL_ERROR_SYSCALL ||
+            e_s == SSL_ERROR_SSL || e_s == SSL_ERROR_SYSCALL) { rc = -2; break; }
+        /* Neither side produced any bytes this round and the handshake is not
+         * complete: each is waiting on the other, i.e. a stall. (Checking the
+         * write BIOs for pending data here would be useless — pump() has
+         * already drained them.) */
+        if (moved_c == 0 && moved_s == 0) { rc = -2; break; }
+    }
+    if (!SSL_is_init_finished(cli) || !SSL_is_init_finished(srv)) rc = -2;
+    SSL_free(cli); SSL_free(srv);   /* also frees the BIOs they own */
+    return rc;
+}
+
+/* Entry point: parse CLI flags, configure TLS 1.3-only client/server contexts,
+ * run `warmup` untimed and then `connections` timed in-process handshakes, and
+ * print one JSON object on stdout.
+ *
+ * Exit status: 2 for bad usage, 1 for context/allocation failure or when every
+ * timed handshake failed, 0 otherwise — including the cases where the cell is
+ * reported as {"enabled":false,...} (unsupported group, cert/CA load failure,
+ * warmup handshake failure). */
+int main(int argc, char **argv) {
+    const char *group = "X25519", *ca = NULL, *cert = NULL, *key = NULL, *label = "";
+    uint64_t connections = 1000, warmup = 20;
+    for (int i = 1; i < argc; i++) {
+        if (!strcmp(argv[i], "--group") && i+1<argc) group = argv[++i];
+        else if (!strcmp(argv[i], "--ca") && i+1<argc) ca = argv[++i];
+        else if (!strcmp(argv[i], "--cert") && i+1<argc) cert = argv[++i];
+        else if (!strcmp(argv[i], "--key") && i+1<argc) key = argv[++i];
+        else if (!strcmp(argv[i], "--connections") && i+1<argc) connections = strtoull(argv[++i],0,10);
+        else if (!strcmp(argv[i], "--warmup") && i+1<argc) warmup = strtoull(argv[++i],0,10);
+        else if (!strcmp(argv[i], "--label") && i+1<argc) label = argv[++i];
+        else { fprintf(stderr,"bad arg %s\n",argv[i]); return 2; }
+    }
+    if (!cert || !key) { fprintf(stderr,"--cert and --key required\n"); return 2; }
+    /* The latency buffer holds one uint64_t per connection; refuse counts whose
+     * byte size would overflow size_t. */
+    if (connections > SIZE_MAX / sizeof(uint64_t)) {
+        fprintf(stderr,"--connections too large\n"); return 2;
+    }
+
+    /* providers: default always; oqs if discoverable (OPENSSL_MODULES) */
+    OSSL_PROVIDER_load(NULL, "default");
+    int have_oqs = OSSL_PROVIDER_load(NULL, "oqsprovider") != NULL;
+
+    SSL_CTX *cctx = SSL_CTX_new(TLS_client_method());
+    SSL_CTX *sctx = SSL_CTX_new(TLS_server_method());
+    if (!cctx || !sctx) { fprintf(stderr,"ctx alloc failed\n"); return 1; }
+    /* Pin both ends to TLS 1.3 only. */
+    SSL_CTX_set_min_proto_version(cctx, TLS1_3_VERSION);
+    SSL_CTX_set_max_proto_version(cctx, TLS1_3_VERSION);
+    SSL_CTX_set_min_proto_version(sctx, TLS1_3_VERSION);
+    SSL_CTX_set_max_proto_version(sctx, TLS1_3_VERSION);
+
+    int group_ok = SSL_CTX_set1_groups_list(cctx, group) &&
+                   SSL_CTX_set1_groups_list(sctx, group);
+
+    int cert_ok = SSL_CTX_use_certificate_chain_file(sctx, cert) == 1 &&
+                  SSL_CTX_use_PrivateKey_file(sctx, key, SSL_FILETYPE_PEM) == 1;
+
+    /* client verifies the server chain -> exercises PQ signature verify cost */
+    int verify_setup = 1;
+    if (ca) {
+        if (SSL_CTX_load_verify_locations(cctx, ca, NULL) != 1) verify_setup = 0;
+        SSL_CTX_set_verify(cctx, SSL_VERIFY_PEER, NULL);
+    } else {
+        SSL_CTX_set_verify(cctx, SSL_VERIFY_NONE, NULL);
+    }
+
+    /* Setup problems are reported as a disabled cell (exit 0), not a crash. */
+    if (!group_ok || !cert_ok || !verify_setup) {
+        printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":false,\"have_oqs_provider\":%s,"
+               "\"reason\":\"%s%s%s\"}\n",
+               label, group, have_oqs?"true":"false",
+               group_ok?"":"group-not-supported ",
+               cert_ok?"":"cert-load-failed ",
+               verify_setup?"":"ca-load-failed");
+        ERR_print_errors_fp(stderr);
+        return 0;
+    }
+
+    /* warmup */
+    size_t c2s, s2c, ch;
+    for (uint64_t i = 0; i < warmup; i++) {
+        if (one_handshake(cctx, sctx, &c2s, &s2c, &ch) != 0) {
+            printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":false,"
+                   "\"have_oqs_provider\":%s,\"reason\":\"handshake failed\"}\n",
+                   label, group, have_oqs?"true":"false");
+            ERR_print_errors_fp(stderr);
+            return 0;
+        }
+    }
+
+    /* One latency slot per requested connection. malloc(0) may legitimately
+     * return NULL, so only a NULL for a non-zero count is an error. */
+    uint64_t *lat = malloc((size_t)connections * sizeof(uint64_t));
+    if (!lat && connections) { fprintf(stderr,"latency buffer alloc failed\n"); return 1; }
+    size_t ch_last = 0, c2s_last = 0, s2c_last = 0;
+    uint64_t ok = 0;
+    for (uint64_t i = 0; i < connections; i++) {
+        uint64_t t0 = now_ns();
+        int rc = one_handshake(cctx, sctx, &c2s, &s2c, &ch);
+        uint64_t dt = now_ns() - t0;
+        /* Only successful handshakes contribute a sample and byte counts. */
+        if (rc == 0) { lat[ok++] = dt; ch_last = ch; c2s_last = c2s; s2c_last = s2c; }
+    }
+    if (ok == 0) {
+        fprintf(stderr,"all handshakes failed\n");
+        free(lat); SSL_CTX_free(cctx); SSL_CTX_free(sctx);
+        return 1;
+    }
+    qsort(lat, ok, sizeof(uint64_t), cmp_u64);
+    double median = pct(lat, ok, 0.5);
+    double p95 = pct(lat, ok, 0.95);
+    double mn = (double)lat[0], mx = (double)lat[ok-1];
+    double sum=0; for (uint64_t i=0;i<ok;i++) sum+=(double)lat[i];
+    double mean = sum/ok;
+    double ss=0; for (uint64_t i=0;i<ok;i++){double d=(double)lat[i]-mean; ss+=d*d;}
+    double stddev = ok>1?sqrt(ss/(ok-1)):0;   /* sample (n-1) standard deviation */
+    double hs_per_sec = median>0 ? 1e9/median : 0;   /* derived from the median latency */
+
+    printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":true,\"have_oqs_provider\":%s,",
+           label, group, have_oqs?"true":"false");
+    printf("\"connections\":%llu,\"succeeded\":%llu,", (unsigned long long)connections,(unsigned long long)ok);
+    printf("\"handshake_latency_ns\":{\"median\":%.1f,\"p95\":%.1f,\"min\":%.1f,\"max\":%.1f,\"mean\":%.1f,\"stddev\":%.1f},",
+           median,p95,mn,mx,mean,stddev);
+    printf("\"handshakes_per_sec\":%.1f,", hs_per_sec);
+    /* Byte counts and ClientHello size are those of the last successful handshake. */
+    printf("\"bytes_on_wire\":{\"client_to_server\":%zu,\"server_to_client\":%zu,\"total\":%zu},",
+           c2s_last, s2c_last, c2s_last+s2c_last);
+    printf("\"client_hello_bytes\":%zu,\"client_hello_fragmented\":%s,\"mss_assumed\":%d}\n",
+           ch_last, ch_last>TYPICAL_MSS?"true":"false", TYPICAL_MSS);
+    free(lat);
+    SSL_CTX_free(cctx); SSL_CTX_free(sctx);
+    return 0;
+}
--- a/pq-bench-rpi5/bench/tls/run_tls.sh
+++ b/pq-bench-rpi5/bench/tls/run_tls.sh
@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+# =============================================================================
+# run_tls.sh — generate PKI and run the TLS 1.3 (KEM-group x signature) matrix.
+#
+# Always benchmarks the classical Logos baseline (X25519 key exchange + Ed25519
+# server auth) using stock OpenSSL. PQ rows additionally require oqs-provider to
+# be loadable; if it is not present the PQ cells are skipped and the output
+# records have_oqs_provider=false rather than failing — so this still
+# smoke-tests cleanly on a dev box.
+#
+#   ./run_tls.sh --out tls.json --connections 1000
+#
+# Honors $PQB_TASKSET (a taskset/numactl prefix) to pin bench_tls to a core.
+# =============================================================================
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT="$(cd "$HERE/../.." && pwd)"
+# shellcheck source=setup/lib_platform.sh
+source "$ROOT/setup/lib_platform.sh"
+LOCK="$ROOT/setup/versions.lock"
+# shellcheck disable=SC1090
+[ -f "$LOCK" ] && source "$LOCK" || true
+pqb_detect_platform
+
+# ---- command-line options --------------------------------------------------
+# Every option takes exactly one value. Check that the value is present before
+# reading "$2": under `set -u` a trailing option with no value would otherwise
+# abort with a bare "unbound variable" error instead of a usage message.
+OUT=""; CONNS=1000; WARMUP=20
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --out|--connections|--warmup)
+      [ $# -ge 2 ] || { pqb_err "$1 requires a value"; exit 2; }
+      case "$1" in
+        --out) OUT="$2" ;;
+        --connections) CONNS="$2" ;;
+        --warmup) WARMUP="$2" ;;
+      esac
+      shift ;;
+    *) pqb_err "unknown arg: $1"; exit 2 ;;
+  esac
+  shift
+done
+[ -n "$OUT" ] || { pqb_err "--out required"; exit 2; }
+
+TASKSET="${PQB_TASKSET:-}"
+
+# ---- choose the OpenSSL that has the provider ------------------------------
+# Inputs (all optional environment variables):
+#   OPENSSL_BIN         openssl executable to use (default: first one on PATH)
+#   OPENSSL_PREFIX      install prefix handed to the Makefile (default: the
+#                       Homebrew openssl@3 prefix, else /usr)
+#   OQSPROVIDER_MODULE  path to the oqs-provider module file
+# Outputs: OSSL, OSSL_PREFIX, PROV_ARGS (extra openssl CLI flags) and HAVE_OQS
+# (1 only when the module file exists AND openssl can list it as a provider).
+OSSL="${OPENSSL_BIN:-$(command -v openssl)}"
+OSSL_PREFIX="${OPENSSL_PREFIX:-$(brew --prefix openssl@3 2>/dev/null || echo /usr)}"
+PROV_MODULE="${OQSPROVIDER_MODULE:-}"
+PROV_ARGS=""
+HAVE_OQS=0
+if [ -n "$PROV_MODULE" ] && [ -f "$PROV_MODULE" ]; then
+  # Point OpenSSL's module search at the directory containing the provider.
+  export OPENSSL_MODULES="$(dirname "$PROV_MODULE")"
+  # Probe: only trust the provider if this openssl can actually load it.
+  if "$OSSL" list -providers -provider oqsprovider -provider default >/dev/null 2>&1; then
+    PROV_ARGS="-provider oqsprovider -provider default"
+    HAVE_OQS=1
+    pqb_log "oqs-provider available: $PROV_MODULE"
+  fi
+fi
+[ "$HAVE_OQS" = 0 ] && pqb_warn "oqs-provider not available — PQ TLS rows will be marked unavailable (classical baseline still runs)"
+
+# ---- build the harness -----------------------------------------------------
+make -C "$HERE" OPENSSL_PREFIX="$OSSL_PREFIX" >/dev/null
+BENCH="$HERE/bench_tls"
+[ -n "$TASKSET" ] && export OPENSSL_MODULES="${OPENSSL_MODULES:-}"
+
+# ---- PKI workspace ---------------------------------------------------------
+PKI="$HERE/pki"; rm -rf "$PKI"; mkdir -p "$PKI"
+
+# gen_cert <sig_alg> <out_prefix> [provider]  -> CA + server cert/key of that alg
+#   sig_alg     algorithm name passed to `openssl -newkey` / `genpkey -algorithm`
+#   out_prefix  file-name prefix; files land in $PKI as <prefix>_ca.{key,pem}
+#               and <prefix>_server.{key,csr,pem}
+#   provider    optional extra openssl flags (e.g. "-provider ..."); expanded
+#               UNQUOTED on purpose so it word-splits into separate arguments
+# Returns 0 when all four openssl steps succeed, 1 as soon as one fails.
+# openssl's own output is discarded, so a failure is silent apart from the
+# return status.
+gen_cert() {
+  local alg="$1" pfx="$2" prov="${3:-}"
+  local ca_key="$PKI/${pfx}_ca.key" ca_crt="$PKI/${pfx}_ca.pem"
+  local sv_key="$PKI/${pfx}_server.key" sv_csr="$PKI/${pfx}_server.csr" sv_crt="$PKI/${pfx}_server.pem"
+  # CA: self-signed certificate with a fresh, unencrypted (-nodes) key
+  "$OSSL" req -x509 -new -newkey "$alg" -nodes $prov \
+      -keyout "$ca_key" -out "$ca_crt" -days 3650 \
+      -subj "/CN=PQB Test CA ($alg)" >/dev/null 2>&1 || return 1
+  # server key + CSR + cert signed by CA
+  "$OSSL" genpkey -algorithm "$alg" $prov -out "$sv_key" >/dev/null 2>&1 || return 1
+  "$OSSL" req -new -key "$sv_key" $prov -out "$sv_csr" -subj "/CN=localhost" >/dev/null 2>&1 || return 1
+  "$OSSL" x509 -req -in "$sv_csr" -CA "$ca_crt" -CAkey "$ca_key" $prov \
+      -out "$sv_crt" -days 3650 -CAcreateserial >/dev/null 2>&1 || return 1
+  return 0
+}
+
+# ---- read config -----------------------------------------------------------
+TLS_JSON="$(python3 "$ROOT/bench/lib/list_algs.py" tls "$ROOT/config.yaml")"
+read_list() { python3 -c "import json,sys; print('\n'.join(json.loads(sys.argv[1]).get(sys.argv[2],[])))" "$TLS_JSON" "$1"; }
+BASE_KEM="$(python3 -c "import json,sys;print(json.loads(sys.argv[1])['baseline']['kem_group'])" "$TLS_JSON")"
+BASE_SIG="$(python3 -c "import json,sys;print(json.loads(sys.argv[1])['baseline']['sig_alg'])" "$TLS_JSON")"
+
+# ---- generate certs --------------------------------------------------------
+declare -a SIG_OK_ALGS=()
+# classical baseline cert (always)
+if gen_cert "$BASE_SIG" "base_$BASE_SIG"; then
+  pqb_log "generated baseline cert ($BASE_SIG)"
+else
+  pqb_err "failed to generate classical baseline cert ($BASE_SIG) — TLS layer cannot run"
+  echo '{"available":false,"reason":"baseline cert generation failed"}' > "$OUT"; exit 0
+fi
+if [ "$HAVE_OQS" = 1 ]; then
+  while IFS= read -r s; do
+    [ -z "$s" ] && continue
+    if gen_cert "$s" "pq_$s" "$PROV_ARGS"; then
+      SIG_OK_ALGS+=("$s"); pqb_log "generated PQ cert ($s)"
+    else
+      pqb_warn "could not generate cert for sig alg '$s' (skipping)"
+    fi
+  done < <(read_list sig_algs)
+fi
+
+# cert path helpers
+ca_for()  { case "$1" in "$BASE_SIG") echo "$PKI/base_${BASE_SIG}_ca.pem";; *) echo "$PKI/pq_${1}_ca.pem";; esac; }
+crt_for() { case "$1" in "$BASE_SIG") echo "$PKI/base_${BASE_SIG}_server.pem";; *) echo "$PKI/pq_${1}_server.pem";; esac; }
+key_for() { case "$1" in "$BASE_SIG") echo "$PKI/base_${BASE_SIG}_server.key";; *) echo "$PKI/pq_${1}_server.key";; esac; }
+
+# run_cell <kem_group> <sig_alg>
+# Run one matrix cell: invoke bench_tls for the given key-exchange group with
+# the CA/cert/key generated for the given signature algorithm. The JSON object
+# bench_tls prints goes to this function's stdout (callers redirect it into
+# $ROWS); its stderr is appended to $PKI/bench_tls.err. The return status is
+# that of bench_tls.
+run_cell() {
+  local kem="$1" sig="$2"
+  local label="${kem}+${sig}"
+  # $TASKSET is left unquoted so an empty value disappears and a multi-word
+  # prefix splits into separate arguments.
+  # shellcheck disable=SC2086
+  $TASKSET "$BENCH" --group "$kem" --ca "$(ca_for "$sig")" \
+      --cert "$(crt_for "$sig")" --key "$(key_for "$sig")" \
+      --connections "$CONNS" --warmup "$WARMUP" --label "$label" 2>>"$PKI/bench_tls.err"
+}
+
+ROWS="$PKI/rows.jsonl"; : > "$ROWS"
+
+# baseline row always first
+pqb_log "TLS baseline: $BASE_KEM + $BASE_SIG ($CONNS handshakes)"
+run_cell "$BASE_KEM" "$BASE_SIG" >> "$ROWS" || pqb_warn "baseline TLS cell failed"
+
+# PQ matrix: (kem_groups x sig_algs) — only cells whose cert exists
+if [ "$HAVE_OQS" = 1 ] && [ "${#SIG_OK_ALGS[@]}" -gt 0 ]; then
+  while IFS= read -r kem; do
+    [ -z "$kem" ] && continue
+    for sig in "${SIG_OK_ALGS[@]}"; do
+      pqb_log "TLS cell: $kem + $sig"
+      run_cell "$kem" "$sig" >> "$ROWS" || pqb_warn "cell $kem+$sig failed"
+    done
+  done < <(read_list kem_groups)
+fi
+
+# ---- assemble tls.json -----------------------------------------------------
+# Wrap the per-cell JSON lines collected in $ROWS into the final document at
+# $OUT. Lines that are not valid JSON are skipped, but counted and reported on
+# stderr so a broken cell does not vanish silently.
+python3 - "$ROWS" "$OUT" "$HAVE_OQS" "$BASE_KEM" "$BASE_SIG" <<'PY'
+import json,sys
+rows_path, out_path, have_oqs, base_kem, base_sig = sys.argv[1:6]
+rows=[]
+skipped=0
+with open(rows_path) as f:
+    for line in f:
+        line=line.strip()
+        if not line:
+            continue
+        try:
+            rows.append(json.loads(line))
+        except json.JSONDecodeError:
+            skipped+=1
+out={
+  "available": True,
+  "have_oqs_provider": have_oqs=="1",
+  "baseline": {"kem_group": base_kem, "sig_alg": base_sig, "label": f"{base_kem}+{base_sig}"},
+  "matrix": rows,
+}
+# The context manager guarantees the file is flushed and closed before the
+# success message is printed.
+with open(out_path,"w") as f:
+    json.dump(out, f, indent=2)
+if skipped:
+    print(f"warning: skipped {skipped} malformed row(s) in {rows_path}", file=sys.stderr)
+print(f"wrote {out_path}: {len(rows)} cells (have_oqs={have_oqs})")
+PY
--- a/pq-bench-rpi5/config.yaml
+++ b/pq-bench-rpi5/config.yaml
@ -0,0 +1,163 @@
+# =============================================================================
+# pq-bench-rpi5 candidate configuration
+#
+# This file is the single place users edit to extend the benchmark. The harness
+# reads it with a dependency-free YAML subset parser (bench/lib/miniyaml.py), so
+# keep to the simple shape used below: top-level maps, lists of scalars, and
+# lists of "- key: value" maps. Comments (#) and quotes are fine.
+#
+# Algorithm names must match liboqs / oqs-provider identifiers exactly. To see
+# what your built liboqs supports:  ./vendor/liboqs/build/tests/test_kem  (lists)
+# or check the dashboard's "available algorithms" note after a run.
+# =============================================================================
+
+# ---- measurement knobs ------------------------------------------------------
+# Default sizing is PER-OPERATION AUTO-CALIBRATION: each op (keygen/encaps/sign/
+# ...) is timed long enough to accumulate ~target_time_ms of work, clamped to
+# [min_samples, max_iters]. This makes an 18 us ML-KEM keygen and a ~750 ms
+# SLH-DSA sign both yield a stable median without a hand-tuned per-alg count:
+#   - fast ops are bounded by max_iters
+#   - slow ops are bounded by min_samples (so even ~1 op per target still gets
+#     enough samples for a stable median/MAD)
+# To force FIXED counts instead, pass `./run.sh --iters N` — that disables
+# calibration and uses the warmup_iters/timed_iters fallback below.
+measurement:
+  target_time_ms: 250       # auto: aggregate timed work to put on each op
+  min_samples: 30           # auto: floor on timed iters/rep (stable median+MAD)
+  max_iters: 20000          # auto: ceiling on timed iters/rep (caps fast ops)
+  repetitions: 5            # independent repetitions (fresh process each time)
+  cycles_mode: auto         # auto | on | off  (PMU userspace cycle counting)
+  # ---- fixed-count fallback (only used when ./run.sh --iters is given) ----
+  warmup_iters: 1000        # untimed iterations to settle caches/branch predictors
+  timed_iters: 10000        # timed iterations per repetition
+
+# ---- security_level (NIST category) ----------------------------------------
+# Every candidate carries `security_level: N` — the NIST PQC category the
+# parameter set targets (1/2/3/5) — used only to group algorithms on the charts.
+# The mapping is PER-SCHEME, so different schemes reach the same security target
+# with different-looking sets: a level 1 sitting next to a level 2 is the real
+# NIST categorization, not a typo. Notably:
+#   - ML-KEM-512, Falcon-512, X25519         -> Category 1
+#   - ML-DSA-44                              -> Category 2  (FIPS 204 defines NO
+#                                               Category-1 ML-DSA; -44 is its
+#                                               smallest set and lands at Cat 2)
+#   - ML-KEM-768  / ML-DSA-65                -> Category 3
+#   - ML-KEM-1024 / ML-DSA-87 / Falcon-1024  -> Category 5
+# (NIST levels: 1 ~ AES-128, 2 ~ SHA-256 collision, 3 ~ AES-192, 5 ~ AES-256.)
+
+# ---- KEMs ------------------------------------------------------------------
+# baseline = the classical reference Logos uses TODAY (drawn on every chart).
+kem:
+  baseline:
+    - name: X25519
+      classical: true
+      security_level: 1     # NIST category (see "security_level" note above)
+  candidates:
+    - name: ML-KEM-512
+      security_level: 1
+    - name: ML-KEM-768
+      security_level: 3
+    - name: ML-KEM-1024
+      security_level: 5
+    # Hybrids (classical + PQ) — these are oqs-provider TLS group names; for the
+    # raw KEM bench they are skipped unless liboqs exposes them as a KEM.
+    - name: X25519MLKEM768
+      security_level: 3
+      hybrid: true
+    - name: SecP256r1MLKEM768
+      security_level: 3
+      hybrid: true
+    # --- Code-based + conservative-LWE backup candidates (added 2026-06) -------
+    # Verified enabled in the linked liboqs 0.15.0 build via OQS_KEM_alg_is_enabled
+    # (oqsconfig.h + runtime cross-check). Keygen/encaps/decaps are reported as
+    # separate ops so the docs' "keygen is a one-time setup cost" argument can show
+    # the (slow) McEliece keygen explicitly next to its tiny ciphertext.
+    #
+    # Classic McEliece — code-based. Huge public key, TINY ciphertext, very slow
+    # keygen on the Pi (auto-calibration clamps keygen to min_samples=30, like
+    # SLH-DSA sign); encaps/decaps are fast and calibrate normally. Standard sets,
+    # plus the 460896f fast-keygen variant directly next to standard 460896: same
+    # params and same tiny ciphertext but a faster keygen, so the pair gives the
+    # keygen trade-off directly (the most useful McEliece data point for the
+    # migration doc). Other f-variants omitted as redundant.
+    - name: Classic-McEliece-348864
+      security_level: 1
+    - name: Classic-McEliece-460896
+      security_level: 3
+    - name: Classic-McEliece-460896f
+      security_level: 3
+    - name: Classic-McEliece-6688128
+      security_level: 5
+    - name: Classic-McEliece-6960119
+      security_level: 5
+    - name: Classic-McEliece-8192128
+      security_level: 5
+    # FrodoKEM — conservative (unstructured) LWE. ~100x slower encaps/decaps than
+    # ML-KEM and ~15x larger ciphertext. AES variant (uses ARM AES instructions,
+    # which this build compiles in); SHAKE variants also available if needed.
+    - name: FrodoKEM-640-AES
+      security_level: 1
+    - name: FrodoKEM-976-AES
+      security_level: 3
+    - name: FrodoKEM-1344-AES
+      security_level: 5
+    # HQC-128/192/256: NOT enabled in this liboqs 0.15.0 build (disabled upstream
+    # after the IND-CCA2 implementation issue; oqsconfig.h has it #undef and the
+    # runtime OQS_KEM_alg_is_enabled returns 0). Intentionally omitted rather than
+    # listed-and-disabled. Re-add once linked against a liboqs with HQC re-enabled.
+
+# ---- Signatures ------------------------------------------------------------
+sig:
+  baseline:
+    - name: Ed25519
+      classical: true
+      security_level: 1
+  candidates:
+    - name: ML-DSA-44
+      security_level: 2     # Cat 2, not 1 — FIPS 204 has no Cat-1 ML-DSA (see note above)
+    - name: ML-DSA-65
+      security_level: 3
+    - name: ML-DSA-87
+      security_level: 5
+    - name: Falcon-512
+      security_level: 1
+    - name: Falcon-1024
+      security_level: 5
+    # SLH-DSA (SPHINCS+) — many parameter sets; a representative spread.
+    - name: SPHINCS+-SHA2-128f-simple
+      security_level: 1
+    - name: SPHINCS+-SHA2-128s-simple
+      security_level: 1
+    - name: SPHINCS+-SHA2-192f-simple
+      security_level: 3
+    - name: SPHINCS+-SHA2-256f-simple
+      security_level: 5
+    # --- extend here ---
+
+# ---- TLS 1.3 handshake matrix ----------------------------------------------
+# The bench runs (kem_group x sig_alg). The classical baseline pair is ALWAYS
+# included as the reference point regardless of what is listed here.
+tls:
+  baseline:
+    kem_group: X25519
+    sig_alg: ed25519        # OpenSSL classical auth Logos uses today
+  # PQ key-exchange groups to test (oqs-provider TLS group names)
+  kem_groups:
+    - X25519MLKEM768
+    - SecP256r1MLKEM768
+    - mlkem512
+    - mlkem768
+    - mlkem1024
+  # PQ signature algorithms for the server/CA cert (oqs-provider names)
+  sig_algs:
+    - mldsa44
+    - mldsa65
+    - mldsa87
+    - falcon512
+    - sphincssha2128fsimple
+  connections: 1000         # timed handshakes per cell (in-process, over memory BIOs)
+
+# ---- future phase (NOT implemented now — hooks only) -----------------------
+# zk:
+#   snark: []   # e.g. groth16, plonk, halo2
+#   stark: []   # e.g. risc0, winterfell
--- a/pq-bench-rpi5/dashboard/app.js
+++ b/pq-bench-rpi5/dashboard/app.js
@ -0,0 +1,304 @@
+/* pq-bench-rpi5 dashboard — pure client-side, reads a merged.json produced by
+ * analyze/merge.py. No backend. Renders KEM/sig/TLS charts with the classical
+ * Logos baseline as a reference line on each. Defaults to baseline-grade (RPi5)
+ * runs only; a toggle includes macOS/dev smoke runs. */
+
+const LEVEL_COLORS = { 1:"#3bd67a", 2:"#46c0c0", 3:"#3a7bff", 5:"#b06bff", 0:"#888" };
+const BASE_COLOR = "#e0533d", PQ_COLOR = "#3a7bff";
+let MERGED = null, CHARTS = [];
+
+const $ = (id) => document.getElementById(id);
+const nsToMs = (ns) => (ns || 0) / 1e6;
+/* compact ms label: more decimals for small values, fewer for large. Guards its
+ * input — Chart.js may hand a value-label formatter a parsed {x,y} point or a
+ * non-number; extract the numeric value without coercing an object (Number() on
+ * a null-prototype object would itself throw), and return "" for anything not
+ * finite so the value-label draw never throws and halts later charts. */
+const fmtMs = (v) => {
+  const ms = typeof v === "number" ? v
+           : (v && typeof v === "object") ? (typeof v.y === "number" ? v.y : NaN)
+           : Number(v);
+  if (!Number.isFinite(ms)) return "";
+  return ms>=100?ms.toFixed(0):ms>=10?ms.toFixed(1):ms>=1?ms.toFixed(2):ms.toFixed(3);
+};
+
+/* Inline Chart.js plugin: draw each bar's value just above the bar. Enabled
+ * per-chart via options.plugins.valueLabels.formatter; charts that don't set it
+ * are untouched (TLS/scatter stay clean). No external dependency, so it works
+ * even when only the Chart.js core CDN is reachable. */
+const valueLabels = {
+  id: "valueLabels",
+  afterDatasetsDraw(chart) {
+    const opt = (chart.options.plugins||{}).valueLabels;
+    if (!opt || !opt.formatter) return;
+    const ctx = chart.ctx;
+    ctx.save();
+    ctx.fillStyle = "#e6e8ee"; ctx.font = "10px sans-serif";
+    ctx.textAlign = "center"; ctx.textBaseline = "bottom";
+    chart.data.datasets.forEach((ds, di) => {
+      const meta = chart.getDatasetMeta(di);
+      if (meta.hidden) return;
+      meta.data.forEach((el, i) => {
+        const v = ds.data[i];
+        if (v == null) return;
+        ctx.fillText(opt.formatter(v, i), el.x, el.y - 3);
+      });
+    });
+    ctx.restore();
+  }
+};
+if (window.Chart) Chart.register(valueLabels);
+
+async function boot() {
+  $("fileInput").addEventListener("change", onFile);
+  $("includeSmoke").addEventListener("change", render);
+  $("runSelect").addEventListener("change", render);
+  try {
+    const r = await fetch("data/merged.json", { cache: "no-store" });
+    if (r.ok) { MERGED = await r.json(); afterLoad(); }
+    else showEmpty("No data/merged.json yet. Run a benchmark, then: " +
+                   "<code>python3 analyze/merge.py results/*.json -o dashboard/data/merged.json</code> " +
+                   "— or load a results file above.");
+  } catch (e) {
+    showEmpty("Could not auto-load data/merged.json (open via a local server or use the file picker).");
+  }
+}
+
+function onFile(ev) {
+  const f = ev.target.files[0]; if (!f) return;
+  const rd = new FileReader();
+  rd.onload = () => {
+    const d = JSON.parse(rd.result);
+    // accept either a merged file or a single results file
+    MERGED = d.merged_schema ? d : wrapSingle(d);
+    afterLoad();
+  };
+  rd.readAsText(f);
+}
+
+/* wrap a single results JSON into the merged shape so the picker works too */
+function wrapSingle(d) {
+  const rid = `${(d.host||{}).hostname}@${d.generated_utc}`;
+  const meta = { run_id: rid, hostname:(d.host||{}).hostname, cpu_brand:(d.host||{}).cpu_brand,
+                 is_rpi:(d.host||{}).is_rpi, is_baseline_grade:d.is_baseline_grade };
+  const kem=[], sig=[], tls=[];
+  (d.kem||[]).forEach(k=>{ if(k.enabled) Object.entries(k.operations||{}).forEach(([op,st])=>
+     kem.push({...meta, alg:k.alg, classical:!!k.classical, nist_level:k.claimed_nist_level,
+               operation:op, median_ns:st.median, sizes:k.sizes})); });
+  (d.sig||[]).forEach(s=>{ if(s.enabled) Object.entries(s.operations||{}).forEach(([op,st])=>
+     sig.push({...meta, alg:s.alg, classical:!!s.classical, nist_level:s.claimed_nist_level,
+               operation:op, median_ns:st.median, sizes:s.sizes})); });
+  ((d.tls||{}).matrix||[]).forEach(c=>{ if(c.enabled) tls.push({...meta, label:c.label, group:c.group,
+     is_baseline_pair:c.label===((d.tls||{}).baseline||{}).label,
+     handshakes_per_sec:c.handshakes_per_sec, median_ns:(c.handshake_latency_ns||{}).median,
+     bytes_total:(c.bytes_on_wire||{}).total, client_hello_bytes:c.client_hello_bytes,
+     client_hello_fragmented:c.client_hello_fragmented}); });
+  return { merged_schema:"single", n_runs:1,
+           runs:[{run_id:rid, host:d.host, is_baseline_grade:d.is_baseline_grade,
+                  baseline_grade_reasons:d.baseline_grade_reasons||[], toolchain:d.toolchain,
+                  cpu_features:d.cpu_features, run:d.run,
+                  thermal_summary:{temp_c:(d.thermal_trace||{}).temp_c,
+                                   throttling_detected:(d.thermal_trace||{}).throttling_detected},
+                  generated_utc:d.generated_utc}],
+           kem, sig, tls };
+}
+
+function afterLoad() {
+  const sel = $("runSelect");
+  sel.innerHTML = "";
+  MERGED.runs.forEach(r => {
+    const o = document.createElement("option");
+    o.value = r.run_id;
+    o.textContent = `${(r.host||{}).cpu_brand||"?"} — ${r.is_baseline_grade?"✅ baseline":"⚠ smoke"} — ${r.generated_utc||""}`;
+    sel.appendChild(o);
+  });
+  render();
+}
+
+function currentRun() {
+  const id = $("runSelect").value;
+  return MERGED.runs.find(r => r.run_id === id) || MERGED.runs[0];
+}
+
+function render() {
+  if (!MERGED) return;
+  CHARTS.forEach(c => c.destroy()); CHARTS = [];
+  const run = currentRun();
+  const includeSmoke = $("includeSmoke").checked;
+  const allowed = new Set(MERGED.runs
+     .filter(r => includeSmoke || r.is_baseline_grade)
+     .map(r => r.run_id));
+  // chart the selected run if allowed, else fall back to allowed set
+  const rid = allowed.has(run.run_id) ? run.run_id : null;
+  const filt = (rows) => rows.filter(r => rid ? r.run_id === rid : allowed.has(r.run_id));
+
+  renderBanner(run);
+  renderEnv(run);
+
+  const kem = filt(MERGED.kem), sig = filt(MERGED.sig), tls = filt(MERGED.tls);
+
+  barByLevel("kem_keygen", kem, "keygen", "KEM keygen — median latency (ms)");
+  barByLevel("kem_encaps", kem, "encaps", "KEM encaps — median latency (ms)", "derive");
+  barByLevel("kem_decaps", kem, "decaps", "KEM decaps — median latency (ms)", "derive");
+  scatter("kem_scatter", kem, "encaps", "public_key", "KEM size vs speed (encaps)", "public key (B)");
+  barByLevel("sig_sign", sig, "sign", "Signature sign — median latency (ms)", "sign", true);
+  barByLevel("sig_verify", sig, "verify", "Signature verify — median latency (ms)", "verify", true);
+  scatter("sig_scatter", sig, "sign", "signature", "Signature size vs speed (sign)", "signature (B)", true);
+  tlsThroughput("tls_hs", tls);
+  tlsClientHello("tls_chello", tls);
+}
+
+function renderBanner(run) {
+  const el = $("quality-banner");
+  if (run.is_baseline_grade) {
+    el.className = "banner-good";
+    el.innerHTML = "✅ RPi5 baseline-grade run — performance governor, core-pinned, " +
+                   "cortex-a76 flags, no thermal throttling.";
+  } else {
+    el.className = "banner-warn";
+    const rs = (run.baseline_grade_reasons||[]).map(r=>`<li>${r}</li>`).join("");
+    el.innerHTML = "⚠ NOT RPi5 baseline-grade — treat as a pipeline smoke test, not measurement data." +
+                   (rs ? `<ul>${rs}</ul>` : "");
+  }
+}
+
+function renderEnv(run) {
+  const h = run.host||{}, t = run.toolchain||{}, f = run.cpu_features||{}, rn = run.run||{};
+  const chip = (label, val) => `<span class="chip"><b>${label}</b> ${val}</span>`;
+  const sha3 = f.sha3 ? "SHA3 hw ✓" : "SHA3 hw ✗ (Keccak on NEON)";
+  $("env-summary").innerHTML = [
+    chip("host", `${h.cpu_brand||"?"} (${h.os_pretty||h.os||"?"})`),
+    chip("governor", rn.governor_after||"?"),
+    chip("pinned core", rn.pinned ? rn.bench_core : "no"),
+    chip("flags", `${t.bench_cflags||"?"} → ${t.cflags_target||"?"}`),
+    chip("liboqs", `${t.liboqs_ref||"?"} ${(t.liboqs_commit||"").slice(0,8)}`),
+    chip("crypto-ext", `${f.neon?"NEON ":""}${f.sha2?"SHA2 ":""}${sha3}`),
+    chip("cycles", rn.cycles_available ? "PMU ✓" : "time-based"),
+    chip("temp", run.thermal_summary && run.thermal_summary.temp_c
+         ? `${run.thermal_summary.temp_c.mean}°C` + (run.thermal_summary.throttling_detected?" ⚠throttled":"")
+         : "n/a"),
+  ].join("");
+}
+
+/* ---- chart builders ----------------------------------------------------- */
+function baselineAnnotation(value, label) {
+  if (value == null) return {};
+  return { annotations: { base: {
+    type:"line", yMin:value, yMax:value, borderColor:BASE_COLOR,
+    borderWidth:2, borderDash:[6,4],
+    label:{ display:true, content:label, position:"end",
+            backgroundColor:BASE_COLOR, font:{size:10} } } } };
+}
+
+function barByLevel(canvasId, rows, op, title, baselineOp = op, logY = false) {
+  const data = rows.filter(r => r.operation === op)
+                   .sort((a,b)=>(a.nist_level||0)-(b.nist_level||0) || a.median_ns-b.median_ns);
+  if (!data.length) return drawEmpty(canvasId, title);
+  /* Baseline reference line: the classical row for baselineOp (defaults to this
+   * chart's own op). KEM encaps/decaps have no classical encaps/decaps op, so
+   * they map to the X25519 key-agreement (derive) timing instead. */
+  const base = rows.find(r => r.classical && r.operation === baselineOp);
+  const ctx = $(canvasId).getContext("2d");
+  CHARTS.push(new Chart(ctx, {
+    type:"bar",
+    data:{ labels:data.map(r=>r.alg),
+      datasets:[{ label:title,
+        data:data.map(r=>nsToMs(r.median_ns)),
+        backgroundColor:data.map(r=> r.classical?BASE_COLOR:(LEVEL_COLORS[r.nist_level]||PQ_COLOR)) }] },
+    options:{ responsive:true, plugins:{
+        title:{display:true,text:title,color:"#e6e8ee"},
+        legend:{display:false},
+        valueLabels:{ formatter:(v)=>fmtMs(v) },
+        tooltip:{callbacks:{
+          label:(it)=>`median ${it.raw.toFixed(4)} ms (${Math.round(it.raw*1e6).toLocaleString()} ns)`,
+          afterLabel:(it)=>{
+          const r=data[it.dataIndex]; return `NIST L${r.nist_level} · ${r.classical?"classical baseline":"PQ"}`; }}},
+        annotation: base ? baselineAnnotation(nsToMs(base.median_ns),
+            baselineOp === op ? `baseline ${base.alg}` : `baseline ${base.alg} ${base.operation}`) : {} },
+      scales:{ x:{ticks:{color:"#9aa3b2",maxRotation:50,minRotation:40}},
+               y:{ type: logY?"logarithmic":"linear",
+                   title:{display:true,text:logY?"ms (log)":"ms",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}} } }
+  }));
+}
+
+function scatter(canvasId, rows, op, sizeKey, title, xlabel, logScale = false) {
+  const data = rows.filter(r => r.operation === op && r.sizes && r.sizes[sizeKey]);
+  if (!data.length) return drawEmpty(canvasId, title);
+  const pts = data.map(r => ({ x:r.sizes[sizeKey], y:nsToMs(r.median_ns), alg:r.alg, classical:r.classical }));
+  const ctx = $(canvasId).getContext("2d");
+  CHARTS.push(new Chart(ctx, {
+    type:"scatter",
+    data:{ datasets:[{ label:title, data:pts, pointRadius:6,
+        backgroundColor:pts.map(p=>p.classical?BASE_COLOR:PQ_COLOR) }] },
+    options:{ responsive:true, plugins:{
+        title:{display:true,text:title,color:"#e6e8ee"}, legend:{display:false},
+        tooltip:{callbacks:{label:(it)=>`${it.raw.alg}: ${it.raw.x} B, ${it.raw.y.toFixed(3)} ms`}} },
+      scales:{ x:{ type: logScale?"logarithmic":"linear",
+                   title:{display:true,text:logScale?`${xlabel} (log)`:xlabel,color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
+               y:{ type: logScale?"logarithmic":"linear",
+                   title:{display:true,text:logScale?"median latency (ms, log)":"median latency (ms)",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}} } }
+  }));
+}
+
+function tlsThroughput(canvasId, rows) {
+  if (!rows.length) return drawEmpty(canvasId, "TLS handshakes/sec — run the TLS layer");
+  const data = rows.slice().sort((a,b)=>(b.handshakes_per_sec||0)-(a.handshakes_per_sec||0));
+  const base = data.find(r => r.is_baseline_pair);
+  const ctx = $(canvasId).getContext("2d");
+  CHARTS.push(new Chart(ctx, {
+    type:"bar",
+    data:{ labels:data.map(r=>r.label),
+      datasets:[{ label:"handshakes/sec",
+        data:data.map(r=>r.handshakes_per_sec),
+        backgroundColor:data.map(r=>r.is_baseline_pair?BASE_COLOR:PQ_COLOR) }] },
+    options:{ indexAxis:"y", responsive:true, plugins:{
+        title:{display:true,text:"TLS 1.3 handshake throughput (higher = better)",color:"#e6e8ee"},
+        legend:{display:false},
+        annotation: base ? { annotations:{ base:{ type:"line",
+            xMin:base.handshakes_per_sec, xMax:base.handshakes_per_sec,
+            borderColor:BASE_COLOR, borderWidth:2, borderDash:[6,4],
+            label:{display:true,content:`baseline ${base.label}`,position:"end",
+                   backgroundColor:BASE_COLOR,font:{size:10}} } } } : {} },
+      scales:{ x:{title:{display:true,text:"handshakes/sec",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
+               y:{ticks:{color:"#9aa3b2",font:{size:10}}} } }
+  }));
+}
+
+function tlsClientHello(canvasId, rows) {
+  if (!rows.length) return drawEmpty(canvasId, "ClientHello size — run the TLS layer");
+  const data = rows.slice().sort((a,b)=>(b.client_hello_bytes||0)-(a.client_hello_bytes||0));
+  const base = data.find(r => r.is_baseline_pair);
+  const ctx = $(canvasId).getContext("2d");
+  CHARTS.push(new Chart(ctx, {
+    type:"bar",
+    data:{ labels:data.map(r=>r.label),
+      datasets:[{ label:"ClientHello bytes",
+        data:data.map(r=>r.client_hello_bytes),
+        backgroundColor:data.map(r=> r.is_baseline_pair?BASE_COLOR
+            : (r.client_hello_fragmented?"#d98b2b":PQ_COLOR)) }] },
+    options:{ indexAxis:"y", responsive:true, plugins:{
+        title:{display:true,text:"ClientHello size (orange = exceeds ~1400B MSS → fragments)",color:"#e6e8ee"},
+        legend:{display:false},
+        annotation:{ annotations:{ mss:{ type:"line", xMin:1400, xMax:1400,
+            borderColor:"#d98b2b", borderWidth:1, borderDash:[4,4],
+            label:{display:true,content:"~MSS 1400B",position:"start",backgroundColor:"#d98b2b",font:{size:9}} },
+            ...(base?{base:{type:"line",xMin:base.client_hello_bytes,xMax:base.client_hello_bytes,
+            borderColor:BASE_COLOR,borderWidth:2,borderDash:[6,4],
+            label:{display:true,content:`baseline ${base.label}`,position:"end",backgroundColor:BASE_COLOR,font:{size:10}}}}:{}) } } },
+      scales:{ x:{title:{display:true,text:"bytes",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
+               y:{ticks:{color:"#9aa3b2",font:{size:10}}} } }
+  }));
+}
+
+function drawEmpty(canvasId, msg) {
+  const c = $(canvasId); const ctx = c.getContext("2d");
+  ctx.clearRect(0,0,c.width,c.height);
+  ctx.fillStyle = "#9aa3b2"; ctx.font = "13px sans-serif"; ctx.textAlign="center";
+  ctx.fillText(msg, c.width/2, c.height/2);
+}
+
+function showEmpty(html) {
+  document.querySelector("main").innerHTML = `<div class="empty">${html}</div>`;
+}
+
+boot();
--- a/pq-bench-rpi5/dashboard/data/merged.json
+++ b/pq-bench-rpi5/dashboard/data/merged.json
--- a/pq-bench-rpi5/dashboard/index.html
+++ b/pq-bench-rpi5/dashboard/index.html
@ -0,0 +1,68 @@
+<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>PQ Benchmark — RPi5 baseline</title>
+<link rel="stylesheet" href="style.css">
+<!-- Chart.js + annotation plugin via CDN (works on GitHub Pages). Chart.js
+     itself is required: if the CDN is unreachable no chart renders. Only the
+     annotation plugin is optional (charts then render without the reference
+     lines). -->
+<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
+<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation@3.0.1/dist/chartjs-plugin-annotation.min.js"></script>
+</head>
+<body>
+<header>
+  <h1>Post-Quantum Crypto Benchmark <span class="sub">Raspberry Pi 5 baseline</span></h1>
+  <p class="framing">Migration cost: moving from what Logos uses <b>today</b>
+     (X25519 + Ed25519) to PQ candidates, on validator-grade hardware. The
+     classical baseline is drawn as a reference line on every chart.</p>
+</header>
+
+<!-- Dataset / run selection controls. -->
+<section id="controls">
+  <label>Dataset:
+    <input type="file" id="fileInput" accept="application/json">
+    <span class="hint">or auto-loads <code>data/merged.json</code></span>
+  </label>
+  <label class="toggle">
+    <input type="checkbox" id="includeSmoke"> include non-baseline (smoke / dev) runs
+  </label>
+  <label>Run:
+    <select id="runSelect"></select>
+  </label>
+</section>
+
+<!-- Filled in by app.js: renderBanner() writes #quality-banner,
+     renderEnv() writes #env-summary. -->
+<div id="quality-banner"></div>
+<div id="env-summary"></div>
+
+<!-- Each canvas id below is the first argument of a chart-builder call in
+     app.js; showEmpty() replaces the whole of <main>. -->
+<main>
+  <h2>Key Encapsulation (KEM)</h2>
+  <div class="chart-grid">
+    <figure><canvas id="kem_keygen"></canvas></figure>
+    <figure><canvas id="kem_encaps"></canvas></figure>
+    <figure><canvas id="kem_decaps"></canvas></figure>
+    <figure><canvas id="kem_scatter"></canvas></figure>
+  </div>
+
+  <h2>Signatures</h2>
+  <div class="chart-grid">
+    <figure><canvas id="sig_sign"></canvas></figure>
+    <figure><canvas id="sig_verify"></canvas></figure>
+    <figure><canvas id="sig_scatter"></canvas></figure>
+  </div>
+
+  <h2>TLS 1.3 Handshakes <span class="sub">(KEM group × signature)</span></h2>
+  <div class="chart-grid">
+    <figure><canvas id="tls_hs"></canvas></figure>
+    <figure><canvas id="tls_chello"></canvas></figure>
+  </div>
+</main>
+
+<footer>
+  <p>Generated by <code>pq-bench-rpi5</code>. Numbers are only RPi5-baseline-grade
+     when the run banner says so (real Pi 5 · performance governor · core-pinned ·
+     cortex-a76 flags · no thermal throttling). Everything else is a pipeline smoke test.</p>
+</footer>
+
+<!-- Loaded last, after the elements above exist in the DOM. -->
+<script src="app.js"></script>
+</body>
+</html>
--- a/pq-bench-rpi5/dashboard/style.css
+++ b/pq-bench-rpi5/dashboard/style.css
@ -0,0 +1,40 @@
+/* Colour palette, shared through custom properties. */
+:root {
+  --bg:#0f1115; --panel:#181b22; --ink:#e6e8ee; --muted:#9aa3b2;
+  --accent:#3bd67a; --base:#e0533d; --pq:#3a7bff; --line:#2a2f3a;
+}
+/* Page chrome: base typography, header, section headings. */
+* { box-sizing:border-box; }
+body { margin:0; font:15px/1.5 -apple-system,Segoe UI,Roboto,sans-serif;
+  background:var(--bg); color:var(--ink); }
+header { padding:24px 28px 8px; }
+h1 { margin:0; font-size:22px; }
+h1 .sub, h2 .sub { color:var(--muted); font-weight:400; font-size:0.7em; }
+.framing { color:var(--muted); max-width:760px; }
+h2 { margin:30px 28px 8px; border-bottom:1px solid var(--line); padding-bottom:6px; }
+
+/* Dataset / run controls bar. */
+#controls { display:flex; flex-wrap:wrap; gap:18px; align-items:center;
+  padding:12px 28px; background:var(--panel); margin:8px 0; }
+#controls label { color:var(--muted); font-size:14px; }
+#controls .hint { font-size:12px; opacity:.7; }
+select, input[type=file] { color:var(--ink); background:#0c0e12;
+  border:1px solid var(--line); border-radius:6px; padding:4px 6px; }
+
+/* Run-quality banner; app.js sets the class to .banner-good or .banner-warn. */
+#quality-banner { margin:8px 28px; padding:12px 16px; border-radius:8px; font-weight:600; }
+.banner-good { background:#12331f; border:1px solid #1f7a44; color:#7df0a8; }
+.banner-warn { background:#3a2410; border:1px solid #aa6a1f; color:#ffce8a; }
+.banner-warn ul { font-weight:400; margin:6px 0 0; color:#ffd9a8; }
+
+/* Environment summary: a wrapping row of pill-shaped .chip spans. */
+#env-summary { margin:0 28px 8px; color:var(--muted); font-size:13px;
+  display:flex; flex-wrap:wrap; gap:6px 18px; }
+#env-summary b { color:var(--ink); }
+#env-summary .chip { background:var(--panel); padding:3px 9px; border-radius:20px;
+  border:1px solid var(--line); }
+
+/* Chart grid: as many columns of at least 420px as fit the viewport. */
+.chart-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(420px,1fr));
+  gap:18px; padding:14px 28px; }
+figure { margin:0; background:var(--panel); border:1px solid var(--line);
+  border-radius:10px; padding:12px; min-height:320px; }
+canvas { max-height:340px; }
+
+/* Footer, inline code, and the "no data" notice. */
+footer { color:var(--muted); font-size:12px; padding:18px 28px 40px; max-width:820px; }
+code { background:#0c0e12; padding:1px 5px; border-radius:4px; }
+.empty { color:var(--muted); padding:30px; text-align:center; }
--- a/pq-bench-rpi5/results/.gitkeep
+++ b/pq-bench-rpi5/results/.gitkeep
--- a/pq-bench-rpi5/results/mehmetmac-20260625T220618Z.json
+++ b/pq-bench-rpi5/results/mehmetmac-20260625T220618Z.json
--- a/pq-bench-rpi5/results/rasberrypi5-20260625T202356Z.json
+++ b/pq-bench-rpi5/results/rasberrypi5-20260625T202356Z.json
--- a/pq-bench-rpi5/results/thomas-pi-20260703T083302Z.json
+++ b/pq-bench-rpi5/results/thomas-pi-20260703T083302Z.json
--- a/pq-bench-rpi5/run.sh
+++ b/pq-bench-rpi5/run.sh
@ -0,0 +1,290 @@
+#!/usr/bin/env bash
+# =============================================================================
+# run.sh — measurement wrapper + orchestrator.
+#
+# Does the things that make a number credible:
+#   * sets the CPU governor to `performance` (Linux; warns elsewhere)
+#   * pins the benchmark to a single isolated core via taskset (core 3 on RPi5;
+#     core 3 stays clear of CPU0 where the kernel steers IRQs/RPS)
+#   * logs ARM clock + SoC temperature throughout, embeds the trace in results,
+#     and warns on thermal throttling
+#   * runs every candidate from config.yaml, then assembles one results JSON
+#     stamped with full host + toolchain provenance.
+#
+# Usage:
+#   ./run.sh                 # full run using config.yaml knobs
+#   ./run.sh --smoke         # tiny iteration counts: pipeline smoke test
+#   ./run.sh --kemsig-only   # skip the TLS layer (alias: --no-tls)
+#   ./run.sh --tls-only      # only the TLS layer
+#   ./run.sh --iters N --warmup N --reps N   # override measurement knobs
+#   sudo ./run.sh            # needed on Linux to set the governor
+# =============================================================================
+set -euo pipefail   # abort on command failure, unset variable, or failed pipeline stage
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"   # absolute directory of this script
+TOOL_VERSION="0.1.0"
+# shellcheck source=setup/lib_platform.sh
+source "$ROOT/setup/lib_platform.sh"
+# shellcheck source=setup/versions.env
+source "$ROOT/setup/versions.env"
+LOCK="$ROOT/setup/versions.lock"
+[ -f "$LOCK" ] && source "$LOCK" || pqb_warn "no versions.lock — run ./setup/setup.sh first"   # lock file is optional: warn and continue (the warning also fires if sourcing it fails)
+
+pqb_detect_platform   # sets PQB_OS / PQB_ARCH / PQB_IS_RPI / PQB_RPI_MODEL
+
+# ---- args ------------------------------------------------------------------
+SMOKE=0; DO_KEMSIG=1; DO_TLS=1
+OVR_ITERS=""; OVR_WARMUP=""; OVR_REPS=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --smoke) SMOKE=1 ;;
+    --kemsig-only) DO_TLS=0 ;;
+    --tls-only) DO_KEMSIG=0 ;;
+    --no-tls) DO_TLS=0 ;;
+    --iters|--warmup|--reps)
+      # Value-taking flags. Under `set -u` a missing value would otherwise die
+      # with an opaque "unbound variable"; report a usage error instead.
+      if [ $# -lt 2 ]; then pqb_err "$1 requires a value"; exit 2; fi
+      # Counts only: reject empty or non-numeric values here, not deep in the harness.
+      case "$2" in
+        ''|*[!0-9]*) pqb_err "$1 expects a non-negative integer, got '$2'"; exit 2 ;;
+      esac
+      case "$1" in
+        --iters)  OVR_ITERS="$2" ;;
+        --warmup) OVR_WARMUP="$2" ;;
+        --reps)   OVR_REPS="$2" ;;
+      esac
+      shift ;;
+    -h|--help)
+      # Print only the header comment block: skip the shebang, stop at the
+      # first non-comment line, strip the leading "# ".
+      awk 'NR==1{next} !/^#/{exit} {sub(/^# ?/,""); print}' "$0"; exit 0 ;;
+    *) pqb_err "unknown arg: $1"; exit 2 ;;
+  esac
+  shift
+done
+
+# ---- measurement knobs (config.yaml, overridable) --------------------------
+# Sets TARGET_TIME_MS MIN_SAMPLES MAX_ITERS REPS CYCLES_MODE (auto-calibration,
+# the default path) plus WARMUP ITERS (the fixed-count fallback).
+eval "$(python3 "$ROOT/bench/lib/list_algs.py" measurement "$ROOT/config.yaml")"   # the helper's stdout is executed as shell code
+
+# Mode: auto-calibrate per op (default) unless --iters forces a fixed count.
+CALIB_MODE="auto"
+if [ -n "$OVR_ITERS" ]; then CALIB_MODE="fixed"; ITERS="$OVR_ITERS"; fi
+[ -n "$OVR_WARMUP" ] && WARMUP="$OVR_WARMUP"   # only reaches bench_pq in fixed mode: the auto-mode args below carry no --warmup
+[ -n "$OVR_REPS" ]   && REPS="$OVR_REPS"
+if [ "$SMOKE" = 1 ]; then
+  # Keep auto-calibration (so each op still reaches target) but a single rep, so
+  # the sweep stays short. This is a pipeline test, NOT measurement data.
+  REPS=1
+  pqb_warn "SMOKE MODE: reps=1 — pipeline test only, NOT measurement data"
+fi
+
+# Assemble the per-op sizing args passed to every bench_pq invocation.
+if [ "$CALIB_MODE" = "fixed" ]; then
+  BENCH_SIZE_ARGS=(--warmup "$WARMUP" --iters "$ITERS" --reps "$REPS")
+  pqb_log "sizing: FIXED-count warmup=$WARMUP iters=$ITERS reps=$REPS"
+else
+  BENCH_SIZE_ARGS=(--target-time-ms "$TARGET_TIME_MS" --min-samples "$MIN_SAMPLES" \
+                   --max-iters "$MAX_ITERS" --reps "$REPS")
+  pqb_log "sizing: AUTO-calibrate target=${TARGET_TIME_MS}ms min_samples=$MIN_SAMPLES max_iters=$MAX_ITERS reps=$REPS"
+fi
+BENCH_CORE="${BENCH_CORE:-3}"   # overridable from the environment; defaults to core 3
+export PQB_BENCH_CORE="$BENCH_CORE"   # read by pqb_sample_thermal's generic-Linux branch
+
+# ---- work directory --------------------------------------------------------
+HOST="$(pqb_resolve_hostname)"
+TS="$(date -u +%Y%m%dT%H%M%SZ)"
+WORK="$ROOT/results/.work-$HOST-$TS"   # per-run scratch dir, unique per host + UTC timestamp
+mkdir -p "$WORK" "$ROOT/results"
+KEMSIG_OUT="$WORK/kemsig.jsonl"; : > "$KEMSIG_OUT"   # created empty up front
+TLS_OUT="$WORK/tls.json"   # path only; not created here
+THERMAL="$WORK/thermal.csv"; : > "$THERMAL"   # created empty up front
+META="$WORK/meta.env"
+FEATURES="$WORK/cpu_features.json"
+WARN_ACC=""   # accumulated warnings, "||"-separated
+
+add_warn() { WARN_ACC="${WARN_ACC:+$WARN_ACC||}$1"; pqb_warn "$1"; }   # record a warning for the results metadata and print it
+
+# ---- governor --------------------------------------------------------------
+GOV_BEFORE="$(pqb_get_governor)"
+GOV_AFTER="$(pqb_set_governor_performance || true)"   # `|| true`: a failed set is recorded as a warning below, not fatal
+GOV_REQUESTED="performance"
+if [ "$GOV_AFTER" != "performance" ]; then
+  add_warn "governor is '$GOV_AFTER', not 'performance' (need root on Linux, or unsupported on macOS)"
+fi
+
+# ---- core pinning ----------------------------------------------------------
+TASKSET="$(pqb_taskset_prefix "$BENCH_CORE")"   # empty string when no pinning tool exists
+if [ -n "$TASKSET" ]; then
+  PINNED=1; pqb_log "pinning to core $BENCH_CORE via: $TASKSET"
+else
+  PINNED=0; add_warn "core pinning unavailable (no taskset/numactl) — results will be noisier"
+fi
+
+# ---- thermal sampler (background) ------------------------------------------
+SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}"   # seconds between samples; overridable from the environment
+pqb_log "starting thermal/clock sampler (every ${SAMPLE_INTERVAL}s) -> $THERMAL"
+( while :; do pqb_sample_thermal >> "$THERMAL" 2>/dev/null; sleep "$SAMPLE_INTERVAL"; done ) &   # background subshell appends one CSV line per interval
+SAMPLER_PID=$!
+disown "$SAMPLER_PID" 2>/dev/null || true   # suppress job-control "Terminated" noise on kill
+# shellcheck disable=SC2064
+trap "kill $SAMPLER_PID 2>/dev/null || true" EXIT   # double quotes on purpose: the PID is expanded now, when the trap is set
+pqb_sample_thermal >> "$THERMAL" 2>/dev/null   # one immediate sample
+
+# ---- CPU features ----------------------------------------------------------
+pqb_cpu_features_json > "$FEATURES"
+pqb_log "cpu features: $(cat "$FEATURES")"
+# Report whether Keccak/SHA3 *instruction* acceleration is available + compiled.
+SHA3_HW="$(python3 -c "import json;print(json.load(open('$FEATURES'))['sha3'])" 2>/dev/null || echo unknown)"   # "unknown" if the JSON cannot be read
+case "${LIBOQS_OPT_DEFINES:-}" in   # may be unset; defaults to empty so the match falls through to 0
+  *"OQS_USE_ARM_SHA3_INSTRUCTIONS 1"*) SHA3_COMPILED=1 ;;
+  *) SHA3_COMPILED=0 ;;
+esac
+pqb_log "Keccak/SHA3: hw_instructions=$SHA3_HW liboqs_compiled_sha3=$SHA3_COMPILED (A76 has no SHA3 ext; Keccak runs on NEON there)"
+
+TS_START="$(date -u +%Y-%m-%dT%H:%M:%SZ)"   # ISO-8601 UTC start stamp for the metadata
+START_EPOCH="$(date +%s)"   # epoch seconds, used later to compute DURATION
+
+# ---- build harness if needed -----------------------------------------------
+OSSL_PREFIX_FOR_BUILD="${OPENSSL_PREFIX:-$(brew --prefix openssl@3 2>/dev/null || echo /usr)}"   # precedence: $OPENSSL_PREFIX, then Homebrew's openssl@3, then /usr
+if [ ! -x "$ROOT/bench/kem_sig/bench_pq" ] || [ "$ROOT/bench/kem_sig/bench_pq.c" -nt "$ROOT/bench/kem_sig/bench_pq" ]; then
+  # rebuild when the binary is missing or older than bench_pq.c
+  pqb_log "building bench_pq harness"
+  make -C "$ROOT/bench/kem_sig" \
+    LIBOQS_PREFIX="${PREFIX:-$ROOT/vendor/install}" \
+    OPENSSL_PREFIX="$OSSL_PREFIX_FOR_BUILD" \
+    BENCH_CFLAGS="${BENCH_CFLAGS:--O3}" >/dev/null
+fi
+
+# ---- KEM/sig sweep ---------------------------------------------------------
+CYCLES_AVAILABLE=0; CYCLES_REASON="not probed"
+if [ "$DO_KEMSIG" = 1 ]; then
+  if [ "$CALIB_MODE" = "fixed" ]; then
+    pqb_log "running KEM/sig sweep (fixed: warmup=$WARMUP iters=$ITERS reps=$REPS)"
+  else
+    pqb_log "running KEM/sig sweep (auto-calibrate: target=${TARGET_TIME_MS}ms min_samples=$MIN_SAMPLES max_iters=$MAX_ITERS reps=$REPS)"
+  fi
+  # list_algs.py emits one tab-separated line per candidate: kind, alg, classical
+  while IFS=$'\t' read -r kind alg classical; do
+    [ -z "$alg" ] && continue
+    pqb_log "  $kind $alg"
+    ERRF="$WORK/err.$kind.$alg.txt"
+    # $TASKSET is deliberately unquoted: it must split into words, or vanish when empty
+    # shellcheck disable=SC2086
+    if $TASKSET "$ROOT/bench/kem_sig/bench_pq" --kind "$kind" --alg "$alg" \
+        "${BENCH_SIZE_ARGS[@]}" >> "$KEMSIG_OUT" 2>"$ERRF"; then
+      :
+    else
+      # one failing candidate is recorded as a warning; the sweep continues
+      add_warn "harness failed for $kind $alg (see $ERRF)"
+    fi
+    # capture PMU availability from the harness's stderr (first occurrence)
+    if grep -q 'cycles_available=1' "$ERRF" 2>/dev/null; then
+      CYCLES_AVAILABLE=1; CYCLES_REASON="$(sed -n 's/.*cycles_available=1 (\(.*\))/\1/p' "$ERRF" | head -1)"
+    elif [ "$CYCLES_AVAILABLE" = 0 ] && grep -q 'cycles_available=0' "$ERRF" 2>/dev/null; then
+      # keep the "unavailable" reason only while no candidate has reported cycles
+      CYCLES_REASON="$(sed -n 's/.*cycles_available=0 (\(.*\))/\1/p' "$ERRF" | head -1)"
+    fi
+  done < <(python3 "$ROOT/bench/lib/list_algs.py" kemsig "$ROOT/config.yaml")
+fi
+
+# ---- TLS layer -------------------------------------------------------------
+if [ "$DO_TLS" = 1 ]; then
+  if [ -x "$ROOT/bench/tls/run_tls.sh" ]; then
+    # connections per cell: the "connections" key of list_algs.py's TLS JSON (default 1000)
+    TLS_CONNS="$(python3 -c "import json,sys;print(json.loads(sys.argv[1]).get('connections',1000))" \
+                  "$(python3 "$ROOT/bench/lib/list_algs.py" tls "$ROOT/config.yaml")")"
+    [ "$SMOKE" = 1 ] && TLS_CONNS=50
+    pqb_log "running TLS handshake matrix ($TLS_CONNS handshakes/cell)"
+    if PQB_TASKSET="$TASKSET" "$ROOT/bench/tls/run_tls.sh" \
+         --out "$TLS_OUT" --connections "$TLS_CONNS" >"$WORK/tls.log" 2>&1; then
+      # `grep -c` prints "0" AND exits 1 on zero matches, so `|| echo 0` would
+      # log "0" twice. Swallow the status; default only when the output is empty.
+      TLS_CELLS="$(grep -c '"label"' "$TLS_OUT" 2>/dev/null || true)"
+      pqb_log "TLS layer done (${TLS_CELLS:-0} cells)"
+    else
+      add_warn "TLS layer failed or unavailable (see $WORK/tls.log) — continuing without it"
+      TLS_OUT=""
+    fi
+  else
+    pqb_warn "TLS harness not present yet — skipping (will be added)"
+    TLS_OUT=""
+  fi
+else
+  # TLS skipped by flag: tls.json was never written. Clear the path, as the
+  # failure branches above do, so the assemble step gets no --tls argument
+  # pointing at a file that does not exist.
+  TLS_OUT=""
+fi
+
+# ---- stop sampler, gather timing -------------------------------------------
+kill "$SAMPLER_PID" 2>/dev/null || true   # the sampler may already be gone; never fatal
+trap - EXIT   # sampler stopped: drop the EXIT cleanup trap
+TS_END="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+DURATION=$(( $(date +%s) - START_EPOCH ))   # wall-clock seconds since TS_START
+GOV_AFTER_END="$(pqb_get_governor)"   # re-read at the end; this value is written as GOVERNOR_AFTER
+
+# ---- host facts ------------------------------------------------------------
+collect_host_facts() {
+  local cpu_brand="" ncpu="" ram="" os_pretty="" kernel
+  kernel="$(uname -r)"
+  if [ "$PQB_OS" = "macos" ]; then
+    cpu_brand="$(sysctl -n machdep.cpu.brand_string 2>/dev/null)"
+    ncpu="$(sysctl -n hw.ncpu 2>/dev/null)"
+    ram="$(sysctl -n hw.memsize 2>/dev/null)"
+    os_pretty="macOS $(sw_vers -productVersion 2>/dev/null) ($(sw_vers -buildVersion 2>/dev/null))"
+  else
+    # aarch64 /proc/cpuinfo has no 'model name' line, so this grep misses; the
+    # trailing `|| true` keeps `set -o pipefail` from aborting the run (errexit)
+    # before the PQB_RPI_MODEL fallback below can supply the brand.
+    cpu_brand="$(grep -m1 'model name' /proc/cpuinfo 2>/dev/null | sed 's/.*: //' || true)"
+    [ -z "$cpu_brand" ] && cpu_brand="$PQB_RPI_MODEL"
+    ncpu="$( (command -v nproc >/dev/null && nproc) || grep -c ^processor /proc/cpuinfo)"
+    ram="$(( $(grep -m1 MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}') * 1024 ))"
+    os_pretty="$(. /etc/os-release 2>/dev/null; echo "$PRETTY_NAME")"
+  fi
+  {
+    echo "TOOL_VERSION=$TOOL_VERSION"
+    echo "HOSTNAME=$HOST"
+    echo "OS=$PQB_OS"
+    echo "ARCH=$PQB_ARCH"
+    echo "KERNEL=$kernel"
+    echo "OS_PRETTY=\"$os_pretty\""
+    echo "IS_RPI=$PQB_IS_RPI"
+    echo "RPI_MODEL=\"$PQB_RPI_MODEL\""
+    echo "CPU_BRAND=\"$cpu_brand\""
+    echo "NCPU=$ncpu"
+    echo "RAM_BYTES=$ram"
+    echo "GOVERNOR_REQUESTED=$GOV_REQUESTED"
+    echo "GOVERNOR_BEFORE=$GOV_BEFORE"
+    echo "GOVERNOR_AFTER=$GOV_AFTER_END"
+    echo "BENCH_CORE=$BENCH_CORE"
+    echo "PINNED=$PINNED"
+    echo "TASKSET_CMD=\"$TASKSET\""
+    echo "CALIB_MODE=$CALIB_MODE"
+    echo "TARGET_TIME_MS=$TARGET_TIME_MS"
+    echo "MIN_SAMPLES=$MIN_SAMPLES"
+    echo "MAX_ITERS=$MAX_ITERS"
+    echo "REPS=$REPS"
+    # warmup/timed_iters are single values only in fixed-count mode; in auto mode
+    # they are chosen per-op and recorded in each operation's JSON instead.
+    if [ "$CALIB_MODE" = "fixed" ]; then
+      echo "WARMUP=$WARMUP"
+      echo "ITERS=$ITERS"
+    else
+      echo "WARMUP="
+      echo "ITERS="
+    fi
+    echo "CYCLES_MODE=$CYCLES_MODE"
+    echo "CYCLES_AVAILABLE=$CYCLES_AVAILABLE"
+    echo "CYCLES_REASON=\"$CYCLES_REASON\""
+    echo "TS_START_UTC=$TS_START"
+    echo "TS_END_UTC=$TS_END"
+    echo "DURATION_S=$DURATION"
+    echo "WARNINGS=\"$WARN_ACC\""
+  } > "$META"
+}
+collect_host_facts
+
+# ---- assemble final results JSON -------------------------------------------
+OUT="$ROOT/results/${HOST}-${TS}.json"
+python3 "$ROOT/bench/lib/assemble.py" \
+  --meta "$META" --lock "$LOCK" --features "$FEATURES" \
+  --kemsig "$KEMSIG_OUT" ${TLS_OUT:+--tls "$TLS_OUT"} \
+  --thermal "$THERMAL" --config "$ROOT/config.yaml" \
+  --out "$OUT" >/dev/null
+
+# ---- summary ---------------------------------------------------------------
+echo
+pqb_log "================ RUN COMPLETE ================"
+python3 - "$OUT" <<'PY'   # quoted delimiter: the Python below is passed through with no shell expansion
+import json,sys
+d=json.load(open(sys.argv[1]))
+g=d["is_baseline_grade"]
+print(f"  results: {sys.argv[1]}")
+print(f"  host: {d['host']['cpu_brand']} ({d['host']['os_pretty']})")
+print(f"  baseline-grade (RPi5): {g}")
+if not g:
+    for r in d['baseline_grade_reasons']:
+        print(f"     - {r}")
+tt=d['thermal_trace']
+print(f"  thermal: {tt.get('temp_c')}  throttling={tt.get('throttling_detected')}")
+print(f"  kem algos: {sum(1 for x in d['kem'] if x.get('enabled'))} enabled / {len(d['kem'])}")
+print(f"  sig algos: {sum(1 for x in d['sig'] if x.get('enabled'))} enabled / {len(d['sig'])}")
+print(f"  cycles available: {d['run']['cycles_available']}")
+PY
+pqb_log "keep raw work dir? -> $WORK (safe to delete)"
--- a/pq-bench-rpi5/setup/lib_platform.sh
+++ b/pq-bench-rpi5/setup/lib_platform.sh
@ -0,0 +1,227 @@
+# shellcheck shell=bash
+# =============================================================================
+# lib_platform.sh — portable platform abstraction
+#
+# Sourced by setup/setup.sh and run.sh. Every operation that differs between the
+# RPi5 (Debian/Ubuntu aarch64) measurement target and the macOS/Apple-Silicon
+# dev box is funneled through one of these functions, so the *identical* codebase
+# runs unchanged on both. Where a capability does not exist on a platform
+# (governor control, core pinning, on-die thermal sensors), the function degrades
+# gracefully and the caller records that it was unavailable — it never silently
+# pretends the action happened.
+# =============================================================================
+
+# ---- platform detection ----------------------------------------------------
+# Sets: PQB_OS (macos|linux), PQB_ARCH, PQB_IS_RPI (1|0), PQB_RPI_MODEL
+pqb_detect_platform() {
+  # Defaults first; the Raspberry Pi probe below may overwrite them.
+  PQB_IS_RPI=0
+  PQB_RPI_MODEL=""
+
+  PQB_ARCH="$(uname -m)"
+  case "$(uname -s)" in
+    Linux)  PQB_OS="linux" ;;
+    Darwin) PQB_OS="macos" ;;
+    *)      PQB_OS="unknown" ;;
+  esac
+
+  if [ "$PQB_OS" = "linux" ] && [ -r /proc/device-tree/model ]; then
+    # the device-tree model string is NUL-terminated; strip the NUL
+    PQB_RPI_MODEL="$(tr -d '\0' < /proc/device-tree/model 2>/dev/null)"
+    if [[ "$PQB_RPI_MODEL" == *"Raspberry Pi"* ]]; then
+      PQB_IS_RPI=1
+    fi
+  fi
+
+  export PQB_OS PQB_ARCH PQB_IS_RPI PQB_RPI_MODEL
+}
+
+# ---- friendly logging ------------------------------------------------------
+pqb_log()  { printf '\033[1;34m[pqb]\033[0m %s\n' "$*" >&2; }
+pqb_warn() { printf '\033[1;33m[pqb WARN]\033[0m %s\n' "$*" >&2; }
+pqb_err()  { printf '\033[1;31m[pqb ERR]\033[0m %s\n' "$*" >&2; }
+
+# ---- hostname resolution ---------------------------------------------------
+# A "good" host id is non-empty, not localhost, and not an avahi/macOS
+# auto-assigned "unknown<hexMAC>" placeholder (which is what shows up when no
+# real hostname is set — that produced the ugly results filename before).
+_pqb_good_host() {
+  local h="$1"
+  [ -n "$h" ] || return 1
+  case "$h" in
+    localhost|localhost.*) return 1 ;;
+  esac
+  printf '%s' "$h" | grep -Eq '^[Uu]nknown[0-9a-fA-F]{6,}$' && return 1
+  return 0
+}
+
+# Resolve a readable, stable host identifier, falling through:
+#   $HOSTNAME -> `hostname` -> hostnamectl --static (Linux) /
+#   scutil --get LocalHostName (macOS) -> short machine id (last resort).
+# Domain suffixes (.home/.local/...) are stripped. On the RPi5 this yields the
+# actual pi hostname; on this Mac it falls through to LocalHostName.
+pqb_resolve_hostname() {
+  # Candidates are tried in the order they are appended; the first one that
+  # passes _pqb_good_host wins and is echoed on stdout.
+  local cands=() c h
+  cands+=("${HOSTNAME:-}")
+  cands+=("$(hostname 2>/dev/null || true)")
+  if [ "${PQB_OS:-}" = "linux" ]; then
+    cands+=("$(hostnamectl --static 2>/dev/null || true)")
+  elif [ "${PQB_OS:-}" = "macos" ]; then
+    cands+=("$(scutil --get LocalHostName 2>/dev/null || true)")
+  fi
+  for c in "${cands[@]}"; do
+    h="${c%%.*}"                         # strip domain suffix
+    if _pqb_good_host "$h"; then echo "$h"; return 0; fi
+  done
+  # last resort: a short, stable machine id so files never collide as "unknown"
+  local mid=""
+  if [ -r /etc/machine-id ]; then
+    # first 12 characters of the machine id
+    mid="$(cut -c1-12 /etc/machine-id 2>/dev/null)"
+  elif [ "${PQB_OS:-}" = "macos" ]; then
+    # first 12 characters of IOPlatformUUID with the dashes removed
+    mid="$(ioreg -rd1 -c IOPlatformExpertDevice 2>/dev/null \
+           | awk -F'"' '/IOPlatformUUID/{print $4}' | tr -d '-' | cut -c1-12)"
+  fi
+  # "host-unknown" only when no id could be read at all
+  echo "host-${mid:-unknown}"
+}
+
+# ---- CPU governor ----------------------------------------------------------
+# Returns 0 if it set 'performance', 1 if unavailable. Prints the governor it
+# left the system in on stdout.
+pqb_set_governor_performance() {
+  if [ "$PQB_OS" = "linux" ] && [ -d /sys/devices/system/cpu/cpu0/cpufreq ]; then
+    # ok stays 1 only if EVERY cpu's scaling_governor was writable and written
+    local ok=1 g
+    for g in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
+      [ -w "$g" ] || { ok=0; continue; }
+      echo performance > "$g" 2>/dev/null || ok=0
+    done
+    if [ "$ok" = 1 ]; then
+      # report what cpu0 actually reads back, not what was requested
+      cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null
+      return 0
+    fi
+    # try cpupower as a fallback (may need sudo)
+    if command -v cpupower >/dev/null 2>&1 && cpupower frequency-set -g performance >/dev/null 2>&1; then
+      echo performance; return 0
+    fi
+    pqb_warn "could not set governor to performance (need root? try: sudo ./run.sh)"
+    # failure path: still print the governor cpu0 is in, or "unknown"
+    cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null || echo "unknown"
+    return 1
+  fi
+  # macOS / other: no userspace governor control.
+  echo "unavailable"
+  return 1
+}
+
+pqb_get_governor() {
+  if [ "$PQB_OS" = "linux" ] && [ -r /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
+    cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
+  else
+    echo "unavailable"
+  fi
+}
+
+# ---- core pinning ----------------------------------------------------------
+# pqb_taskset_prefix <core> -> echoes a command prefix to pin to that core, or
+# empty string if pinning is unavailable (caller warns).
+pqb_taskset_prefix() {
+  # $1 = core number. taskset is preferred, numactl is the fallback; with
+  # neither installed an empty line is echoed (e.g. macOS).
+  local cpu="$1"
+  if command -v taskset >/dev/null 2>&1; then
+    echo "taskset -c $cpu"
+    return
+  fi
+  if command -v numactl >/dev/null 2>&1; then
+    echo "numactl --physcpubind=$cpu"
+    return
+  fi
+  echo ""
+}
+
+# ---- thermal / clock sampling ----------------------------------------------
+# pqb_sample_thermal -> one CSV line: epoch_s,arm_clock_hz,temp_c,throttled_hex
+# Fields that cannot be read on a platform are emitted as empty (no fake zeros).
+pqb_sample_thermal() {
+  # Never fails on an unreadable sensor. run.sh calls this under
+  # `set -euo pipefail` with stderr discarded, so one failing probe would
+  # otherwise kill the sampler, or the whole run, without any message. A sensor
+  # that cannot be read leaves its field empty instead.
+  local ts clk temp thr
+  ts="$(date +%s)"
+  clk=""; temp=""; thr=""
+
+  if command -v vcgencmd >/dev/null 2>&1; then
+    # Raspberry Pi: authoritative SoC sensors. vcgencmd can be installed yet
+    # still fail, hence `|| true` on every pipeline.
+    clk="$(vcgencmd measure_clock arm 2>/dev/null | sed -n 's/.*=//p' || true)"
+    temp="$(vcgencmd measure_temp 2>/dev/null | sed -n "s/temp=\([0-9.]*\).*/\1/p" || true)"
+    thr="$(vcgencmd get_throttled 2>/dev/null | sed -n 's/.*=//p' || true)"
+  elif [ "${PQB_OS:-}" = "linux" ]; then
+    # Generic Linux fallback (cpufreq + thermal_zone).
+    local f khz milli
+    f=/sys/devices/system/cpu/cpu${PQB_BENCH_CORE:-0}/cpufreq/scaling_cur_freq
+    khz="$(cat "$f" 2>/dev/null || true)"
+    if [ -n "$khz" ]; then
+      clk="$(( khz * 1000 ))"   # kHz -> Hz
+    fi
+    milli="$(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || true)"
+    if [ -n "$milli" ]; then
+      temp="$(awk -v m="$milli" 'BEGIN{printf "%.1f", m/1000}')"   # millidegrees -> degrees C
+    fi
+  fi
+  # macOS: live per-core freq/temp require sudo powermetrics; we intentionally
+  # leave them empty rather than emit misleading values. (Smoke test only.)
+
+  printf '%s,%s,%s,%s\n' "$ts" "$clk" "$temp" "$thr"
+}
+
+# pqb_throttled_active <throttled_hex> -> 0 if thermal throttling currently/has
+# occurred, 1 otherwise. RPi get_throttled bit 0 = under-voltage now,
+# bit 1 = arm freq capped now, bit 2 = currently throttled,
+# bit 3 = soft temp limit active (and bits 16-19 = "has occurred" latches).
+pqb_throttled_active() {
+  # $1 = get_throttled value as hex, with or without a leading "0x".
+  local digits="${1#0x}"
+  if [ -z "$digits" ]; then
+    return 1
+  fi
+  local flags=$(( 16#$digits ))
+  # 0x4 = bit 2 (throttled now), 0x40000 = bit 18 (throttling has occurred)
+  local now=$(( flags & 0x4 )) latched=$(( flags & 0x40000 ))
+  if [ "$now" -ne 0 ] || [ "$latched" -ne 0 ]; then
+    return 0
+  fi
+  return 1
+}
+
+# ---- CPU feature / crypto-extension detection ------------------------------
+# Echoes a JSON object describing NEON + SHA3/SHA512 acceleration. Consumed
+# verbatim by env metadata so results record whether Keccak accel is in use.
+pqb_cpu_features_json() {
+  # Prints one JSON object on stdout with the keys source, neon, sha2, sha3,
+  # sha512, aes, pmull. Features that cannot be detected stay false; source
+  # stays "unknown" when neither detection path applies.
+  local neon=false sha2=false sha3=false sha512=false aes=false pmull=false src="unknown"
+  if [ "$PQB_OS" = "linux" ] && [ -r /proc/cpuinfo ]; then
+    src="/proc/cpuinfo"
+    # `|| true`: a cpuinfo with no "Features" line (x86, for one, lists "flags")
+    # makes grep fail, and under the caller's `set -eo pipefail` that would
+    # abort the run. An empty $feats just leaves every feature false.
+    local feats; feats="$(grep -m1 -i '^Features' /proc/cpuinfo | tr 'A-Z' 'a-z' || true)"
+    # the leading space in each pattern anchors on a whole feature token
+    case "$feats" in *" asimd"*|*"neon"*) neon=true;; esac
+    case "$feats" in *" sha2"*) sha2=true;; esac
+    case "$feats" in *" sha3"*) sha3=true;; esac
+    case "$feats" in *" sha512"*) sha512=true;; esac
+    case "$feats" in *" aes"*) aes=true;; esac
+    case "$feats" in *" pmull"*) pmull=true;; esac
+  elif [ "$PQB_OS" = "macos" ]; then
+    src="sysctl"
+    neon=true   # all Apple Silicon has NEON/ASIMD
+    [ "$(sysctl -n hw.optional.arm.FEAT_SHA256 2>/dev/null)" = 1 ] && sha2=true
+    [ "$(sysctl -n hw.optional.arm.FEAT_SHA3   2>/dev/null)" = 1 ] && sha3=true
+    [ "$(sysctl -n hw.optional.arm.FEAT_SHA512 2>/dev/null)" = 1 ] && sha512=true
+    [ "$(sysctl -n hw.optional.arm.FEAT_AES    2>/dev/null)" = 1 ] && aes=true
+    [ "$(sysctl -n hw.optional.arm.FEAT_PMULL  2>/dev/null)" = 1 ] && pmull=true
+  fi
+  printf '{"source":"%s","neon":%s,"sha2":%s,"sha3":%s,"sha512":%s,"aes":%s,"pmull":%s}' \
+    "$src" "$neon" "$sha2" "$sha3" "$sha512" "$aes" "$pmull"
+}
+
+# ---- package installation --------------------------------------------------
+# pqb_install_build_deps -> installs compiler/cmake/openssl headers per platform.
+pqb_install_build_deps() {
+  if [ "$PQB_OS" = "macos" ]; then
+    command -v brew >/dev/null 2>&1 || { pqb_err "Homebrew required on macOS: https://brew.sh"; return 1; }
+    pqb_log "installing build deps via Homebrew"
+    # best effort: a non-zero brew exit is ignored
+    brew install cmake ninja openssl@3 git python3 >/dev/null || true
+  elif [ "$PQB_OS" = "linux" ]; then
+    if command -v apt-get >/dev/null 2>&1; then
+      pqb_log "installing build deps via apt"
+      # prefix with sudo only when not already root
+      local SUDO=""; [ "$(id -u)" -ne 0 ] && SUDO="sudo"
+      $SUDO apt-get update -qq
+      # linux-cpupower provides the `cpupower` binary used by
+      # pqb_set_governor_performance. (Older releases shipped cpufrequtils, which
+      # was dropped in Debian 13/trixie — cpupower is the supported replacement.)
+      $SUDO apt-get install -y -qq \
+        build-essential cmake ninja-build git python3 perl \
+        libssl-dev pkg-config astyle doxygen \
+        linux-cpupower util-linux >/dev/null
+    else
+      # non-apt distros: nothing is installed, only a warning is printed
+      pqb_warn "no apt-get found; install cmake/ninja/gcc/libssl-dev manually"
+    fi
+  fi
+}
--- a/pq-bench-rpi5/setup/setup.sh
+++ b/pq-bench-rpi5/setup/setup.sh
@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# =============================================================================
+# setup.sh — build + pin the full PQ toolchain from scratch.
+#
+#   ./setup/setup.sh            # everything: deps, liboqs, openssl(if needed), oqs-provider
+#   ./setup/setup.sh liboqs     # just liboqs
+#   ./setup/setup.sh openssl    # just openssl (forced from source)
+#   ./setup/setup.sh provider   # just oqs-provider
+#   ./setup/setup.sh deps       # just OS packages
+#
+# Everything is installed under ./vendor/install (no system pollution). The exact
+# resolved git commits + the optimization flags actually used are written to
+# setup/versions.lock, which run.sh stamps into every results JSON.
+#
+# Identical flags for every candidate: -O3 -mcpu=cortex-a76 on the RPi5. On a
+# non-A76 host (the macOS smoke box) we fall back to -O3 and RECORD that, so
+# smoke-test numbers can never masquerade as the RPi5 baseline.
+# =============================================================================
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT="$(cd "$HERE/.." && pwd)"
+# shellcheck source=setup/lib_platform.sh
+source "$HERE/lib_platform.sh"
+# shellcheck source=setup/versions.env
+source "$HERE/versions.env"
+
+pqb_detect_platform
+
+VENDOR="$ROOT/vendor"
+SRC="$VENDOR/src"
+PREFIX="$VENDOR/install"
+mkdir -p "$SRC" "$PREFIX"
+
+JOBS="$( (command -v nproc >/dev/null && nproc) || sysctl -n hw.ncpu 2>/dev/null || echo 4)"
+
+# ---- decide the real optimization flags for THIS host ----------------------
+# choose_cflags -> set BENCH_CFLAGS and CFLAGS_TARGET for this host.
+# The RPi5 preset (TARGET_CFLAGS_RPI5) is selected only when PQB_ARCH is aarch64
+# AND the compiler builds a trivial program with it; in every other case the
+# fallback preset (TARGET_CFLAGS_FALLBACK) is used.
+choose_cflags() {
+  local compiler="${CC:-cc}"
+  local probe_src="$SRC/.flagprobe.c"
+  local probe_bin="$probe_src.out"
+  echo 'int main(void){return 0;}' > "$probe_src"
+  # $compiler and the preset are expanded unquoted, so every whitespace-separated
+  # word becomes its own argument. The compile is skipped entirely off aarch64.
+  if [ "$PQB_ARCH" != "aarch64" ] || ! $compiler $TARGET_CFLAGS_RPI5 "$probe_src" -o "$probe_bin" 2>/dev/null; then
+    BENCH_CFLAGS="$TARGET_CFLAGS_FALLBACK"
+    CFLAGS_TARGET="generic-fallback"
+  else
+    BENCH_CFLAGS="$TARGET_CFLAGS_RPI5"
+    CFLAGS_TARGET="cortex-a76"
+  fi
+  rm -f "$probe_src" "$probe_bin"
+}
+
+cc_version_string() {
+  local cc="${CC:-cc}"
+  "$cc" --version 2>/dev/null | head -1
+}
+
+git_pin() { # repo ref destdir
+  local repo="$1" ref="$2" dest="$3"
+  if [ -d "$dest/.git" ]; then
+    pqb_log "updating $(basename "$dest") -> $ref"
+    git -C "$dest" fetch -q --depth 1 origin "$ref" || git -C "$dest" fetch -q --tags origin
+  else
+    pqb_log "cloning $(basename "$dest") @ $ref"
+    git clone -q --depth 1 --branch "$ref" "$repo" "$dest" 2>/dev/null \
+      || git clone -q "$repo" "$dest"
+  fi
+  git -C "$dest" checkout -q "$ref" 2>/dev/null || true
+  git -C "$dest" rev-parse HEAD
+}
+
+# ---------------------------------------------------------------------------
+# build_liboqs -> fetch liboqs at LIBOQS_REF, then configure, build and install
+# it into $PREFIX with the flags picked by choose_cflags.
+# Sets the globals LIBOQS_COMMIT and LIBOQS_OPT_DEFINES for write_lock.
+build_liboqs() {
+  choose_cflags
+  local src_dir="$SRC/liboqs"
+  local bld_dir="$src_dir/build"
+  local pinned
+  pinned="$(git_pin "$LIBOQS_REPO" "$LIBOQS_REF" "$src_dir")"
+  pqb_log "building liboqs ($LIBOQS_REF @ ${pinned:0:12}) flags: $BENCH_CFLAGS"
+
+  # OQS_DIST_BUILD=OFF gives a native build for the fixed target (no runtime CPU
+  # dispatch), so -mcpu=cortex-a76 fully drives codegen. With DIST_BUILD off the
+  # AArch64-optimized ML-KEM (mlkem-native) and AArch64 asm backends are on by
+  # default on aarch64 (compile-time CPU features); this is verified post-build.
+  local cmake_args=(-S "$src_dir" -B "$bld_dir")
+  # Use the Ninja generator when ninja is installed.
+  if command -v ninja >/dev/null 2>&1; then
+    cmake_args+=(-G Ninja)
+  fi
+  cmake_args+=(
+    -DCMAKE_INSTALL_PREFIX="$PREFIX"
+    -DCMAKE_BUILD_TYPE=Release
+    -DOQS_DIST_BUILD=OFF
+    -DOQS_BUILD_ONLY_LIB=OFF
+    -DBUILD_SHARED_LIBS=ON
+    -DCMAKE_C_FLAGS="$BENCH_CFLAGS"
+  )
+  cmake "${cmake_args[@]}" >/dev/null
+  cmake --build "$bld_dir" --parallel "$JOBS" >/dev/null
+  cmake --install "$bld_dir" >/dev/null
+
+  # Evidence of the optimized backend: collect the aarch64/native defines from
+  # the generated build config so versions.lock shows what was really compiled.
+  local cfg_header="$bld_dir/include/oqs/oqsconfig.h"
+  if [ -r "$cfg_header" ]; then
+    # Embedded double-quotes are stripped so the value stays valid in versions.lock.
+    LIBOQS_OPT_DEFINES="$(grep -Ei 'AARCH64|ARM|_ASM|MLKEM_NATIVE|OPT_TARGET|CPU_EXT' "$cfg_header" \
+      | grep -i 'define' | sed 's/^#define //' | tr -d '"' | tr '\n' ';' || true)"
+  else
+    LIBOQS_OPT_DEFINES="(oqsconfig.h not found)"
+  fi
+  LIBOQS_COMMIT="$pinned"
+}
+
+# ---------------------------------------------------------------------------
+locate_or_build_openssl() {
+  # Prefer an existing >= 3.5 openssl (Homebrew on macOS, distro on Linux) unless
+  # BUILD_OPENSSL=1. PQ sig certs for TLS only need >= 3.5.0.
+  local want_major=3 want_minor=5
+  if [ "${1:-}" != "force" ] && [ "${BUILD_OPENSSL:-0}" != 1 ]; then
+    local cand
+    for cand in "$(command -v openssl || true)" /opt/homebrew/opt/openssl@3/bin/openssl /usr/bin/openssl; do
+      [ -x "$cand" ] || continue
+      local v; v="$("$cand" version 2>/dev/null | awk '{print $2}')"
+      # NB: assign on separate lines. A single `local a=.. b=.. c="${b..}"` makes
+      # bash 5.2 declare all names (unset) *before* expanding any RHS, so the
+      # reference to `rest` here trips `set -u` (unbound variable) on the Pi.
+      local maj rest min
+      maj="${v%%.*}"; rest="${v#*.}"; min="${rest%%.*}"
+      if [ "${maj:-0}" -gt "$want_major" ] 2>/dev/null || \
+         { [ "${maj:-0}" -eq "$want_major" ] && [ "${min:-0}" -ge "$want_minor" ]; } 2>/dev/null; then
+        OPENSSL_BIN="$cand"
+        OPENSSL_PREFIX="$(dirname "$(dirname "$cand")")"
+        OPENSSL_COMMIT="system:$v"
+        pqb_log "using existing OpenSSL $v at $cand"
+        return 0
+      fi
+    done
+  fi
+  pqb_log "building OpenSSL $OPENSSL_REF from source"
+  local dest="$SRC/openssl" commit
+  commit="$(git_pin "$OPENSSL_REPO" "$OPENSSL_REF" "$dest")"
+  ( cd "$dest" && ./Configure --prefix="$PREFIX" --openssldir="$PREFIX/ssl" shared \
+      && make -j"$JOBS" >/dev/null && make install_sw >/dev/null )
+  OPENSSL_BIN="$PREFIX/bin/openssl"
+  OPENSSL_PREFIX="$PREFIX"
+  OPENSSL_COMMIT="$commit"
+}
+
+# ---------------------------------------------------------------------------
+# build_oqs_provider -> fetch oqs-provider at OQSPROVIDER_REF and build it
+# against the selected OpenSSL and the liboqs installed in $PREFIX.
+# Sets the globals OQSPROVIDER_MODULE (path of the built provider library, or
+# empty if none was found) and OQSPROVIDER_COMMIT.
+build_oqs_provider() {
+  [ -n "${OPENSSL_PREFIX:-}" ] || locate_or_build_openssl
+  # Always resolve the flags here. When only the `provider` target runs,
+  # build_liboqs has not set BENCH_CFLAGS, and write_lock re-probes and records
+  # the host's flags; building with anything else would make the lock lie.
+  choose_cflags
+  local dest="$SRC/oqs-provider" commit
+  commit="$(git_pin "$OQSPROVIDER_REPO" "$OQSPROVIDER_REF" "$dest")"
+  pqb_log "building oqs-provider ($OQSPROVIDER_REF @ ${commit:0:12}) flags: $BENCH_CFLAGS"
+  local GEN=(); command -v ninja >/dev/null 2>&1 && GEN=(-G Ninja)
+  cmake -S "$dest" -B "$dest/build" ${GEN[@]+"${GEN[@]}"} \
+    -DCMAKE_INSTALL_PREFIX="$PREFIX" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DOPENSSL_ROOT_DIR="$OPENSSL_PREFIX" \
+    -Dliboqs_DIR="$PREFIX/lib/cmake/liboqs" \
+    -DCMAKE_C_FLAGS="$BENCH_CFLAGS" >/dev/null
+  cmake --build "$dest/build" --parallel "$JOBS" >/dev/null
+  # Install is best-effort: the module is also searched for in the build tree.
+  cmake --install "$dest/build" >/dev/null 2>&1 || true
+  # Provider .so lands under .../lib/ossl-modules or .../oqsprovider.
+  # `|| true`: with pipefail, `head` closing the pipe early can make `find` exit
+  # on SIGPIPE, which would otherwise abort the script under `set -e`.
+  OQSPROVIDER_MODULE="$(find "$PREFIX" "$dest/build" -name 'oqsprovider.*' \( -name '*.so' -o -name '*.dylib' \) 2>/dev/null | head -1 || true)"
+  [ -n "$OQSPROVIDER_MODULE" ] \
+    || pqb_warn "oqs-provider built but no oqsprovider.so/.dylib found under $PREFIX or $dest/build"
+  OQSPROVIDER_COMMIT="$commit"
+}
+
+# ---------------------------------------------------------------------------
+write_lock() {
+  choose_cflags 2>/dev/null || true
+  local lock="$HERE/versions.lock"
+  {
+    echo "# Auto-generated by setup.sh — exact toolchain provenance. Stamped into results JSON."
+    echo "PQB_BUILD_HOST_OS=$PQB_OS"
+    echo "PQB_BUILD_HOST_ARCH=$PQB_ARCH"
+    echo "PQB_IS_RPI=$PQB_IS_RPI"
+    echo "PQB_RPI_MODEL=\"${PQB_RPI_MODEL}\""
+    echo "BENCH_CFLAGS=\"${BENCH_CFLAGS:-unknown}\""
+    echo "CFLAGS_TARGET=\"${CFLAGS_TARGET:-unknown}\""
+    echo "CC_VERSION=\"$(cc_version_string)\""
+    echo "LIBOQS_REF=\"$LIBOQS_REF\""
+    echo "LIBOQS_COMMIT=\"${LIBOQS_COMMIT:-not-built}\""
+    echo "LIBOQS_OPT_DEFINES=\"${LIBOQS_OPT_DEFINES:-}\""
+    echo "OPENSSL_BIN=\"${OPENSSL_BIN:-}\""
+    echo "OPENSSL_PREFIX=\"${OPENSSL_PREFIX:-}\""
+    echo "OPENSSL_COMMIT=\"${OPENSSL_COMMIT:-not-built}\""
+    echo "OQSPROVIDER_REF=\"$OQSPROVIDER_REF\""
+    echo "OQSPROVIDER_COMMIT=\"${OQSPROVIDER_COMMIT:-not-built}\""
+    echo "OQSPROVIDER_MODULE=\"${OQSPROVIDER_MODULE:-}\""
+    echo "PREFIX=\"$PREFIX\""
+  } > "$lock"
+  pqb_log "wrote $lock"
+  cat "$lock" >&2
+}
+
+# ---- dispatch --------------------------------------------------------------
+main() {
+  local what="${1:-all}"
+  case "$what" in
+    deps)     pqb_install_build_deps ;;
+    liboqs)   build_liboqs; write_lock ;;
+    openssl)  locate_or_build_openssl force; write_lock ;;
+    provider) build_oqs_provider; write_lock ;;
+    all)
+      pqb_install_build_deps
+      build_liboqs
+      locate_or_build_openssl
+      build_oqs_provider
+      write_lock
+      pqb_log "setup complete. Next: ./run.sh --smoke"
+      ;;
+    *) pqb_err "unknown target: $what (deps|liboqs|openssl|provider|all)"; exit 2 ;;
+  esac
+}
+main "$@"
--- a/pq-bench-rpi5/setup/versions.env
+++ b/pq-bench-rpi5/setup/versions.env
@ -0,0 +1,40 @@
+# =============================================================================
+# Pinned upstream versions for the PQ benchmark toolchain.
+#
+# These are the *intended* refs. setup/setup.sh clones each at the tag below
+# and then records the *actually resolved* commit hash into setup/versions.lock,
+# which is stamped verbatim into every results JSON. That way a results file is
+# always traceable to exact source, even if a tag is ever re-pointed upstream.
+#
+# Override any of these from the environment, e.g.:
+#   LIBOQS_REF=main ./setup/setup.sh
+# =============================================================================
+
+# liboqs: KEM/signature implementations. >= 0.15.0 ships the AArch64-optimized
+# ML-KEM backend (mlkem-native). setup.sh passes no dedicated switch for it: it
+# configures liboqs with OQS_DIST_BUILD=OFF, relies on the backend being on by
+# default on aarch64, and records the matching oqsconfig.h defines in
+# versions.lock.
+LIBOQS_REPO="${LIBOQS_REPO:-https://github.com/open-quantum-safe/liboqs.git}"
+LIBOQS_REF="${LIBOQS_REF:-0.15.0}"
+
+# OpenSSL >= 3.5.0 — required for PQ signatures in TLS 1.3 server auth.
+# On macOS dev boxes we prefer the Homebrew openssl@3 if it is already >= 3.5;
+# setup.sh only builds OpenSSL from source when the system one is too old,
+# BUILD_OPENSSL=1 is set, or the `openssl` target is requested explicitly.
+OPENSSL_REPO="${OPENSSL_REPO:-https://github.com/openssl/openssl.git}"
+OPENSSL_REF="${OPENSSL_REF:-openssl-3.5.0}"
+
+# oqs-provider >= 0.9.0 — wires liboqs algorithms into OpenSSL as a provider,
+# giving us PQ KEM groups and PQ signature certs for the TLS layer.
+OQSPROVIDER_REPO="${OQSPROVIDER_REPO:-https://github.com/open-quantum-safe/oqs-provider.git}"
+OQSPROVIDER_REF="${OQSPROVIDER_REF:-0.9.0}"
+
+# Identical optimization flags for every candidate (the credibility anchor).
+# cortex-a76 is the RPi5 core. setup.sh uses the RPi5 preset only on aarch64 and
+# only after the compiler has accepted it in a probe build; otherwise it
+# substitutes the fallback and records which flags were *actually* used in
+# versions.lock + results JSON, so smoke numbers are never mistaken for the
+# RPi5 baseline.
+# The probe expands the preset unquoted, so each whitespace-separated word is
+# passed to the compiler as a separate argument.
+TARGET_CFLAGS_RPI5="${TARGET_CFLAGS_RPI5:--O3 -mcpu=cortex-a76}"
+TARGET_CFLAGS_FALLBACK="${TARGET_CFLAGS_FALLBACK:--O3}"
+
+# Pinned core for taskset on the RPi5 (4 cores: 0-3). Core 3 is chosen to stay
+# away from CPU0 where the kernel tends to steer IRQs/RPS. Documented in README.
+# NOTE(review): setup.sh does not read this; presumably run.sh consumes it — confirm.
+BENCH_CORE="${BENCH_CORE:-3}"