Merge 1c48fc7e8c75e8ae9aabe66726ae125faa84df15 into e0a11767893dd988b4b6bcd904a68afd3eb1093f

This commit is contained in:
megonen 2026-07-03 13:31:05 +03:00 committed by GitHub
commit 87a512b986
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 43273 additions and 0 deletions

41
pq-bench-rpi5/.gitignore vendored Normal file
View File

@ -0,0 +1,41 @@
# Build trees / vendored deps (rebuilt by setup/setup.sh)
/vendor/
/build/
setup/versions.lock
# Compiled harness binaries
bench/kem_sig/bench_pq
bench/tls/bench_tls
*.o
# Generated TLS material
bench/tls/pki/
# Per-run scratch dirs (intermediate artifacts; never committed)
results/.work-*/
# Local results are kept by users, not committed by default.
# Comment the next line out if you DO want to commit your machine's results.
results/*.json
# Consolidated baseline-grade RPi5 run (KEM + sig + TLS in one pass): the Pi
# validator reference baseline.
!results/rasberrypi5-20260625T202356Z.json
# Consolidated Apple M3 cross-platform run (apple-m3, KEM + sig + TLS in one
# pass): is_baseline_grade=false by design, kept as the cross-platform reference.
!results/mehmetmac-20260625T220618Z.json
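# NOTE: dashboard/data/merged.json (the merged dataset consumed by the
# dashboard) is intentionally NOT ignored. It is tracked so a fresh clone ships
# the published baselines and renders charts out-of-the-box. Contributors who
# re-merge overwrite it locally (a modified file they can commit or discard).
# Generated PNG exports (output directory of analyze/plot.py)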
analyze/png/
# Python venv (matplotlib lives here; system python stays clean)
analyze/.venv/
# Machine-local Claude Code settings (never shared)
.claude/settings.local.json
# Python / OS cruft
__pycache__/
*.pyc
.DS_Store

41
pq-bench-rpi5/Dockerfile Normal file
View File

@ -0,0 +1,41 @@
# =============================================================================
# Reproducible BUILD of the PQ benchmark toolchain on Debian aarch64 (the same
# OS family as Raspberry Pi OS / Ubuntu on the RPi5).
#
# This image is for BUILDING ONLY — it pins and compiles liboqs / OpenSSL /
# oqs-provider reproducibly. It is NOT for running the benchmark.
#
# docker build -t pq-bench-rpi5 . # build + pin the toolchain
#
# Run the MEASUREMENT bare-metal on the host, never in the container. A
# container cannot reliably control the CPU governor, pin to an isolated core,
# or read the Pi's SoC thermal/throttle sensors (vcgencmd) — the three knobs the
# reference-grade gate depends on — so an in-container run could never be
# baseline-grade and would only add noise. Build here if you like; then:
#
# ./run.sh # on the host (see README)
# =============================================================================
FROM debian:bookworm-slim

# Build-time only: keeps apt/debconf non-interactive during `docker build`.
# Declared as ARG (not ENV) so the setting is visible to the RUN steps below
# but is not baked into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# Compiler toolchain, build systems and helper utilities needed by
# setup/setup.sh. The apt lists are removed in the same layer so they never
# end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential cmake ninja-build git perl pkg-config \
      python3 python3-venv ca-certificates \
      libssl-dev cpufrequtils util-linux \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY . /app

# Build + pin the toolchain at image-build time so the image is self-contained.
# (Comment this out to keep the image thin and run setup.sh at container start.)
RUN ./setup/setup.sh all || (echo "setup failed — see log above" && exit 1)

# Optional: matplotlib PNG export in an isolated venv.
RUN python3 -m venv analyze/.venv \
    && analyze/.venv/bin/pip install --no-cache-dir -r analyze/requirements.txt

# The entrypoint is a login shell that executes the CMD string (or whatever
# single command string is passed to `docker run`).
ENTRYPOINT ["/bin/bash", "-lc"]

# This image builds the toolchain; it does not run the benchmark. The default
# command just says so — run the measurement bare-metal on the host (see README).
CMD ["echo 'Toolchain built. Run the benchmark BARE-METAL on the host (./run.sh) — Docker is for reproducible builds only; a container cannot meet the baseline-grade gate.'"]

424
pq-bench-rpi5/README.md Normal file
View File

@ -0,0 +1,424 @@
# pq-bench-rpi5
A reproducible, general-purpose **post-quantum cryptography benchmark** whose
baseline target is the **Raspberry Pi 5** (Broadcom BCM2712, Cortex-A76,
aarch64). Anyone can run it on their own Pi 5 and the results aggregate and
compare apples-to-apples.
**Framing — migration cost.** How much does moving from the cryptography Logos
uses *today* (X25519 key exchange + Ed25519 signatures) to PQ candidates cost on
validator-grade hardware? Every chart draws that classical baseline as the
reference line, so the PQ "tax" is always visible.
Phase 1 covers **PQ KEMs**, **PQ signatures**, and **PQ TLS 1.3 handshakes**.
Hooks are left for a later SNARK/STARK phase (see `config.yaml`); it is not
implemented yet.
---
## What gets measured
| Layer | Metrics |
|-------|---------|
| **KEM** | keygen / encaps / decaps wall-clock (median, MAD, IQR, min, max, mean, stddev, ops/sec) · pk/sk/ct sizes · heap high-water |
| **Signature** | keygen / sign / verify wall-clock (same stats) · pk/sig sizes |
| **TLS 1.3** | full-handshake latency · handshakes/sec · bytes-on-wire · ClientHello size (+ fragmentation flag) — as a matrix of (KEM group × signature) |
The **classical baseline** (X25519 / Ed25519 / X25519+Ed25519) is always
included as the reference point — measured as a real primitive via OpenSSL, not
hand-waved.
---
## Project layout
```
pq-bench-rpi5/
setup/ build + pin liboqs, OpenSSL 3.5+, oqs-provider (versions.env / versions.lock)
bench/kem_sig/ bench_pq.c primitive KEM/sig harness (liboqs + OpenSSL EVP baselines)
bench/tls/ bench_tls.c in-process TLS 1.3 handshake harness (OpenSSL API)
run_tls.sh PKI generation + (KEM × sig) matrix driver
bench/lib/ assemble.py / merge helpers / miniyaml.py (zero-dep YAML)
results/ results/<host>-<timestamp>.json (one per run, full metadata)
analyze/ merge.py (combine machines) + plot.py (matplotlib PNGs, optional venv)
dashboard/ static HTML/JS (Chart.js) — no backend, GitHub-Pages deployable
run.sh governor + taskset + thermal wrapper + orchestrator
config.yaml candidate lists (extend here)
Dockerfile reproducible Debian-aarch64 build
```
---
## Quick start
### On a Raspberry Pi 5 (the real measurement target)
```bash
git clone <this repo> && cd pq-bench-rpi5
./setup/setup.sh # build + pin liboqs, OpenSSL 3.5+, oqs-provider
sudo ./run.sh # sudo only to set the performance governor (see below)
python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
# open dashboard/index.html (or deploy dashboard/ to GitHub Pages)
```
**On `sudo`:** it is **optional, not a prerequisite.** The only thing it does is
set the CPU governor to `performance` — none of the crypto needs root. `./run.sh`
runs fine without it: it warns, skips the governor step, completes the run, and
the results JSON is automatically stamped `is_baseline_grade=false` (governor
demerit). So use `sudo` when you want a baseline-grade reference run; drop it for
a quick local run you don't intend to submit.
`./run.sh --smoke` runs tiny iteration counts as a fast pipeline check.
`./run.sh --kemsig-only` / `--tls-only` scope the run. `--iters/--warmup/--reps`
override the `config.yaml` knobs.
### On macOS (development / smoke testing only)
```bash
brew install cmake openssl@3 git
./setup/setup.sh
./run.sh --smoke # produces valid JSON; stamped is_baseline_grade=false
```
> **macOS runs are cross-platform / smoke data, never baseline-grade — by
> design, for three concrete reasons:**
> 1. **Not a Raspberry Pi**, so it fails the gate's first condition outright.
> 2. **No userspace cycle counter, and ~1 µs timer granularity.** macOS exposes
> no readable PMU cycle counter and its wall-clock quantizes to ~1 µs steps —
> a ~10% floor on the fastest ops (ML-KEM ~10 µs), negligible for anything
> ≥100 µs (McEliece, FrodoKEM). (See "Timing source" under "Measurement methodology" below.)
> 3. **No Linux cpufreq governor, and core-pinning isn't guaranteed.** Two of the
> noise-control knobs the gate relies on — `performance` governor and a pinned
> core — aren't available, and the build flags aren't `cortex-a76` either.
>
> Every macOS results file records `is_baseline_grade=false` with the exact
> reasons, and the dashboard hides such runs by default. They still produce
> **useful cross-platform numbers** (the heavier McEliece/FrodoKEM ops are barely
> affected by the timer floor) — they just can't meet the controlled reference
> bar, hence smoke-only.
### Docker (reproducible build — build only, never run)
Docker is for reproducibly **building** the pinned toolchain (liboqs / OpenSSL /
oqs-provider), not for running the benchmark:
```bash
docker build -t pq-bench-rpi5 . # builds + pins the toolchain inside the image
```
**Run the measurement bare-metal on the host.** A container can't reliably set
the CPU governor, pin to an isolated core, or read the Pi's thermal/throttle
sensors — the noise-control knobs the reference-grade gate relies on — so an
in-container run could never be baseline-grade and would only add jitter. Build
in Docker if you like; then run `./run.sh` on the host.
---
## Measurement methodology (why the numbers are credible)
`run.sh` is the wrapper that makes a number defensible:
- **CPU governor → `performance`** (Linux; needs `sudo`). Recorded before/after.
If it can't be set (e.g. not root) the run **continues anyway**: it warns,
proceeds, and the missing governor becomes an `is_baseline_grade=false`
demerit. `sudo` is only ever for this step — never for the crypto.
- **Core pinning via `taskset -c 3`.** This is a **single-operation latency**
benchmark (one keygen, one encaps, one sign — timed in isolation), not a
parallel-throughput one, so pinning the whole sweep to one core keeps that
core's cache warm and removes cross-core migration scheduling noise, which
tightens the median and MAD. The Pi 5 has 4 cores (0–3); core **3** is chosen
because core 0 typically absorbs the most OS/IRQ/RPS work. The pinned core and
exact `taskset` command are recorded.
- *Planned (separate axis):* a multi-core **throughput/scaling** mode — run an
op across 1..N cores and report ops/sec plus scaling efficiency per
algorithm. Some schemes (SLH-DSA, and later STARK proving) parallelize far
better than others, so it's a worthwhile dimension — but kept **separate**
from these per-op latency numbers, not mixed into them.
- **Thermal/clock trace.** A background sampler logs ARM clock
(`vcgencmd measure_clock arm`) and SoC temperature (`vcgencmd measure_temp`)
~once a second for the whole run. The full trace is embedded in the results
JSON, and **thermal throttling** (`vcgencmd get_throttled`, plus a clock-droop
heuristic) is detected and flagged — a throttled run is not baseline-grade.
- **Warmup + N timed iterations, multiple repetitions.** Primary metric is
wall-clock nanoseconds via `clock_gettime(CLOCK_MONOTONIC)`. We report
**median, MAD, IQR, min, max, mean, stddev, ops/sec**, plus per-repetition
medians — not just a mean.
- **Timing source — two clocks, honestly recorded.** There are two ways to time
an op:
1. **Cycle-based** via the ARM hardware cycle counter (`PMCCNTR_EL0`) — the
most precise, but on Linux **userspace can't read it by default**: the
register traps unless a kernel module enables the userspace PMU (e.g.
`enable_arm_pmu`).
2. **Time-based** wall-clock via `clock_gettime(CLOCK_MONOTONIC)` — always
available, and accurate enough for the millisecond/microsecond ranges here.
The harness probes the cycle counter and, when it isn't available, **falls
back to wall-clock and records exactly that** in the JSON
(`run.cycles_available=false` + the reason). **On a stock machine the cycle
counter is not available, so runs use the wall-clock timer by default** — and
both published runs reflect this: the RPi5 baseline and the macOS run *both*
have `cycles_available=false` (both wall-clock). The remaining difference
between them is wall-clock **granularity**, not clock *type*: the Pi's
wall-clock lands on fractional microseconds, while macOS quantizes to ~1 µs
steps — a ~10% resolution floor on the fastest ops (ML-KEM keygen ~10 µs),
negligible for anything ≥100 µs (McEliece, FrodoKEM).
- **CPU features / Keccak acceleration.** NEON, SHA2, SHA3, SHA512, AES, PMULL
are detected (`/proc/cpuinfo` on Linux, `sysctl` on macOS). **Note:** the
Cortex-A76 has the SHA2/AES extensions but **not** the ARMv8.2 SHA3
extension, so on the Pi 5 Keccak runs on NEON/scalar code — the results record
both the hardware capability and whether liboqs was compiled with SHA3
instructions, so this is explicit rather than assumed.
### The AArch64-optimized backend
liboqs is built with `OQS_DIST_BUILD=OFF` and the pinned flags so the optimized
aarch64 ML-KEM backend (`mlkem-native`) and Falcon/Keccak asm are compiled in.
`setup/setup.sh` extracts the proof from the generated `oqsconfig.h` (e.g.
`OQS_ENABLE_KEM_ml_kem_768_aarch64 1`) into `versions.lock`, which is stamped
into every results file under `toolchain.liboqs_opt_defines`.
---
## Methodology & trustworthiness (verify it yourself)
Every claim below points at the exact code so you can read it, not take our word.
All `bench_pq.c` references are `bench/kem_sig/bench_pq.c`.
1. **Correctness gate — broken crypto emits *zero* numbers.** Before any timing,
each algorithm runs a full round-trip and asserts it: for KEM,
keygen→encaps→decaps then `memcmp(ss_encaps, ss_decaps)`
(`bench_pq.c:357-363`); for signatures, keygen→sign→`verify` must succeed
(`bench_pq.c:428-434`). On any failure, `die()` prints to **stderr** and
`exit(3)` (`bench_pq.c:303-307`) — and the JSON is only printed *after* all
measurement (`bench_pq.c:372-381`), so a failed gate yields **no stdout at
all**. The gate runs once, *outside* the timed loop. A runtime guard
(`must_measure`, `bench_pq.c:311-315`) also aborts if a timed op ever fails
mid-run. *Verify it:* flip one byte of the decaps shared secret right before
`bench_pq.c:362`, rebuild, run — the process exits `3` with empty stdout.
2. **No dead-code elimination — the `volatile` sink.** At `-O3` the compiler may
delete work whose result is never observed. Each timed op folds an output
byte into a file-scope `volatile uint64_t g_sink` (`bench_pq.c:300`; uses at
`:333,:336,:339,:407,:410,:486`), forcing the store to be materialized so the
crypto call **cannot** be optimized away. Without it the loop could time
nothing and report meaningless near-zero numbers.
3. **What is timed — only the op, never setup.** The timed region brackets a
single `fn(ctx)` call between two `now_ns()` reads (`bench_pq.c:274-281`);
per-rep warmup runs *outside* it (`bench_pq.c:272-273`). Inputs are canonical
and pre-validated, so e.g. KEM decaps (`bench_pq.c:337-339`) times one
`OQS_KEM_decaps` and nothing else. For the X25519 baseline, keygen is timed
separately (`bench_pq.c:507`), a stable key is re-primed *outside* timing
(`bench_pq.c:509`), then derive is timed alone (`bench_pq.c:510`) — setup is
never folded into a measured number.
4. **Per-op auto-calibration with clamps.** `calibrate_op` (`bench_pq.c:209-250`)
runs a doubling probe (`:223-230`, also cache warmup) to estimate per-op cost
`est_ns` (`:231`), then picks iterations to hit `target_time_ms` of real work
(`:234-235`), clamped to `[min_samples, max_iters]` (`:236-237`). So a fast
18 µs keygen and a 0.74 s SLH-DSA sign each get the iteration count *they*
need: slow ops floor at `min_samples` (30), fast ops ceil at `max_iters`
(20000). The chosen `timed_iters` and `calib_est_ns` are recorded per op.
5. **Robust statistics — median + MAD.** `compute_stats` (`bench_pq.c:111-146`)
reports median, MAD, IQR, q1/q3, min, max, mean, stddev, ops/sec, plus
per-repetition medians (`print_stats_json`, `bench_pq.c:184-203`). The
headline metric is the **median**, with **MAD** as spread: timing
distributions are right-skewed with a hard floor (true cost) and a long tail
of OS-scheduling/interrupt contamination that drags mean/stddev but not
median/MAD. Mean and stddev are kept in the JSON so the skew is visible. The
clock is `clock_gettime(CLOCK_MONOTONIC)` (`bench_pq.c:44-48`); userspace PMU
cycles are probed and honestly reported absent when they trap
(`probe_pmu`, `bench_pq.c:66-86`).
6. **`is_baseline_grade` demerit gate.** Computed in
`bench/lib/assemble.py:155-168` as a demerit accumulator — the flag is `true`
only if *every* condition holds: real Pi (`:157`), `performance` governor
(`:160`), core-pinned (`:162`), `cortex-a76` build flags (`:164`), and no
thermal throttling (`:166`). Throttling is read from `vcgencmd get_throttled`
bits 2/18 plus a clock-droop heuristic (`assemble.py:91-98,:110-113`). Any
failure appends a human-readable reason and flips the flag to `false`; the
dashboard and `plot.py` default to baseline-grade runs only.
---
## Reproducibility & provenance
- **Pinned versions** live in `setup/versions.env` (liboqs `0.15.0`, OpenSSL
`3.5.0`/`≥3.5`, oqs-provider `0.9.0`). After cloning, `setup.sh` records the
**actually resolved git commits** and the **exact build flags + compiler
version** into `setup/versions.lock`.
- **Every results JSON carries full environment metadata**: RPi model, RAM,
kernel, OS, governor, the clock/temp trace during the run, compiler version,
liboqs/oqs-provider/OpenSSL versions+commits, build flags, and the candidate
list. A macOS smoke file and an RPi5 baseline file can never be confused.
- **Identical flags for every candidate:** `-O3 -mcpu=cortex-a76` on the Pi.
Document your `gcc`/`clang` version — it is auto-captured in `versions.lock`
(`CC_VERSION`).
### `is_baseline_grade`
A **reference-measurement quality gate**, not a deployment requirement. It marks
whether a run was produced under controlled, reproducible *reference* conditions,
so the numbers are comparable across algorithms and across machines. It is `true`
**only** when all hold: real Raspberry Pi · `performance` governor · core-pinned ·
`cortex-a76` build flags · no thermal throttling. Otherwise it is `false` with a
list of reasons.
- **What it is:** a label that says "this run is clean enough to sit in the
cross-algorithm / cross-machine reference comparison." The dashboard and
`plot.py` default to baseline-grade runs only, so noisy runs don't distort the
picture.
- **What it is *not*:** a claim about how nodes must be configured in production.
Real deployments are heterogeneous (different SoCs, governors, thermals) —
that's a separate question this flag does not speak to.
- A run that doesn't meet the gate **isn't wrong** — it's just flagged
`is_baseline_grade=false` with the reasons and kept out of the reference set.
The macOS cross-platform runs are exactly this: useful, honest numbers that
simply aren't reference-grade.
---
## Candidates (edit `config.yaml`)
- **KEM:** ML-KEM-512/768/1024; hybrids X25519MLKEM768, SecP256r1MLKEM768
(hybrids are benchmarked in the TLS layer; at the primitive layer liboqs
exposes them only as TLS groups, so they show as `enabled:false` there).
Code-based + conservative-LWE backups: Classic McEliece
348864/460896/460896f/6688128/6960119/8192128 (tiny ciphertext, slow keygen)
and FrodoKEM 640/976/1344-AES (unstructured LWE). Baseline: **X25519**.
- **Signatures:** ML-DSA-44/65/87; SLH-DSA (SPHINCS+) variants;
Falcon/FN-DSA-512/1024. Baseline: **Ed25519**.
- **TLS:** matrix of configured KEM groups × signature algorithms, always
including the classical **X25519 + Ed25519** pair.
Classic McEliece and FrodoKEM are now measured (above). **HQC** is not — it is
not enabled in the linked liboqs 0.15.0 build (disabled upstream after the
IND-CCA2 implementation issue), so it is intentionally omitted rather than
listed-and-disabled; re-add it once linked against a liboqs that re-enables it.
Add further algorithms by uncommenting/adding entries — the harness skips
anything your liboqs build doesn't enable (and says so).
---
## Output & analysis
- `results/<hostname>-<timestamp>.json` — one self-describing file per run.
- `analyze/merge.py results/*.json -o dashboard/data/merged.json` — merge runs
from many machines into one dataset (keeps each run distinct; never mixes
baseline with smoke).
- `analyze/plot.py` — matplotlib PNGs for papers (optional; install into
`analyze/.venv` via `analyze/requirements.txt` to keep system python clean —
it gracefully skips if matplotlib is absent).
- `dashboard/` — static, no-backend dashboard: grouped bars by security level,
size-vs-speed scatter, TLS handshakes/sec, and ClientHello size — each with
the classical baseline drawn as a reference line. Deploy the folder to GitHub
Pages, or open `index.html` via any static server.
---
## Contributing your RPi5 results
The whole point is a **shared, aggregated baseline**: the more Raspberry Pi 5
results we collect under identical conditions, the more confidence we can have
in the migration-cost picture. If you have a Pi 5, please contribute a run — it takes one command
and a pull request.
### 1. Run under baseline conditions
For your numbers to count as baseline-grade, the run must satisfy the
`is_baseline_grade` gate (real Pi 5 · `performance` governor · core-pinned ·
`cortex-a76` flags · no thermal throttling). To give it the best shot:
- **Use a Raspberry Pi 5** with active cooling (the official Active Cooler or a
fan). PQ signing (esp. SLH-DSA) runs the core hot for a while; without cooling
you *will* throttle and the run is flagged non-baseline.
- **Use the official 27 W USB-C PSU.** Under-voltage also trips the throttle flag.
- **Run on a quiet machine** (close other workloads) so core 3 stays clean.
- **Don't edit `config.yaml`'s candidate list** if you want your run to be
directly comparable to others. (Extending it is fine — just say so in your PR;
extra algorithms simply add columns.)
```bash
git clone <this repo> && cd pq-bench-rpi5
./setup/setup.sh # build + pin liboqs / OpenSSL 3.5+ / oqs-provider
sudo ./run.sh # sudo lets it set the performance governor
```
A full run takes a while (SLH-DSA signing dominates). To check the pipeline
first without committing to the full run, use `sudo ./run.sh --smoke` — but only
a **full** run (not `--smoke`) counts as a submission.
### 2. Confirm it's baseline-grade
When the run finishes, the summary prints `baseline-grade (RPi5): True`. Verify
in the JSON too:
```bash
f=$(ls -t results/*.json | head -1)
python3 -c "import json;d=json.load(open('$f'));print('baseline_grade:',d['is_baseline_grade']);\
print('reasons:',d['baseline_grade_reasons']);\
print('throttled:',d['thermal_trace']['throttling_detected']);\
print('aarch64 ML-KEM backend:', 'ml_kem_768_aarch64 1' in d['toolchain']['liboqs_opt_defines'])"
```
You want `baseline_grade: True`, `reasons: []`, `throttled: False`, and the
backend line `True`. If `is_baseline_grade` is false, the printed reasons tell
you what to fix (usually cooling/PSU/governor) — fix and re-run.
### 3. Submit it
Your `results/<hostname>-<timestamp>.json` is fully self-describing (host model,
kernel, OS, governor, clock/temp trace, compiler + liboqs/oqs-provider/OpenSSL
commits, build flags). It contains your **hostname** and Pi model and nothing
else identifying — if you'd rather not share the hostname, set a name first with
`HOSTNAME=mypi5 sudo ./run.sh`, or just rename the file before submitting.
`results/*.json` is git-ignored by default (so you never accidentally commit
local experiments), so add yours explicitly:
```bash
git checkout -b results/<your-handle>-pi5
git add -f results/<hostname>-<timestamp>.json
git commit -m "results: RPi5 baseline from <your-handle>"
# push to your fork and open a PR
```
**PR checklist** (maintainers will look for these):
- [ ] `is_baseline_grade: true` with empty `baseline_grade_reasons`
- [ ] `thermal_trace.throttling_detected: false`
- [ ] `host.is_rpi: true` and `host.rpi_model` mentions "Raspberry Pi 5"
- [ ] `run.governor_after: performance` and `run.pinned: true`
- [ ] `toolchain.cflags_target: cortex-a76`
- [ ] full run (not `--smoke`): `run.timed_iters` is the `config.yaml` value, not 25
- [ ] unmodified candidate list (or extensions noted in the PR description)
Once merged, your file joins `results/`; anyone can regenerate the aggregated
dataset and dashboard with
`python3 analyze/merge.py results/*.json -o dashboard/data/merged.json`. The
dashboard's run selector will then include your Pi alongside everyone else's.
> Prefer not to use GitHub? Open an issue and attach the JSON file instead — a
> maintainer will add it.
---
## Limitations
- **macOS is smoke-only** (see above): coarse timer, no governor/pinning,
fallback flags.
- **Userspace cycle counts** require a kernel PMU module; default is time-based.
- **Heap/stack memory** is best-effort (`mallinfo2` on glibc; reported
unavailable elsewhere); pk/sk/ct/sig **sizes** are authoritative.
- **TLS handshakes are in-process over memory BIOs** — this isolates crypto
cost cleanly (no socket/scheduler noise) but is not a network RTT model;
ClientHello fragmentation is flagged against a typical 1400-byte MSS.
- **Docker is build-only.** The benchmark is not run in a container — a
container can't reliably control the governor, core pinning, or throttle
detection, so measurement runs bare-metal on the host (see the Docker section).
## Future phase (not implemented)
`config.yaml` reserves a `zk:` section for SNARK/STARK proving/verification
benchmarks; the results schema and dashboard are structured to absorb it later.

View File

@ -0,0 +1,86 @@
# Running pq-bench-rpi5 on Your Own Raspberry Pi 5
This benchmark measures post-quantum KEMs, signatures, and TLS 1.3 handshakes
against the classical baseline Logos uses today (X25519 / Ed25519), so every
chart shows the **migration cost** of moving to PQ on validator-grade hardware.
There's no manual tuning: the benchmark **auto-calibrates the iteration count
per operation** to your Pi's speed, so results stay comparable across machines.
## Prerequisites
- **Raspberry Pi 5** (Cortex-A76, aarch64), ideally the 8GB model, with
**active cooling** so it doesn't thermal-throttle mid-run.
- **Raspberry Pi OS / Debian Trixie or newer** — system OpenSSL 3.5+ so PQ TLS
works without a source build.
- **Internet access** and **sudo**.
## Step 1 — Clone (public repo, no auth)
```sh
git clone <REPO_URL>
cd pq-bench-rpi5
```
## Step 2 — Build the toolchain
```sh
./setup/setup.sh all
```
Takes 5–15 min: installs dependencies and builds liboqs + oqs-provider. Run it
inside `tmux` so it survives an SSH disconnect.
## Step 3 — Run
```sh
sudo ./run.sh
```
`sudo` is only needed to set the `performance` CPU governor — none of the crypto
needs root. Without it the run still completes, but the results are stamped
`is_baseline_grade=false`. The run takes ~45 min, with no iteration counts to set.
Output lands in `results/<hostname>-<timestamp>.json`, stamped with full
provenance (Pi model, RAM, kernel, governor, thermal trace, library versions)
and an `is_baseline_grade` flag.
## Step 4 — View results
```sh
cd dashboard
python3 -m http.server 8765
# then open http://<pi-ip>:8765
```
The charts show KEM, signature, and TLS results with the classical X25519 /
Ed25519 baseline drawn as a reference line.
## Step 5 — Contribute (optional)
Share your `results/*.json` (open a PR or send it over). To merge results from
multiple machines:
```sh
python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
```
The dashboard then shows every Pi side by side.
## What the results tell you
PQ is not so much *slower* as *bigger*. Lattice schemes (ML-KEM, ML-DSA) run
close to classical in speed but have much larger keys and signatures, while the
hash-based SLH-DSA (SPHINCS+) is an outlier in both signing time and signature
size. On TLS, the classical baseline fits in a single packet, while PQ and
hybrid handshakes grow past it and fragment.
## Notes and limitations
- Measures **liboqs** (C / assembly) implementations — a pure-Rust backend is a
separate, optional axis.
- Userspace PMU cycle counts are usually unavailable, so the primary metric is
**wall-clock time + ops/sec**.
- SNARK / STARK benchmarking is **out of scope** for this phase (`config.yaml`
reserves a hook for it).
- The candidate list lives in `config.yaml` — use the exact liboqs algorithm
names.

View File

@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""merge.py — merge many results/<host>-<ts>.json files into one dataset the
dashboard (and PNG export) consume.
python3 analyze/merge.py results/*.json -o dashboard/data/merged.json
The merged file keeps every run as a separate record (so multiple machines /
repetitions can be compared) plus a flat index for quick charting. It never
collapses RPi5 baseline-grade runs together with non-baseline (e.g. macOS smoke)
runs: each record carries its own `is_baseline_grade` flag and host, and the
dashboard filters on it by default.
"""
from __future__ import annotations
import argparse
import glob
import json
import os
import sys
MERGED_SCHEMA = "1.0.0"
def load_runs(paths):
    """Load every results JSON file in ``paths`` and return the parsed runs.

    Each successfully loaded record gets a ``_source_file`` key holding the
    basename of the file it was read from.

    A file is skipped (with a ``[merge] skipping ...`` note on stderr) rather
    than aborting the whole merge when it cannot be opened, is not valid
    UTF-8 / JSON, or when its top-level JSON value is not an object.

    Args:
        paths: iterable of file paths to read.

    Returns:
        list of dicts, one per file that loaded cleanly, in input order.
    """
    runs = []
    for p in paths:
        try:
            # Decode explicitly as UTF-8 instead of relying on the platform's
            # locale encoding, so the same file loads identically everywhere.
            with open(p, encoding="utf-8") as f:
                d = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f"[merge] skipping {p}: {e}", file=sys.stderr)
            continue
        if not isinstance(d, dict):
            # A list/string/number cannot carry run metadata; tagging it with
            # "_source_file" below would raise TypeError and kill the merge.
            print(f"[merge] skipping {p}: top-level JSON value is not an object",
                  file=sys.stderr)
            continue
        d["_source_file"] = os.path.basename(p)
        runs.append(d)
    return runs
def run_id(run):
    """Build the identifier that distinguishes one run from another.

    The format is ``<hostname>/<cpu_brand>@<generated_utc>``; any piece whose
    key is missing is rendered as ``?``.

    Args:
        run: a parsed results record (dict).

    Returns:
        The identifier string.
    """
    # `or {}` (rather than a .get default) also covers a "host" key that is
    # present but null, which would otherwise raise AttributeError below.
    h = run.get("host") or {}
    return f"{h.get('hostname','?')}/{h.get('cpu_brand','?')}@{run.get('generated_utc','?')}"
def _primitive_rows(meta, entries):
    """Flatten KEM or signature entries into one row per (algorithm, operation).

    Entries without a truthy ``enabled`` flag are skipped. Every row starts
    with the shared per-run ``meta`` fields, followed by the algorithm's
    identity, the operation name, its timing statistics and its sizes.
    """
    rows = []
    for entry in entries:
        if not entry.get("enabled"):
            continue
        for op, st in (entry.get("operations") or {}).items():
            rows.append({
                **meta,
                "alg": entry["alg"],
                "backend": entry.get("backend"),
                "classical": bool(entry.get("classical")),
                "nist_level": entry.get("claimed_nist_level"),
                "operation": op,
                "median_ns": st.get("median"),
                "mad_ns": st.get("mad"),
                "iqr_ns": st.get("iqr"),
                "min_ns": st.get("min"),
                "stddev_ns": st.get("stddev"),
                "ops_per_sec": st.get("ops_per_sec"),
                "sizes": entry.get("sizes"),
            })
    return rows


def flatten(runs):
    """Produce flat per-(run, algorithm, operation) rows for easy charting.

    Args:
        runs: list of parsed results records (dicts).

    Returns:
        A ``(kem_rows, sig_rows, tls_rows)`` tuple of lists. KEM and signature
        rows hold one entry per enabled algorithm and operation; TLS rows hold
        one entry per enabled matrix cell. Every row repeats the run-level
        metadata (run id, host, baseline-grade flag, source file) so it can be
        charted without a join. ``kem``, ``sig``, ``tls`` and ``host`` sections
        that are missing or null are treated as empty.
    """
    kem_rows, sig_rows, tls_rows = [], [], []
    for run in runs:
        host = run.get("host") or {}
        meta = {
            "run_id": run_id(run),
            "hostname": host.get("hostname"),
            "cpu_brand": host.get("cpu_brand"),
            "is_rpi": host.get("is_rpi"),
            "is_baseline_grade": run.get("is_baseline_grade"),
            "source_file": run.get("_source_file"),
        }
        # KEM and signature records share one layout, so one helper serves both.
        kem_rows.extend(_primitive_rows(meta, run.get("kem") or []))
        sig_rows.extend(_primitive_rows(meta, run.get("sig") or []))

        tls = run.get("tls") or {}
        # The baseline label is the same for every cell of this run.
        baseline_label = (tls.get("baseline") or {}).get("label")
        for cell in (tls.get("matrix") or []):
            if not cell.get("enabled"):
                continue
            tls_rows.append({
                **meta,
                "label": cell.get("label"),
                "group": cell.get("group"),
                "is_baseline_pair": cell.get("label") == baseline_label,
                "handshakes_per_sec": cell.get("handshakes_per_sec"),
                "median_ns": (cell.get("handshake_latency_ns") or {}).get("median"),
                "bytes_total": (cell.get("bytes_on_wire") or {}).get("total"),
                "client_hello_bytes": cell.get("client_hello_bytes"),
                "client_hello_fragmented": cell.get("client_hello_fragmented"),
            })
    return kem_rows, sig_rows, tls_rows
def main():
    """Command-line entry point: merge results files into one dataset.

    Expands the positional inputs (literal paths or glob patterns), keeps only
    those that are existing files, loads and flattens the runs, writes the
    merged JSON to ``--out`` and prints a short summary. Exits with an error
    message when no input file is found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("inputs", nargs="+", help="results JSON files or globs")
    parser.add_argument("-o", "--out", default="dashboard/data/merged.json")
    args = parser.parse_args()

    # Arguments containing glob metacharacters are expanded here (sorted for a
    # stable order); anything else is taken as a literal path.
    candidates = []
    for pattern in args.inputs:
        if any(ch in pattern for ch in "*?["):
            candidates.extend(sorted(glob.glob(pattern)))
        else:
            candidates.append(pattern)
    paths = [path for path in candidates if os.path.isfile(path)]
    if not paths:
        sys.exit("no input files matched")

    runs = load_runs(paths)
    kem_rows, sig_rows, tls_rows = flatten(runs)

    # One compact summary per run; the full thermal trace is reduced to the
    # temperature series and the throttling flag.
    run_summaries = []
    for r in runs:
        thermal = r.get("thermal_trace") or {}
        run_summaries.append({
            "run_id": run_id(r),
            "host": r.get("host"),
            "is_baseline_grade": r.get("is_baseline_grade"),
            "baseline_grade_reasons": r.get("baseline_grade_reasons", []),
            "toolchain": r.get("toolchain"),
            "cpu_features": r.get("cpu_features"),
            "run": r.get("run"),
            "thermal_summary": {
                "temp_c": thermal.get("temp_c"),
                "throttling_detected": thermal.get("throttling_detected"),
            },
            "generated_utc": r.get("generated_utc"),
            "source_file": r.get("_source_file"),
        })

    merged = {
        "merged_schema": MERGED_SCHEMA,
        "n_runs": len(runs),
        "runs": run_summaries,
        "kem": kem_rows,
        "sig": sig_rows,
        "tls": tls_rows,
    }

    # A bare filename has no directory component; fall back to the CWD.
    out_dir = os.path.dirname(args.out) or "."
    os.makedirs(out_dir, exist_ok=True)
    with open(args.out, "w") as f:
        json.dump(merged, f, indent=2)

    n_base = sum(1 for r in runs if r.get("is_baseline_grade"))
    print(f"merged {len(runs)} run(s) -> {args.out} "
          f"({n_base} baseline-grade, {len(runs)-n_base} smoke/other)")
    print(f" kem rows: {len(kem_rows)} sig rows: {len(sig_rows)} tls rows: {len(tls_rows)}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""plot.py — export publication-ready PNGs from a merged dataset.
python3 analyze/plot.py dashboard/data/merged.json -o analyze/png
matplotlib is an OPTIONAL dependency. If it is not installed this prints a clear
hint and exits 0 (the HTML dashboard remains the primary, dependency-free view).
By default it plots only baseline-grade (RPi5) runs so paper figures are never
polluted with macOS smoke data; pass --include-smoke to override. The classical
Logos baseline (X25519 / Ed25519) is drawn as a reference line on every chart.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
try:
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
except ImportError:
print("matplotlib not installed — skipping PNG export.\n"
" enable it in a project venv (keeps your system python clean):\n"
" python3 -m venv analyze/.venv\n"
" analyze/.venv/bin/pip install -r analyze/requirements.txt\n"
" analyze/.venv/bin/python analyze/plot.py dashboard/data/merged.json\n"
" (the HTML dashboard works without it)", file=sys.stderr)
sys.exit(0)
def median_ms(ns):
    """Convert a nanosecond value to milliseconds; a missing/falsy value maps to 0.0."""
    if not ns:
        return 0 / 1e6
    return ns / 1e6
def pick_runs(merged, include_smoke):
    """Return the set of run_ids to plot.

    Every run is selected when include_smoke is truthy; otherwise only runs
    whose "is_baseline_grade" field is truthy. A dataset without a "runs" key
    yields an empty set.
    """
    return {
        run["run_id"]
        for run in merged.get("runs", [])
        if include_smoke or run.get("is_baseline_grade")
    }
def grouped_bar_by_level(rows, op, title, outpath):
    """Write a bar chart of median latency for one operation.

    One bar is drawn per row whose "operation" equals `op`, ordered by NIST
    level and then by median latency. Rows flagged "classical" are drawn grey,
    and the first such row also provides a dashed horizontal reference line.
    Nothing is written when no row matches.

    rows    -- flattened result rows (dicts) from the merged dataset
    op      -- operation name to select, e.g. "keygen"
    title   -- chart title
    outpath -- destination PNG path
    """
    data = [r for r in rows if r["operation"] == op]
    if not data:
        return
    # `or 0` mirrors median_ms(): a missing (None) median must not make the
    # sort raise TypeError when it is compared against numeric medians.
    data.sort(key=lambda r: (r.get("nist_level") or 0, r["median_ns"] or 0))
    labels = [r["alg"] for r in data]
    vals = [median_ms(r["median_ns"]) for r in data]
    colors = ["#888" if r.get("classical") else "#3b6" for r in data]
    fig, ax = plt.subplots(figsize=(max(6, len(labels) * 0.55), 4))
    try:
        ax.bar(range(len(labels)), vals, color=colors)
        # baseline reference line
        base = next((r for r in data if r.get("classical")), None)
        if base:
            ax.axhline(median_ms(base["median_ns"]), color="#c33", ls="--", lw=1,
                       label=f"classical baseline ({base['alg']})")
            ax.legend(fontsize=8)
        ax.set_xticks(range(len(labels)))
        ax.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
        ax.set_ylabel("median latency (ms)")
        ax.set_title(title)
        ax.grid(axis="y", alpha=0.3)
        fig.tight_layout()
        fig.savefig(outpath, dpi=150)
    finally:
        # Release the figure even if rendering or saving fails.
        plt.close(fig)
    print("wrote", outpath)
def size_speed_scatter(rows, op, size_key, title, outpath):
    """Write a scatter plot of artifact size against median latency.

    Only rows for operation `op` that carry a non-empty "sizes" mapping with a
    truthy `size_key` entry are plotted. Classical rows are red, the rest
    green; every point is annotated with its algorithm name. Nothing is
    written when no point qualifies.
    """
    candidates = [row for row in rows if row["operation"] == op and row.get("sizes")]
    points = []
    for row in candidates:
        size = (row["sizes"] or {}).get(size_key)
        if not size:
            continue
        points.append((size, median_ms(row["median_ns"]), row["alg"], row.get("classical")))
    if len(points) == 0:
        return

    fig, ax = plt.subplots(figsize=(7, 5))
    for size, latency, name, is_classical in points:
        colour = "#c33" if is_classical else "#3b6"
        ax.scatter(size, latency, c=colour,
                   s=60, edgecolors="k", linewidths=0.4, zorder=3)
        ax.annotate(name, (size, latency), fontsize=7, xytext=(4, 3),
                    textcoords="offset points")
    ax.set_xlabel(f"{size_key} size (bytes)")
    ax.set_ylabel("median latency (ms)")
    ax.set_title(title)
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(outpath, dpi=150)
    plt.close(fig)
    print("wrote", outpath)
def tls_bar(tls_rows, outpath):
    """Write a horizontal bar chart of TLS handshake throughput.

    Rows are ordered fastest first (top of the chart). The row flagged
    "is_baseline_pair" is drawn red and marked with a dashed vertical
    reference line. Nothing is written for an empty row list.
    """
    if not tls_rows:
        return

    def throughput(row):
        # A missing or None rate counts as 0.
        return row.get("handshakes_per_sec") or 0

    ordered = sorted(tls_rows, key=lambda row: -throughput(row))
    labels = [row["label"] for row in ordered]
    vals = [throughput(row) for row in ordered]
    colors = ["#c33" if row.get("is_baseline_pair") else "#36c" for row in ordered]
    positions = range(len(labels))

    fig, ax = plt.subplots(figsize=(max(6, len(labels) * 0.5), 4.5))
    ax.barh(positions, vals, color=colors)
    baseline = next((row for row in ordered if row.get("is_baseline_pair")), None)
    if baseline:
        ax.axvline(throughput(baseline), color="#c33", ls="--", lw=1,
                   label=f"classical baseline ({baseline['label']})")
        ax.legend(fontsize=8)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels, fontsize=7)
    ax.invert_yaxis()
    ax.set_xlabel("handshakes / sec")
    ax.set_title("TLS 1.3 handshake throughput (higher is better)")
    ax.grid(axis="x", alpha=0.3)
    fig.tight_layout()
    fig.savefig(outpath, dpi=150)
    plt.close(fig)
    print("wrote", outpath)
def main():
    """CLI entry point: render every PNG chart from a merged dataset.

    Reads the merged JSON, keeps only rows belonging to the selected runs
    (baseline-grade unless --include-smoke is given) and writes the charts
    into --outdir. When no run is selected it prints a hint on stderr and
    returns without writing anything.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("merged")
    parser.add_argument("-o", "--outdir", default="analyze/png")
    parser.add_argument("--include-smoke", action="store_true")
    args = parser.parse_args()

    with open(args.merged) as fh:
        merged = json.load(fh)

    wanted = pick_runs(merged, args.include_smoke)
    if not wanted:
        print("no baseline-grade runs to plot (use --include-smoke for macOS/dev data)",
              file=sys.stderr)
        return

    kem, sig, tls = (
        [row for row in merged[section] if row["run_id"] in wanted]
        for section in ("kem", "sig", "tls")
    )

    out = args.outdir
    os.makedirs(out, exist_ok=True)

    latency_charts = (
        (kem, "keygen", "KEM keygen latency by algorithm", "kem_keygen.png"),
        (kem, "encaps", "KEM encapsulation latency", "kem_encaps.png"),
        (kem, "decaps", "KEM decapsulation latency", "kem_decaps.png"),
        (sig, "sign", "Signature signing latency", "sig_sign.png"),
        (sig, "verify", "Signature verification latency", "sig_verify.png"),
    )
    for rows, op, title, filename in latency_charts:
        grouped_bar_by_level(rows, op, title, f"{out}/{filename}")

    size_speed_scatter(kem, "encaps", "public_key",
                       "KEM: public-key size vs encaps latency", f"{out}/kem_size_speed.png")
    size_speed_scatter(sig, "sign", "signature",
                       "Signature: signature size vs sign latency", f"{out}/sig_size_speed.png")
    tls_bar(tls, f"{out}/tls_throughput.png")
    print(f"PNGs in {out}/ ({'incl. smoke' if args.include_smoke else 'baseline-grade only'})")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1 @@
matplotlib>=3.7

View File

@ -0,0 +1,26 @@
# Build the primitive KEM/sig harness against the pinned liboqs + system OpenSSL.
#
# Paths/flags default to the vendored toolchain but are overridable so run.sh can
# pass the exact values from setup/versions.lock (single source of truth):
#   make LIBOQS_PREFIX=... OPENSSL_PREFIX=... BENCH_CFLAGS="-O3 -mcpu=cortex-a76"
ROOT := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))../..)
LIBOQS_PREFIX ?= $(ROOT)/vendor/install

# OpenSSL: prefer Homebrew openssl@3 on macOS, else system.
# `?=` would make this a recursively-expanded variable and re-run `brew` on
# every reference below; resolve it exactly once, and only when the caller
# (command line or environment) has not provided a value.
ifeq ($(origin OPENSSL_PREFIX),undefined)
OPENSSL_PREFIX := $(shell brew --prefix openssl@3 2>/dev/null || echo /usr)
endif

BENCH_CFLAGS ?= -O3
CC ?= cc

CFLAGS := $(BENCH_CFLAGS) -std=c11 -Wall -Wextra \
	-I$(LIBOQS_PREFIX)/include -I$(OPENSSL_PREFIX)/include
LDFLAGS := -L$(LIBOQS_PREFIX)/lib -L$(OPENSSL_PREFIX)/lib \
	-Wl,-rpath,$(LIBOQS_PREFIX)/lib -Wl,-rpath,$(OPENSSL_PREFIX)/lib
LDLIBS := -loqs -lcrypto -lm

# Explicit default goal; `make` and `make bench_pq` keep building the harness.
all: bench_pq

bench_pq: bench_pq.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDLIBS)

clean:
	rm -f bench_pq

.PHONY: all clean

View File

@ -0,0 +1,656 @@
/* ===========================================================================
* bench_pq.c — primitive-level KEM / signature benchmark harness.
*
* One algorithm per invocation (fresh process keeps cache state clean):
* bench_pq --kind kem --alg ML-KEM-768 --warmup 1000 --iters 10000 --reps 5
* bench_pq --kind sig --alg ML-DSA-65 ...
*
* Emits a single JSON object describing the algorithm to stdout. The orchestrator
* (run.sh / assemble.py) wraps these with environment metadata.
*
* Two backends, selected by algorithm name:
* - liboqs : all PQ candidates (ML-KEM, ML-DSA, Falcon, SLH-DSA, ...)
* - OpenSSL EVP : the classical Logos baselines X25519 (KEM-analog) and
* Ed25519 (signature), which liboqs does not implement.
* This lets the classical reference be drawn on the same primitive charts.
*
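* Metrics per operation: full per-iteration wall-clock nanosecond distribution
* -> median, MAD, IQR, q1/q3, min, max, mean, stddev, ops/sec, plus
* per-repetition medians. Userspace PMU cycle counting is probed at startup and
* its availability is logged to stderr. Heap high-water via mallinfo2 on glibc
* (the RPi5 target) is the intended source; honestly reported unavailable
* elsewhere (e.g. the macOS smoke box).
* NOTE(review): as written, this file emits neither cycle counts nor a heap
* high-water figure in its JSON output (HAVE_MALLINFO2 is defined but not
* referenced) -- confirm whether those metrics are still planned.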
* ===========================================================================*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
#include <time.h>
#include <setjmp.h>
#include <signal.h>
#include <oqs/oqs.h>
#include <openssl/evp.h>
#include <openssl/err.h>
#if defined(__linux__) && defined(__GLIBC__)
#include <malloc.h>
#define HAVE_MALLINFO2 1
#endif
/* ---- timing ------------------------------------------------------------- */
/* Current CLOCK_MONOTONIC reading, flattened to a single nanosecond count. */
static inline uint64_t now_ns(void) {
    struct timespec stamp;
    clock_gettime(CLOCK_MONOTONIC, &stamp);
    const uint64_t whole_seconds_ns = (uint64_t)stamp.tv_sec * 1000000000ull;
    return whole_seconds_ns + (uint64_t)stamp.tv_nsec;
}
/* ---- userspace PMU cycle counter probe (aarch64) ------------------------ */
/* Jump target used to recover from a trapping probe instruction. */
static sigjmp_buf g_sigill_jmp;
/* Result of the most recent probe: 1 if the counter read completed. */
static volatile sig_atomic_t g_pmu_ok = 0;

/* SIGILL handler for the probe: unwind straight back to the sigsetjmp() site,
 * making it return 1 instead of resuming the faulting instruction. */
static void sigill_handler(int sig)
{
    (void)sig;
    siglongjmp(g_sigill_jmp, 1);
}
/* Read the AArch64 cycle counter register PMCCNTR_EL0 with `mrs`.
 * On non-aarch64 builds this is a stub that returns 0.
 * NOTE(review): the only caller in this file is probe_pmu(), which installs a
 * SIGILL handler first because the read may trap; confirm that is still true
 * before calling this from anywhere else. */
static inline uint64_t read_cycles(void) {
#if defined(__aarch64__)
uint64_t v;
/* volatile: the read must be emitted even when the caller discards the value */
__asm__ volatile("mrs %0, pmccntr_el0" : "=r"(v));
return v;
#else
return 0;
#endif
}
/* Probe whether PMCCNTR_EL0 can be read from userspace.
 * Temporarily installs sigill_handler for SIGILL, attempts one read_cycles()
 * call, then restores the previous SIGILL disposition.
 * Returns 1 if the read completed, 0 if it raised SIGILL or the build is not
 * aarch64. *reason is always set to a static, human-readable explanation. */
static int probe_pmu(const char **reason) {
#if defined(__aarch64__)
struct sigaction sa, old;
memset(&sa, 0, sizeof sa);
sa.sa_handler = sigill_handler;
sigaction(SIGILL, &sa, &old);
/* savemask=1: the signal mask is restored when the handler jumps back here */
if (sigsetjmp(g_sigill_jmp, 1) == 0) {
(void)read_cycles();
g_pmu_ok = 1;
} else {
/* reached via siglongjmp from sigill_handler: the register read trapped */
g_pmu_ok = 0;
}
sigaction(SIGILL, &old, NULL);
if (g_pmu_ok) { *reason = "PMCCNTR_EL0 readable from userspace"; return 1; }
*reason = "PMCCNTR_EL0 traps (kernel module not loaded; needs e.g. enable_arm_pmu)";
return 0;
#else
*reason = "not aarch64";
return 0;
#endif
}
/* ---- statistics --------------------------------------------------------- */
/* Summary statistics over one operation's timing samples.
 * All latency fields are in the sample unit (nanoseconds in this harness). */
typedef struct {
    double median;      /* 50th percentile */
    double mad;         /* median absolute deviation from the median */
    double iqr;         /* q3 - q1 */
    double q1;          /* 25th percentile */
    double q3;          /* 75th percentile */
    double min;
    double max;
    double mean;
    double stddev;      /* sample standard deviation (n - 1 denominator) */
    double ops_per_sec; /* 1e9 / median, or 0 when the median is 0 */
    uint64_t n;         /* number of samples summarized */
} stats_t;
/* qsort() comparator: ascending order over uint64_t elements. */
static int cmp_u64(const void *a, const void *b) {
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;
    if (lhs < rhs) return -1;
    if (lhs > rhs) return 1;
    return 0;
}
/* Percentile p (as a fraction, e.g. 0.5) of an ascending-sorted array, using
 * linear interpolation between the two neighbouring samples.
 * Returns 0 for an empty array and the sole element for n == 1. */
static double pct_sorted(const uint64_t *s, uint64_t n, double p) {
    if (n == 0) return 0;
    if (n == 1) return (double)s[0];

    const double pos = p * (double)(n - 1);
    const uint64_t below = (uint64_t)pos;
    if (below + 1 >= n)
        return (double)s[n - 1];   /* at or past the last sample */

    const double weight = pos - (double)below;
    const double left = (double)s[below];
    const double right = (double)s[below + 1];
    return left + weight * (right - left);
}
/* Summarize n timing samples.
 * NOTE: sorts `samples` in place. An empty input yields an all-zero result.
 * If the scratch buffer for the MAD cannot be allocated, mad is left at 0. */
static stats_t compute_stats(uint64_t *samples, uint64_t n) {
    stats_t out;
    memset(&out, 0, sizeof out);
    out.n = n;
    if (n == 0)
        return out;

    qsort(samples, n, sizeof(uint64_t), cmp_u64);
    out.min = (double)samples[0];
    out.max = (double)samples[n - 1];
    out.median = pct_sorted(samples, n, 0.5);
    out.q1 = pct_sorted(samples, n, 0.25);
    out.q3 = pct_sorted(samples, n, 0.75);
    out.iqr = out.q3 - out.q1;

    double total = 0;
    for (uint64_t k = 0; k < n; k++)
        total += (double)samples[k];
    out.mean = total / (double)n;

    double sq_total = 0;
    for (uint64_t k = 0; k < n; k++) {
        const double delta = (double)samples[k] - out.mean;
        sq_total += delta * delta;
    }
    out.stddev = (n > 1) ? sqrt(sq_total / (double)(n - 1)) : 0;

    /* MAD = median(|x - median|): the deviations need their own sorted buffer. */
    uint64_t *absdev = malloc(n * sizeof(uint64_t));
    if (absdev != NULL) {
        for (uint64_t k = 0; k < n; k++) {
            const double delta = (double)samples[k] - out.median;
            absdev[k] = (uint64_t)fabs(delta);
        }
        qsort(absdev, n, sizeof(uint64_t), cmp_u64);
        out.mad = pct_sorted(absdev, n, 0.5);
        free(absdev);
    }

    out.ops_per_sec = out.median > 0 ? 1e9 / out.median : 0;
    return out;
}
/* ---- measurement configuration + generic loop --------------------------- */
/* Poor man's closure: one benchmarked operation, run once per call against a
 * caller-supplied context. Callers in this file treat a return of 0 as success
 * and any non-zero value as failure. */
typedef int (*op_fn)(void *ctx);
/* How each op is sized. Two modes, decided per invocation:
* - fixed-count (fixed_iters > 0): exactly fixed_iters timed iters per rep,
* with the configured fixed-mode warmup. This is the explicit
* `--iters N` override / fixed-count fallback.
* - auto-calibrate(fixed_iters == 0): each op is timed long enough to put
* ~target_ns of aggregate timed work on it, so a 18 us keygen
* and a 750 ms SLH-DSA sign both yield a stable median. The
* chosen count is clamped to [min_samples, max_iters]:
* fast ops hit max_iters, slow ops hit min_samples.
* Auto mode estimates per-op cost with a short doubling calibration (which also
* serves as cache warmup), then derives the timed count + a per-rep re-warm. */
/* Measurement configuration shared by every op of one invocation. */
typedef struct {
uint64_t fixed_iters; /* >0 => fixed-count mode with exactly this many timed iters; 0 => auto-calibrate */
uint64_t target_ns; /* auto: per-op aggregate timed-work target, in nanoseconds */
uint64_t min_samples; /* auto: floor on timed iters per rep */
uint64_t max_iters; /* auto: ceiling on timed iters per rep (also caps calibration and warmup) */
uint64_t warmup; /* fixed-mode warmup count per rep */
uint64_t reps; /* number of repetitions (warmup + timed loop) per op */
} bench_cfg;
/* Result of measuring one op: the raw samples plus how they were collected. */
typedef struct {
uint64_t *all; /* all samples across reps; heap-allocated by measure_op, freed by the caller */
uint64_t all_n; /* number of valid entries in `all` */
double per_rep_median[64]; /* median of each rep; reps beyond the 64th are not recorded */
int n_rep_med; /* number of valid entries in per_rep_median */
uint64_t timed_iters; /* timed iters per rep actually used */
uint64_t warmup_iters; /* warmup iters per rep actually used */
uint64_t reps; /* repetitions run */
int calibrated; /* 1 if auto-calibrated, 0 if fixed-count */
double est_ns; /* per-op cost estimate from calibration (auto); 0 in fixed-count mode */
} measure_out;
/* Emit one `"<name>":{...}` JSON member describing an operation's statistics
 * and how it was measured. No leading or trailing comma is written; the caller
 * owns the separators between members. */
static void print_stats_json(FILE *f, const char *name, stats_t st,
                             const measure_out *m) {
    const unsigned long long warm_count = (unsigned long long)m->warmup_iters;
    const unsigned long long timed_count = (unsigned long long)m->timed_iters;
    const unsigned long long rep_count = (unsigned long long)m->reps;
    const char *calibrated_text = m->calibrated ? "true" : "false";

    fprintf(f, "\"%s\":{", name);
    fprintf(f, "\"unit\":\"ns\",\"warmup_iters\":%llu,\"timed_iters\":%llu,\"repetitions\":%llu,",
            warm_count, timed_count, rep_count);
    fprintf(f, "\"calibrated\":%s,", calibrated_text);
    if (m->calibrated) {
        /* the calibration estimate only exists in auto-calibrate mode */
        fprintf(f, "\"calib_est_ns\":%.2f,", m->est_ns);
    }
    fprintf(f, "\"samples\":%llu,", (unsigned long long)st.n);
    fprintf(f, "\"median\":%.2f,\"mad\":%.2f,\"iqr\":%.2f,\"q1\":%.2f,\"q3\":%.2f,",
            st.median, st.mad, st.iqr, st.q1, st.q3);
    fprintf(f, "\"min\":%.2f,\"max\":%.2f,\"mean\":%.2f,\"stddev\":%.2f,",
            st.min, st.max, st.mean, st.stddev);
    fprintf(f, "\"ops_per_sec\":%.2f,", st.ops_per_sec);

    fprintf(f, "\"per_rep_median\":[");
    for (int rep = 0; rep < m->n_rep_med; rep++) {
        const char *sep = rep ? "," : "";
        fprintf(f, "%s%.2f", sep, m->per_rep_median[rep]);
    }
    fprintf(f, "]}");
}
/* Decide how many timed iterations (and per-rep warmup iterations) one op gets.
 *
 * Fixed-count mode (cfg->fixed_iters > 0): the configured counts are returned
 * as-is and fn is never called.
 * Auto mode: fn is run in doubling batches (1, 2, 4, ...) until about 30 ms
 * have elapsed or max_iters ops have run; this also warms caches. The average
 * cost per op then determines the count that fills target_ns, clamped to
 * [min_samples, max_iters], and a warmup of ~20% of that budget (at least 1).
 *
 * Returns 0 on success, -2 if fn fails during calibration (outputs untouched). */
static int calibrate_op(op_fn fn, void *ctx, const bench_cfg *cfg,
                        uint64_t *timed_out, uint64_t *warm_out,
                        double *est_ns_out, int *calibrated_out) {
    if (cfg->fixed_iters > 0) {
        *timed_out = cfg->fixed_iters;
        *warm_out = cfg->warmup;
        *est_ns_out = 0.0;
        *calibrated_out = 0;
        return 0;
    }

    const uint64_t CALIB_BUDGET_NS = 30ull * 1000 * 1000; /* 30 ms */
    uint64_t ops_done = 0;
    uint64_t elapsed = 0;
    /* The max_iters cap keeps a sub-microsecond op from spinning forever. */
    for (uint64_t batch = 1;
         elapsed < CALIB_BUDGET_NS && ops_done < cfg->max_iters;
         batch *= 2) {
        const uint64_t start = now_ns();
        for (uint64_t k = 0; k < batch; k++) {
            if (fn(ctx) != 0)
                return -2;
        }
        elapsed += now_ns() - start;
        ops_done += batch;
    }

    double per_op_ns = ops_done ? (double)elapsed / (double)ops_done : 1.0;
    if (per_op_ns < 1.0)
        per_op_ns = 1.0;

    const double timed_exact = (double)cfg->target_ns / per_op_ns;
    uint64_t timed = (uint64_t)(timed_exact + 0.5);
    if (timed < cfg->min_samples) timed = cfg->min_samples; /* slow ops land here */
    if (timed > cfg->max_iters) timed = cfg->max_iters;     /* fast ops land here */

    /* Per-rep re-warm: about 20% of the timed budget, at least 1, capped. */
    const double warm_exact = ((double)cfg->target_ns * 0.2) / per_op_ns;
    uint64_t warm = (uint64_t)(warm_exact + 0.5);
    if (warm < 1) warm = 1;
    if (warm > cfg->max_iters) warm = cfg->max_iters;

    *timed_out = timed;
    *warm_out = warm;
    *est_ns_out = per_op_ns;
    *calibrated_out = 1;
    return 0;
}
/* Measure one op: calibrate, then run cfg->reps repetitions of
 * (warmup iterations, timed iterations), timing every timed call individually.
 *
 * On success returns 0 and fills `out`; out->all is heap-allocated and owned
 * by the caller. Returns -1 when a buffer cannot be allocated (or its size
 * would overflow) and -2 when fn reports a failure at any point. On every
 * failure path out->all is freed and reset to NULL, so it is never dangling. */
static int measure_op(op_fn fn, void *ctx, const bench_cfg *cfg, measure_out *out) {
    uint64_t iters, warmup;
    double est_ns;
    int calibrated;
    if (calibrate_op(fn, ctx, cfg, &iters, &warmup, &est_ns, &calibrated) != 0)
        return -2;

    out->all = NULL;
    out->all_n = 0;
    out->n_rep_med = 0;
    out->timed_iters = iters;
    out->warmup_iters = warmup;
    out->reps = cfg->reps;
    out->calibrated = calibrated;
    out->est_ns = est_ns;

    /* iters and reps both come from the command line: refuse sizes whose byte
     * count would wrap instead of allocating a too-small buffer. */
    if (cfg->reps > 0 && iters > SIZE_MAX / sizeof(uint64_t) / cfg->reps)
        return -1;

    out->all = malloc(iters * cfg->reps * sizeof(uint64_t));
    if (!out->all)
        return -1;
    uint64_t *rep_buf = malloc(iters * sizeof(uint64_t));
    if (!rep_buf) {
        free(out->all);
        out->all = NULL;
        return -1;
    }

    const uint64_t max_rep_med =
        sizeof out->per_rep_median / sizeof out->per_rep_median[0];

    for (uint64_t r = 0; r < cfg->reps; r++) {
        /* re-warm before every repetition */
        for (uint64_t i = 0; i < warmup; i++)
            if (fn(ctx) != 0) goto op_failed;

        for (uint64_t i = 0; i < iters; i++) {
            uint64_t t0 = now_ns();
            int rc = fn(ctx);
            uint64_t dt = now_ns() - t0;
            if (rc != 0) goto op_failed;
            rep_buf[i] = dt;
            out->all[out->all_n++] = dt;
        }

        /* Per-rep median. rep_buf is pure scratch (rewritten next rep), so it
         * is sorted in place: no per-rep allocation, and therefore no way for
         * a failed allocation to silently drop a repetition's median. */
        if ((uint64_t)out->n_rep_med < max_rep_med) {
            qsort(rep_buf, iters, sizeof(uint64_t), cmp_u64);
            out->per_rep_median[out->n_rep_med++] = pct_sorted(rep_buf, iters, 0.5);
        }
    }
    free(rep_buf);
    return 0;

op_failed:
    free(rep_buf);
    free(out->all);
    out->all = NULL;
    out->all_n = 0;
    return -2;
}
/* ---- anti-DCE sink + fatal helpers -------------------------------------- */
/* File-scope volatile: the compiler must materialize every store, so it cannot
* dead-code-eliminate the crypto outputs we feed into it. Every timed op sinks
* one output byte here. */
static volatile uint64_t g_sink = 0;
/* Fatal-error exit: report `alg` and `what` on stderr, then terminate the
 * process with status 3. Used so that a broken build can never go on to emit
 * timing numbers. Does not return. */
static void die(const char *alg, const char *what)
{
    fprintf(stderr,
            "[bench_pq] FATAL: %s: %s — aborting so no numbers are emitted\n",
            alg, what);
    exit(3);
}
/* measure_op() that cannot fail from the caller's point of view: any non-zero
 * result (an op that stopped succeeding, or an allocation failure) aborts the
 * process via die(), naming the algorithm and the operation, so callers never
 * read freed or partial sample buffers. */
static void must_measure(const char *alg, const char *op, op_fn fn, void *ctx,
                         const bench_cfg *cfg, measure_out *out) {
    const int rc = measure_op(fn, ctx, cfg, out);
    if (rc == 0)
        return;
    die(alg, op);
}
#define MSGLEN 32
/* ======================= liboqs KEM ====================================== */
/* Context shared by the timed KEM ops.
 * Timed ops consume canonical, pre-validated inputs; keygen/encaps write to
 * scratch buffers so they never clobber the matched (pk,sk,ct) that the other
 * timed ops depend on. Each op sinks an output byte into g_sink. */
typedef struct {
OQS_KEM *kem; /* algorithm handle from OQS_KEM_new */
uint8_t *pk, *sk; /* canonical matched keypair (encaps/decaps inputs) */
uint8_t *ct; /* canonical ciphertext (decaps input) */
uint8_t *pk_s, *sk_s; /* scratch: keygen outputs */
uint8_t *ct_s, *ss_s; /* scratch: encaps outputs */
uint8_t *ss_d; /* scratch: decaps output */
} kem_ctx;
/* Timed op: generate a keypair into the scratch buffers.
 * Returns 0 on success, 1 if liboqs reports failure. */
static int kem_keygen(void *c)
{
    kem_ctx *k = c;
    if (OQS_KEM_keypair(k->kem, k->pk_s, k->sk_s) != OQS_SUCCESS)
        return 1;
    g_sink += k->pk_s[0];   /* keep the output observable */
    return 0;
}
/* Timed op: encapsulate against the canonical public key, writing ciphertext
 * and shared secret to scratch. Returns 0 on success, 1 on failure. */
static int kem_encaps(void *c)
{
    kem_ctx *k = c;
    if (OQS_KEM_encaps(k->kem, k->ct_s, k->ss_s, k->pk) != OQS_SUCCESS)
        return 1;
    g_sink += ((uint64_t)k->ss_s[0] ^ k->ct_s[0]);   /* sink both outputs */
    return 0;
}
/* Timed op: decapsulate the canonical ciphertext with the canonical secret
 * key into scratch. Returns 0 on success, 1 on failure. */
static int kem_decaps(void *c)
{
    kem_ctx *k = c;
    if (OQS_KEM_decaps(k->kem, k->ss_d, k->ct, k->sk) != OQS_SUCCESS)
        return 1;
    g_sink += k->ss_d[0];   /* keep the output observable */
    return 0;
}
static int run_kem(const char *alg, const bench_cfg *cfg) {
OQS_KEM *kem = OQS_KEM_new(alg);
if (!kem) {
printf("{\"alg\":\"%s\",\"kind\":\"kem\",\"backend\":\"liboqs\",\"enabled\":false,"
"\"reason\":\"not enabled in this liboqs build\"}\n", alg);
return 0;
}
kem_ctx x; memset(&x, 0, sizeof x); x.kem = kem;
x.pk = malloc(kem->length_public_key); x.sk = malloc(kem->length_secret_key);
x.ct = malloc(kem->length_ciphertext);
x.pk_s = malloc(kem->length_public_key); x.sk_s = malloc(kem->length_secret_key);
x.ct_s = malloc(kem->length_ciphertext);
x.ss_s = malloc(kem->length_shared_secret);
x.ss_d = malloc(kem->length_shared_secret);
if (!x.pk||!x.sk||!x.ct||!x.pk_s||!x.sk_s||!x.ct_s||!x.ss_s||!x.ss_d) die(alg,"out of memory");
/* ---- correctness check (ONCE, outside the timed loop) ----
* keygen -> encaps -> decaps, then assert the shared secrets match. */
if (OQS_KEM_keypair(kem, x.pk, x.sk) != OQS_SUCCESS) die(alg, "keygen failed");
if (OQS_KEM_encaps(kem, x.ct, x.ss_s, x.pk) != OQS_SUCCESS) die(alg, "encaps failed");
if (OQS_KEM_decaps(kem, x.ss_d, x.ct, x.sk) != OQS_SUCCESS) die(alg, "decaps failed");
if (memcmp(x.ss_s, x.ss_d, kem->length_shared_secret) != 0)
die(alg, "KEM shared-secret mismatch (ss_encaps != ss_decaps)");
/* timed phases run on the canonical, validated (pk,sk,ct); any op failure
* during timing aborts via must_measure. */
measure_out kg={0}, en={0}, de={0};
must_measure(alg,"keygen",kem_keygen,&x,cfg,&kg);
must_measure(alg,"encaps",kem_encaps,&x,cfg,&en);
must_measure(alg,"decaps",kem_decaps,&x,cfg,&de);
printf("{\"alg\":\"%s\",\"kind\":\"kem\",\"backend\":\"liboqs\",\"enabled\":true,", alg);
printf("\"claimed_nist_level\":%d,", kem->claimed_nist_level);
printf("\"sizes\":{\"public_key\":%zu,\"secret_key\":%zu,\"ciphertext\":%zu,\"shared_secret\":%zu},",
kem->length_public_key, kem->length_secret_key, kem->length_ciphertext, kem->length_shared_secret);
printf("\"operations\":{");
stats_t s;
s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
s=compute_stats(en.all,en.all_n); print_stats_json(stdout,"encaps",s,&en); printf(",");
s=compute_stats(de.all,de.all_n); print_stats_json(stdout,"decaps",s,&de);
printf("}}\n");
free(kg.all); free(en.all); free(de.all);
free(x.pk); free(x.sk); free(x.ct);
free(x.pk_s); free(x.sk_s); free(x.ct_s); free(x.ss_s); free(x.ss_d);
OQS_KEM_free(kem);
return 0;
}
/* ======================= liboqs SIG ====================================== */
/* Context shared by the timed signature ops.
 * sign writes a scratch signature; verify reads the canonical (sg,sglen) over
 * the canonical msg with the canonical pk -- all pre-validated. */
typedef struct {
OQS_SIG *sig; /* algorithm handle from OQS_SIG_new */
uint8_t *pk, *sk; /* canonical keypair (sign/verify inputs) */
uint8_t *msg; /* canonical message (MSGLEN bytes) */
uint8_t *sg; size_t sglen; /* canonical signature (verify input) */
uint8_t *pk_s, *sk_s; /* scratch: keygen outputs */
uint8_t *sg_s; size_t sg_s_len;/* scratch: sign output */
} sig_ctx;
/* Timed op: generate a signature keypair into the scratch buffers.
 * Returns 0 on success, 1 if liboqs reports failure. */
static int sig_keygen(void *c)
{
    sig_ctx *s = c;
    if (OQS_SIG_keypair(s->sig, s->pk_s, s->sk_s) != OQS_SUCCESS)
        return 1;
    g_sink += s->pk_s[0];   /* keep the output observable */
    return 0;
}
/* Timed op: sign the canonical message with the canonical secret key into the
 * scratch signature buffer. Returns 0 on success, 1 on failure. */
static int sig_sign(void *c)
{
    sig_ctx *s = c;
    /* reset the in/out length to the full buffer capacity before every call */
    s->sg_s_len = s->sig->length_signature;
    if (OQS_SIG_sign(s->sig, s->sg_s, &s->sg_s_len, s->msg, MSGLEN, s->sk) != OQS_SUCCESS)
        return 1;
    g_sink += s->sg_s[0];   /* keep the output observable */
    return 0;
}
/* Timed op: verify the canonical signature over the canonical message.
 * Returns 0 when verification succeeds, 1 otherwise. */
static int sig_verify(void *c)
{
    sig_ctx *s = c;
    if (OQS_SIG_verify(s->sig, s->msg, MSGLEN, s->sg, s->sglen, s->pk) != OQS_SUCCESS)
        return 1;
    g_sink += 1;   /* record the successful verification */
    return 0;
}
static int run_sig(const char *alg, const bench_cfg *cfg) {
OQS_SIG *sig = OQS_SIG_new(alg);
if (!sig) {
printf("{\"alg\":\"%s\",\"kind\":\"sig\",\"backend\":\"liboqs\",\"enabled\":false,"
"\"reason\":\"not enabled in this liboqs build\"}\n", alg);
return 0;
}
sig_ctx x; memset(&x,0,sizeof x); x.sig=sig;
x.pk = malloc(sig->length_public_key); x.sk = malloc(sig->length_secret_key);
x.msg = malloc(MSGLEN);
x.sg = malloc(sig->length_signature);
x.pk_s = malloc(sig->length_public_key); x.sk_s = malloc(sig->length_secret_key);
x.sg_s = malloc(sig->length_signature);
if (!x.pk||!x.sk||!x.msg||!x.sg||!x.pk_s||!x.sk_s||!x.sg_s) die(alg,"out of memory");
memset(x.msg, 0xA5, MSGLEN);
/* ---- correctness check (ONCE, outside the timed loop) ----
* keygen -> sign -> verify; the verify MUST succeed on a valid signature. */
if (OQS_SIG_keypair(sig, x.pk, x.sk) != OQS_SUCCESS) die(alg, "keygen failed");
x.sglen = sig->length_signature;
if (OQS_SIG_sign(sig, x.sg, &x.sglen, x.msg, MSGLEN, x.sk) != OQS_SUCCESS) die(alg, "sign failed");
if (OQS_SIG_verify(sig, x.msg, MSGLEN, x.sg, x.sglen, x.pk) != OQS_SUCCESS)
die(alg, "signature verify failed on a valid signature (broken build)");
measure_out kg={0}, sg={0}, vf={0};
must_measure(alg,"keygen",sig_keygen,&x,cfg,&kg);
must_measure(alg,"sign", sig_sign, &x,cfg,&sg);
must_measure(alg,"verify",sig_verify,&x,cfg,&vf);
printf("{\"alg\":\"%s\",\"kind\":\"sig\",\"backend\":\"liboqs\",\"enabled\":true,", alg);
printf("\"claimed_nist_level\":%d,", sig->claimed_nist_level);
printf("\"sizes\":{\"public_key\":%zu,\"secret_key\":%zu,\"signature\":%zu},",
sig->length_public_key, sig->length_secret_key, sig->length_signature);
printf("\"operations\":{");
stats_t s;
s=compute_stats(kg.all,kg.all_n); print_stats_json(stdout,"keygen",s,&kg); printf(",");
s=compute_stats(sg.all,sg.all_n); print_stats_json(stdout,"sign",s,&sg); printf(",");
s=compute_stats(vf.all,vf.all_n); print_stats_json(stdout,"verify",s,&vf);
printf("}}\n");
free(kg.all); free(sg.all); free(vf.all);
free(x.pk); free(x.sk); free(x.msg); free(x.sg);
free(x.pk_s); free(x.sk_s); free(x.sg_s);
OQS_SIG_free(sig);
return 0;
}
/* ======================= OpenSSL classical baselines ===================== */
/* X25519 as a KEM-analog: keygen + ECDH derive (one shared-secret derivation). */
typedef struct {
    EVP_PKEY *self;   /* our key; replaced on every x25519_keygen() call */
    EVP_PKEY *peer;   /* the other party's key used by derive */
} x25519_ctx;
/* Timed op: replace ctx->self with a freshly generated X25519 key.
 * Any previous key is freed first, so on failure self is left NULL.
 * Returns 0 on success, 1 on failure. */
static int x25519_keygen(void *c)
{
    x25519_ctx *st = c;
    if (st->self != NULL) {
        EVP_PKEY_free(st->self);
        st->self = NULL;
    }

    EVP_PKEY_CTX *gen = EVP_PKEY_CTX_new_id(EVP_PKEY_X25519, NULL);
    if (gen == NULL)
        return 1;

    int rc = 1;
    if (EVP_PKEY_keygen_init(gen) > 0 && EVP_PKEY_keygen(gen, &st->self) > 0)
        rc = 0;
    EVP_PKEY_CTX_free(gen);
    return rc;
}
/* ECDH: derive the shared secret of private key `a` with peer key `b` into
 * out[32]. Returns 1 on success, 0 on failure (note: the opposite convention
 * from the op_fn callbacks). */
static int x25519_derive_into(EVP_PKEY *a, EVP_PKEY *b, unsigned char out[32])
{
    EVP_PKEY_CTX *dh = EVP_PKEY_CTX_new(a, NULL);
    if (dh == NULL)
        return 0;

    size_t out_len = 32;
    int derived = 0;
    if (EVP_PKEY_derive_init(dh) > 0 && EVP_PKEY_derive_set_peer(dh, b) > 0)
        derived = EVP_PKEY_derive(dh, out, &out_len) > 0;
    EVP_PKEY_CTX_free(dh);
    return derived;
}
/* Timed op: one ECDH derivation between ctx->self and ctx->peer.
 * Returns 0 on success, 1 on failure. */
static int x25519_derive(void *c)
{
    x25519_ctx *st = c;
    unsigned char shared[32];
    if (x25519_derive_into(st->self, st->peer, shared) == 0)
        return 1;
    g_sink += shared[0];   /* sink the derived shared secret */
    return 0;
}
/* Benchmark the classical X25519 baseline (OpenSSL EVP) and print one JSON
 * object. Both derivation directions are checked once for agreement
 * (untimed); then keygen and derive are measured. Because the timed keygen
 * replaces `self` on every call, a stable `self` is regenerated before derive
 * is timed. Any failure aborts through die(). Returns 0. */
static int run_x25519(const bench_cfg *cfg) {
    x25519_ctx st = {0};

    if (x25519_keygen(&st) != 0)   /* initial self key */
        die("X25519","keygen failed");

    /* a fixed peer key for derive */
    EVP_PKEY_CTX *peer_gen = EVP_PKEY_CTX_new_id(EVP_PKEY_X25519, NULL);
    if (!peer_gen
        || EVP_PKEY_keygen_init(peer_gen) <= 0
        || EVP_PKEY_keygen(peer_gen, &st.peer) <= 0)
        die("X25519","peer keygen failed");
    EVP_PKEY_CTX_free(peer_gen);

    /* Correctness check, once and untimed: ECDH must agree in both directions. */
    {
        unsigned char from_self[32], from_peer[32];
        if (!x25519_derive_into(st.self, st.peer, from_self))
            die("X25519","derive(self,peer) failed");
        if (!x25519_derive_into(st.peer, st.self, from_peer))
            die("X25519","derive(peer,self) failed");
        if (memcmp(from_self, from_peer, 32) != 0)
            die("X25519","ECDH shared-secret mismatch");
    }

    measure_out keygen_m = {0}, derive_m = {0};
    must_measure("X25519", "keygen", x25519_keygen, &st, cfg, &keygen_m);
    /* keygen frees+replaces self each call; re-make a stable self for derive */
    if (x25519_keygen(&st) != 0)
        die("X25519","keygen failed");
    must_measure("X25519", "derive", x25519_derive, &st, cfg, &derive_m);

    printf("{\"alg\":\"X25519\",\"kind\":\"kem\",\"backend\":\"openssl\",\"classical\":true,\"enabled\":true,");
    printf("\"claimed_nist_level\":1,");
    printf("\"sizes\":{\"public_key\":32,\"secret_key\":32,\"ciphertext\":null,\"shared_secret\":32},");
    printf("\"operations\":{");
    print_stats_json(stdout, "keygen", compute_stats(keygen_m.all, keygen_m.all_n), &keygen_m);
    printf(",");
    print_stats_json(stdout, "derive", compute_stats(derive_m.all, derive_m.all_n), &derive_m);
    printf("}}\n");

    free(keygen_m.all);
    free(derive_m.all);
    if (st.self)
        EVP_PKEY_free(st.self);
    if (st.peer)
        EVP_PKEY_free(st.peer);
    return 0;
}
/* Ed25519 signature baseline: key, fixed message, and the latest signature. */
typedef struct {
    EVP_PKEY *key;           /* current keypair; replaced by ed_keygen() */
    unsigned char msg[32];   /* message that is signed and verified */
    unsigned char sig[64];   /* signature written by ed_sign(), read by ed_verify() */
    size_t siglen;           /* length of the signature held in sig */
} ed_ctx;
/* Timed op: replace ctx->key with a freshly generated Ed25519 key.
 * Any previous key is freed first, so on failure key is left NULL.
 * Returns 0 on success, 1 on failure. */
static int ed_keygen(void *c)
{
    ed_ctx *st = c;
    if (st->key != NULL) {
        EVP_PKEY_free(st->key);
        st->key = NULL;
    }

    EVP_PKEY_CTX *gen = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
    if (gen == NULL)
        return 1;

    int rc = 1;
    if (EVP_PKEY_keygen_init(gen) > 0 && EVP_PKEY_keygen(gen, &st->key) > 0)
        rc = 0;
    EVP_PKEY_CTX_free(gen);
    return rc;
}
/* Timed op: sign ctx->msg with ctx->key, storing the signature and its length
 * in the context. Returns 0 on success, 1 on failure. */
static int ed_sign(void *c)
{
    ed_ctx *st = c;
    EVP_MD_CTX *md = EVP_MD_CTX_new();
    if (md == NULL)
        return 1;

    st->siglen = sizeof st->sig;   /* in/out: capacity in, actual length out */
    int signed_ok = 0;
    if (EVP_DigestSignInit(md, NULL, NULL, NULL, st->key) > 0)
        signed_ok = EVP_DigestSign(md, st->sig, &st->siglen, st->msg, sizeof st->msg) > 0;
    EVP_MD_CTX_free(md);

    if (!signed_ok)
        return 1;
    g_sink += st->sig[0];   /* sink the signature */
    return 0;
}
/* Timed op: verify the signature held in the context over ctx->msg with
 * ctx->key. Returns 0 when verification succeeds, 1 otherwise. */
static int ed_verify(void *c)
{
    ed_ctx *st = c;
    EVP_MD_CTX *md = EVP_MD_CTX_new();
    if (md == NULL)
        return 1;

    int verified = 0;
    if (EVP_DigestVerifyInit(md, NULL, NULL, NULL, st->key) > 0)
        verified = EVP_DigestVerify(md, st->sig, st->siglen, st->msg, sizeof st->msg) > 0;
    EVP_MD_CTX_free(md);

    if (!verified)
        return 1;
    g_sink += 1;   /* sink the (successful) verify result */
    return 0;
}
/* Benchmark the classical Ed25519 baseline (OpenSSL EVP) and print one JSON
 * object. keygen -> sign -> verify is validated once (untimed); then keygen,
 * sign and verify are measured. Because the timed keygen replaces the key on
 * every call, a key and a matching signature are re-created before sign and
 * verify are timed. Any failure aborts through die(). Returns 0. */
static int run_ed25519(const bench_cfg *cfg) {
    ed_ctx st;
    memset(&st, 0, sizeof st);
    memset(st.msg, 0xA5, sizeof st.msg);

    /* Correctness check, once and untimed: verify MUST succeed. */
    if (ed_keygen(&st) != 0)
        die("Ed25519","keygen failed");
    if (ed_sign(&st) != 0)
        die("Ed25519","sign failed");
    if (ed_verify(&st) != 0)
        die("Ed25519","verify failed on a valid signature (broken build)");

    measure_out keygen_m = {0}, sign_m = {0}, verify_m = {0};
    must_measure("Ed25519", "keygen", ed_keygen, &st, cfg, &keygen_m);
    /* the timed keygen left a new key with a stale signature: re-prime both */
    if (ed_keygen(&st) != 0 || ed_sign(&st) != 0)
        die("Ed25519","re-priming key+sig failed");
    must_measure("Ed25519", "sign", ed_sign, &st, cfg, &sign_m);
    must_measure("Ed25519", "verify", ed_verify, &st, cfg, &verify_m);

    printf("{\"alg\":\"Ed25519\",\"kind\":\"sig\",\"backend\":\"openssl\",\"classical\":true,\"enabled\":true,");
    printf("\"claimed_nist_level\":1,");
    printf("\"sizes\":{\"public_key\":32,\"secret_key\":32,\"signature\":64},");
    printf("\"operations\":{");
    print_stats_json(stdout, "keygen", compute_stats(keygen_m.all, keygen_m.all_n), &keygen_m);
    printf(",");
    print_stats_json(stdout, "sign", compute_stats(sign_m.all, sign_m.all_n), &sign_m);
    printf(",");
    print_stats_json(stdout, "verify", compute_stats(verify_m.all, verify_m.all_n), &verify_m);
    printf("}}\n");

    free(keygen_m.all);
    free(sign_m.all);
    free(verify_m.all);
    if (st.key)
        EVP_PKEY_free(st.key);
    return 0;
}
/* ---- main --------------------------------------------------------------- */
/* Print the command-line synopsis to stderr. */
static void usage(void)
{
    /* The text contains no conversion specifiers, so it is written verbatim. */
    static const char text[] =
        "usage: bench_pq --kind kem|sig --alg NAME [options]\n"
        " auto-calibration (default): each op is timed to ~--target-time-ms of\n"
        " aggregate work, clamped to [--min-samples, --max-iters].\n"
        " --target-time-ms N per-op timed-work target (default 250)\n"
        " --min-samples N floor on timed iters per rep (default 30)\n"
        " --max-iters N ceiling on timed iters per rep (default 20000)\n"
        " --reps N independent repetitions (default 5)\n"
        " --iters N FIXED-count fallback: exactly N timed iters/rep\n"
        " (disables calibration; pairs with --warmup)\n"
        " --warmup N warmup iters/rep in fixed-count mode (default 1000)\n";
    fputs(text, stderr);
}
/* Parse a non-negative decimal integer command-line value.
 * Returns 1 and stores the value on success; returns 0 when s is empty,
 * carries a sign or leading whitespace, or has trailing non-digit characters.
 * (Values beyond the unsigned long long range saturate, as strtoull does.) */
static int parse_u64_arg(const char *s, uint64_t *out) {
    if (s[0] < '0' || s[0] > '9')
        return 0;   /* rejects "", "-1", "+1", " 5", "abc" */
    char *end = NULL;
    unsigned long long v = strtoull(s, &end, 10);
    if (*end != '\0')
        return 0;   /* trailing junk such as "10k" */
    *out = (uint64_t)v;
    return 1;
}

/* Entry point: parse options, report the measurement mode and PMU
 * availability on stderr, then benchmark exactly one algorithm and print its
 * JSON object on stdout.
 * Exit status: 0 on success, 2 on a usage error (including a malformed
 * numeric option); die() exits with 3 on a benchmark failure. */
int main(int argc, char **argv) {
    const char *kind = NULL, *alg = NULL;
    /* fixed-count fallback knobs */
    uint64_t warmup = 1000, iters = 0, reps = 5; /* iters=0 => auto-calibrate */
    /* auto-calibration knobs */
    uint64_t target_time_ms = 250, min_samples = 30, max_iters = 20000;

    /* Every option takes exactly one value. */
    for (int i = 1; i < argc; i++) {
        const char *flag = argv[i];
        if (i + 1 >= argc) { usage(); return 2; }
        const char *val = argv[++i];
        uint64_t *num = NULL;

        if (!strcmp(flag, "--kind")) kind = val;
        else if (!strcmp(flag, "--alg")) alg = val;
        else if (!strcmp(flag, "--warmup")) num = &warmup;
        else if (!strcmp(flag, "--iters")) num = &iters;
        else if (!strcmp(flag, "--reps")) num = &reps;
        else if (!strcmp(flag, "--target-time-ms")) num = &target_time_ms;
        else if (!strcmp(flag, "--min-samples")) num = &min_samples;
        else if (!strcmp(flag, "--max-iters")) num = &max_iters;
        else { usage(); return 2; }

        /* A typo must not silently become 0 (which would flip --iters into
         * auto-calibrate mode) or wrap a negative number into a huge count. */
        if (num && !parse_u64_arg(val, num)) {
            fprintf(stderr, "[bench_pq] invalid value for %s: '%s'\n", flag, val);
            usage();
            return 2;
        }
    }
    if (!kind || !alg) { usage(); return 2; }

    /* target_ns is kept in nanoseconds: refuse values whose conversion wraps. */
    if (target_time_ms > UINT64_MAX / 1000000ull) {
        fprintf(stderr, "[bench_pq] --target-time-ms is too large\n");
        return 2;
    }

    if (reps < 1) reps = 1;
    if (min_samples < 1) min_samples = 1;
    if (max_iters < min_samples) max_iters = min_samples;

    bench_cfg cfg = {
        .fixed_iters = iters, /* >0 => fixed-count mode */
        .target_ns = target_time_ms * 1000000ull,
        .min_samples = min_samples,
        .max_iters = max_iters,
        .warmup = warmup,
        .reps = reps,
    };

    if (iters > 0)
        fprintf(stderr,"[bench_pq] mode=fixed-count reps=%llu warmup=%llu iters=%llu\n",
                (unsigned long long)reps,(unsigned long long)warmup,(unsigned long long)iters);
    else
        fprintf(stderr,"[bench_pq] mode=auto-calibrate reps=%llu target=%llums min_samples=%llu max_iters=%llu\n",
                (unsigned long long)reps,(unsigned long long)target_time_ms,
                (unsigned long long)min_samples,(unsigned long long)max_iters);

    /* PMU cycle availability probe. The result is reported once, on stderr
     * only; it is not part of the JSON written to stdout. */
    const char *pmu_reason = NULL;
    int pmu_ok = probe_pmu(&pmu_reason);
    fprintf(stderr,"[bench_pq] cycles_available=%d (%s)\n", pmu_ok, pmu_reason);

    OQS_init();
    int rc;
    if (!strcmp(kind, "kem")) {
        /* X25519 is the classical baseline served by OpenSSL, not liboqs */
        if (!strcmp(alg, "X25519")) rc = run_x25519(&cfg);
        else rc = run_kem(alg, &cfg);
    } else if (!strcmp(kind, "sig")) {
        /* Ed25519 is the classical baseline served by OpenSSL, not liboqs */
        if (!strcmp(alg, "Ed25519")) rc = run_ed25519(&cfg);
        else rc = run_sig(alg, &cfg);
    } else {
        usage();
        rc = 2;
    }
    OQS_destroy();
    return rc;
}

View File

@ -0,0 +1,245 @@
#!/usr/bin/env python3
"""assemble.py — merge harness outputs + thermal trace + environment metadata
into one results JSON with full provenance.
Inputs (all paths):
--meta meta.env KEY=VALUE run/host facts collected by run.sh
--lock versions.lock toolchain provenance from setup.sh
--features cpu_features.json CPU/crypto-extension detection (from run.sh)
--kemsig kemsig.jsonl one JSON object per algorithm from bench_pq
--tls tls.json output of the TLS harness (optional)
--thermal thermal.csv epoch_s,arm_clock_hz,temp_c,throttled_hex samples
--out results/<host>-<ts>.json
The single most important output field is `is_baseline_grade`: true ONLY on a
real RPi5 with performance governor, core pinning, A76-targeted flags, and no
thermal throttling. Everything else (notably macOS smoke runs) is false, with
reasons recorded so smoke output can never be mistaken for the baseline.
"""
from __future__ import annotations
import argparse
import json
import os
import statistics
import sys
SCHEMA_VERSION = "1.0.0"
def parse_envfile(path: str) -> dict:
    """Read a KEY=VALUE file (the format shared by meta.env and versions.lock).

    Blank lines, lines starting with ``#`` and lines without ``=`` are ignored.
    Keys and values are whitespace-trimmed; a value wrapped in one matching
    pair of single or double quotes has that pair removed. An empty or
    non-existent path yields an empty dict.
    """
    parsed = {}
    if not (path and os.path.exists(path)):
        return parsed
    with open(path) as handle:
        for raw in handle:
            entry = raw.strip()
            if entry == "" or entry.startswith("#"):
                continue
            # Split on the first '=' only, so values may themselves contain '='.
            key, sep, value = entry.partition("=")
            if not sep:
                continue
            value = value.strip()
            quoted = len(value) >= 2 and value[0] in "\"'" and value[-1] == value[0]
            parsed[key.strip()] = value[1:-1] if quoted else value
    return parsed
def load_jsonl(path: str) -> list:
    """Load a JSON-Lines file into a list, one decoded object per non-blank line.

    Lines that fail to decode are reported on stderr and skipped rather than
    aborting the whole load. An empty or non-existent path yields ``[]``.
    """
    records = []
    if path and os.path.exists(path):
        with open(path) as handle:
            for text in map(str.strip, handle):
                if not text:
                    continue
                try:
                    records.append(json.loads(text))
                except json.JSONDecodeError as e:
                    print(f"[assemble] skipping bad JSONL line: {e}", file=sys.stderr)
    return records
def load_json(path: str):
    """Return the decoded JSON document at ``path``.

    Returns ``None`` when the path is empty or the file does not exist; a file
    that exists but is not valid JSON still raises ``json.JSONDecodeError``.
    """
    if not path or not os.path.exists(path):
        return None
    with open(path) as handle:
        return json.load(handle)
def parse_thermal(path: str) -> dict:
    """Reduce the raw CSV thermal trace to an embedded record plus a summary.

    Each usable row has exactly four comma-separated fields:
    ``epoch_s,arm_clock_hz,temp_c,throttled_hex``. Rows with a different field
    count are ignored, and so are rows whose epoch field is present but not an
    integer (for example a header line). An empty or unparsable clock or
    temperature field is stored as ``None`` instead of aborting the run.

    Returns a dict with:
      columns             -- the column names, in sample order
      samples             -- list of [epoch, clock, temp, throttled_hex] rows
      temp_c              -- min/max/mean/samples summary, or None if no data
      arm_clock_hz        -- same summary for the clock, plus ``spread_frac``
                             ((max - min) / max) when the maximum is non-zero
      throttling_detected -- True if any row's hex flags had bit 2 or bit 18 set

    An empty or non-existent path yields empty samples and None summaries.
    """
    cols = ["epoch_s", "arm_clock_hz", "temp_c", "throttled_hex"]
    samples, temps, clocks = [], [], []
    throttling_detected = False

    def parse_field(text, cast):
        # Returns (value, ok). Empty -> (None, True); unparsable -> (None, False).
        text = text.strip()
        if not text:
            return None, True
        try:
            return cast(text), True
        except ValueError:
            return None, False

    if path and os.path.exists(path):
        with open(path) as f:
            for line in f:
                parts = line.strip().split(",")
                if len(parts) != 4:
                    continue
                ep, clk, temp, thr = parts
                epoch, epoch_ok = parse_field(ep, int)
                if not epoch_ok:
                    # Header line or corrupted row: nothing trustworthy in it.
                    continue
                clock, _ = parse_field(clk, int)
                temperature, _ = parse_field(temp, float)
                samples.append([epoch, clock, temperature, thr or None])
                if temperature is not None:
                    temps.append(temperature)
                if clock is not None:
                    clocks.append(clock)
                if thr:
                    try:
                        v = int(thr, 16)
                        # bit2 = throttling now, bit18 = throttling has occurred
                        if v & 0x4 or v & 0x40000:
                            throttling_detected = True
                    except ValueError:
                        pass

    def summarize(vals):
        if not vals:
            return None
        return {
            "min": min(vals), "max": max(vals),
            "mean": round(statistics.fmean(vals), 3),
            "samples": len(vals),
        }

    clock_summary = summarize(clocks)
    # Record the relative clock spread as extra context. It is informational
    # only and does not feed throttling_detected. Skipped when the maximum is
    # zero, which would otherwise divide by zero.
    if clock_summary and max(clocks) > 0:
        spread = (max(clocks) - min(clocks)) / max(clocks)
        clock_summary["spread_frac"] = round(spread, 4)

    return {
        "columns": cols,
        "samples": samples,
        "temp_c": summarize(temps),
        "arm_clock_hz": clock_summary,
        "throttling_detected": throttling_detected,
    }
def to_int(s, default=None):
    """Convert ``s`` with ``int()``; return ``default`` if that raises
    ``TypeError`` or ``ValueError`` (e.g. ``None`` or non-numeric text)."""
    try:
        converted = int(s)
    except (TypeError, ValueError):
        converted = default
    return converted
def main():
    """Merge all run artifacts into one results JSON and print its path.

    Reads the inputs named on the command line, evaluates the baseline-grade
    gate (Raspberry Pi host, ``performance`` governor, pinned core,
    ``cortex-a76`` build target, no throttling flag in the thermal trace) and
    writes the assembled document to ``--out``, creating its directory if
    needed. ``--config`` is accepted but not read by this function.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--meta", required=True)
    for optional_flag in ("--lock", "--features", "--kemsig", "--tls", "--thermal", "--config"):
        cli.add_argument(optional_flag, default="")
    cli.add_argument("--out", required=True)
    args = cli.parse_args()

    meta = parse_envfile(args.meta)
    lock = parse_envfile(args.lock)
    features = load_json(args.features) or {}
    kemsig = load_jsonl(args.kemsig)
    tls = load_json(args.tls)
    thermal = parse_thermal(args.thermal)

    m = meta.get          # short aliases for the two key/value sources
    lk = lock.get

    is_rpi = m("IS_RPI") == "1"
    governor = m("GOVERNOR_AFTER") or m("GOVERNOR_BEFORE") or "unknown"
    pinned = m("PINNED") == "1"
    cflags_target = lk("CFLAGS_TARGET", "unknown")

    # ---- the anti-confusion gate -----------------------------------------
    # Each entry is (check failed?, reason recorded when it failed).
    gate = [
        (not is_rpi,
         f"host is not a Raspberry Pi (model='{meta.get('RPI_MODEL','')}', os={meta.get('OS')})"),
        (governor != "performance",
         f"CPU governor is '{governor}', not 'performance'"),
        (not pinned,
         "benchmark was not pinned to a dedicated core (no taskset)"),
        (cflags_target != "cortex-a76",
         f"build flags targeted '{cflags_target}', not cortex-a76"),
        (bool(thermal.get("throttling_detected")),
         "thermal throttling was detected during the run"),
    ]
    baseline_reasons = [reason for failed, reason in gate if failed]
    is_baseline_grade = not baseline_reasons

    # Warnings collected by the runner arrive as one "||"-separated string.
    warnings = [w for w in m("WARNINGS", "").split("||") if w]
    if not is_baseline_grade:
        warnings.append("NOT RPi5-baseline-grade: " + "; ".join(baseline_reasons))

    host = {
        "hostname": m("HOSTNAME", ""),
        "os": m("OS", ""),
        "os_pretty": m("OS_PRETTY", ""),
        "arch": m("ARCH", ""),
        "kernel": m("KERNEL", ""),
        "is_rpi": is_rpi,
        "rpi_model": m("RPI_MODEL", ""),
        "cpu_brand": m("CPU_BRAND", ""),
        "ncpu": to_int(m("NCPU")),
        "ram_bytes": to_int(m("RAM_BYTES")),
    }
    run = {
        "timestamp_start_utc": m("TS_START_UTC", ""),
        "timestamp_end_utc": m("TS_END_UTC", ""),
        "duration_s": to_int(m("DURATION_S")),
        "governor_requested": m("GOVERNOR_REQUESTED", ""),
        "governor_before": m("GOVERNOR_BEFORE", ""),
        "governor_after": m("GOVERNOR_AFTER", ""),
        "bench_core": to_int(m("BENCH_CORE")),
        "pinned": pinned,
        "taskset_cmd": m("TASKSET_CMD", ""),
        # Per-op sizing. In auto-calibration mode warmup_iters/timed_iters are
        # chosen per operation (see each entry under kem/sig "operations");
        # the run-level target/min/max below describe how they were derived.
        "calibration_mode": m("CALIB_MODE", "auto"),
        "target_time_ms": to_int(m("TARGET_TIME_MS")),
        "min_samples": to_int(m("MIN_SAMPLES")),
        "max_iters": to_int(m("MAX_ITERS")),
        "warmup_iters": to_int(m("WARMUP")),
        "timed_iters": to_int(m("ITERS")),
        "repetitions": to_int(m("REPS")),
        "cycles_mode": m("CYCLES_MODE", ""),
        "cycles_available": m("CYCLES_AVAILABLE") == "1",
        "cycles_reason": m("CYCLES_REASON", ""),
    }
    toolchain = {
        "cc_version": lk("CC_VERSION", ""),
        "bench_cflags": lk("BENCH_CFLAGS", ""),
        "cflags_target": cflags_target,
        "liboqs_ref": lk("LIBOQS_REF", ""),
        "liboqs_commit": lk("LIBOQS_COMMIT", ""),
        "liboqs_opt_defines": lk("LIBOQS_OPT_DEFINES", ""),
        "openssl": lk("OPENSSL_COMMIT", ""),
        "oqsprovider_ref": lk("OQSPROVIDER_REF", ""),
        "oqsprovider_commit": lk("OQSPROVIDER_COMMIT", ""),
    }

    # Key order is the order in which the fields appear in the output file.
    result = {
        "schema_version": SCHEMA_VERSION,
        "tool_version": m("TOOL_VERSION", "0.1.0"),
        "generated_utc": m("TS_END_UTC", ""),
        "is_baseline_grade": is_baseline_grade,
        "baseline_grade_reasons": baseline_reasons,
        "host": host,
        "cpu_features": features,
        "run": run,
        "toolchain": toolchain,
        "thermal_trace": thermal,
        "warnings": warnings,
        "kem": [rec for rec in kemsig if rec.get("kind") == "kem"],
        "sig": [rec for rec in kemsig if rec.get("kind") == "sig"],
        "tls": tls,
    }

    out_dir = os.path.dirname(args.out) or "."
    os.makedirs(out_dir, exist_ok=True)
    with open(args.out, "w") as sink:
        json.dump(result, sink, indent=2)
    print(args.out)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""list_algs.py — expand config.yaml into shell-consumable lines for run.sh.
Modes:
measurement -> KEY=VALUE lines (auto-calib: target/min_samples/max_iters/
reps/cycles_mode; plus warmup/iters fixed-count fallback)
kemsig -> one "kind<TAB>alg<TAB>is_classical" line per algorithm,
baselines first (so charts always have the reference point)
tls -> emits the TLS matrix as JSON on one line
Uses the dependency-free miniyaml parser so no PyYAML is required.
"""
import os
import sys
import json
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import miniyaml # noqa: E402
def main():
    """Print one view of the config file, selected by ``sys.argv[1]``.

    ``sys.argv[1]`` is the mode (default ``kemsig``) and ``sys.argv[2]`` the
    config path (default ``config.yaml``). The config is loaded before the mode
    is checked; an unrecognised mode exits with an ``unknown mode`` message.
    """
    argv = sys.argv
    mode = argv[1] if len(argv) > 1 else "kemsig"
    cfg_path = argv[2] if len(argv) > 2 else "config.yaml"
    cfg = miniyaml.load_file(cfg_path)

    if mode == "measurement":
        knobs = cfg.get("measurement", {}) or {}
        # (shell variable, config key, default). The first five drive
        # auto-calibration; WARMUP/ITERS are the fixed-count fallback used
        # only with ./run.sh --iters.
        table = (
            ("TARGET_TIME_MS", "target_time_ms", 250),
            ("MIN_SAMPLES", "min_samples", 30),
            ("MAX_ITERS", "max_iters", 20000),
            ("REPS", "repetitions", 5),
            ("CYCLES_MODE", "cycles_mode", "auto"),
            ("WARMUP", "warmup_iters", 1000),
            ("ITERS", "timed_iters", 10000),
        )
        for shell_var, cfg_key, fallback in table:
            print(f"{shell_var}={knobs.get(cfg_key, fallback)}")
        return

    if mode == "kemsig":
        for kind in ("kem", "sig"):
            section = cfg.get(kind, {}) or {}
            # "baseline" is walked before "candidates" so baselines print first.
            for grp in ("baseline", "candidates"):
                for entry in (section.get(grp) or []):
                    if isinstance(entry, dict):
                        name = entry.get("name")
                        classical = "1" if entry.get("classical") else "0"
                    else:
                        # A bare scalar entry is an algorithm name, non-classical.
                        name, classical = entry, "0"
                    if not name:
                        continue
                    print(f"{kind}\t{name}\t{classical}")
        return

    if mode == "tls":
        print(json.dumps(cfg.get("tls", {})))
        return

    sys.exit(f"unknown mode: {mode}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,151 @@
"""miniyaml — a dependency-free parser for the restricted YAML subset used by
config.yaml.
We deliberately avoid a PyYAML runtime dependency so the core pipeline runs on a
stock Python 3 (the RPi5 / Mac smoke box both have only stdlib by default).
Supported subset (sufficient for config.yaml):
- nested mappings via indentation (2 spaces per level by convention)
- lists of scalars: "- value"
- lists of mappings: "- key: value" then indented "key: value"
- scalars: int, float, bool (true/false), null/~, quoted or bare strings
- "# comment" to end of line (outside quotes)
It is NOT a general YAML implementation. If a user needs full YAML they can
install PyYAML and set PQB_USE_PYYAML=1.
"""
from __future__ import annotations
import os
import re
def _scalar(tok: str):
s = tok.strip()
if s == "" or s in ("~", "null", "Null", "NULL"):
return None
if (s[0] == '"' and s[-1] == '"') or (s[0] == "'" and s[-1] == "'"):
return s[1:-1]
low = s.lower()
if low in ("true", "yes"):
return True
if low in ("false", "no"):
return False
if re.fullmatch(r"[+-]?\d+", s):
return int(s)
if re.fullmatch(r"[+-]?\d*\.\d+([eE][+-]?\d+)?", s):
return float(s)
return s
def _strip_comment(line: str) -> str:
out, q = [], None
for ch in line:
if q:
out.append(ch)
if ch == q:
q = None
elif ch in ("'", '"'):
q = ch
out.append(ch)
elif ch == "#":
break
else:
out.append(ch)
return "".join(out).rstrip()
def _indent(line: str) -> int:
return len(line) - len(line.lstrip(" "))
def loads(text: str):
    """Parse the restricted YAML subset used by config.yaml.

    Supports indentation-nested mappings, lists of scalars (``- value``) and
    lists of mappings (``- key: value`` followed by deeper-indented pairs).
    Returns the parsed top-level value, or ``{}`` when the text has no content.

    A list item that contains ``:`` but has no key before it (e.g. ``- :x``)
    is kept as a plain scalar rather than raising.
    """
    # Tokenize into (indent, content) ignoring blank/comment-only lines.
    lines = []
    for raw in text.splitlines():
        c = _strip_comment(raw)
        if c.strip() == "":
            continue
        lines.append((_indent(c), c.strip(), c))

    # Cursor into `lines`; a one-element list so the nested parsers can
    # advance it without `nonlocal`.
    pos = [0]

    def parse_block(min_indent: int):
        # Dispatch on the first pending line: "- " opens a list, anything else
        # a mapping. The block's indent is taken from that line itself;
        # min_indent is not consulted.
        if pos[0] >= len(lines):
            return None
        indent = lines[pos[0]][0]
        if lines[pos[0]][1].startswith("- "):
            return parse_list(indent)
        return parse_map(indent)

    def parse_map(indent: int):
        obj = {}
        while pos[0] < len(lines):
            ind, stripped, _ = lines[pos[0]]
            if ind < indent or stripped.startswith("- "):
                break
            if ind > indent:  # malformed; skip
                pos[0] += 1
                continue
            m = re.match(r"^([^:]+):\s*(.*)$", stripped)
            if not m:
                pos[0] += 1
                continue
            key, val = m.group(1).strip(), m.group(2)
            pos[0] += 1
            if val == "":
                # nested block or empty
                if pos[0] < len(lines) and lines[pos[0]][0] > indent:
                    obj[key] = parse_block(indent + 1)
                else:
                    obj[key] = None
            else:
                obj[key] = _scalar(val)
        return obj

    def parse_list(indent: int):
        arr = []
        while pos[0] < len(lines):
            ind, stripped, _ = lines[pos[0]]
            if ind < indent or not stripped.startswith("- "):
                break
            if ind > indent:
                break
            item = stripped[2:].strip()
            pos[0] += 1

            # An unquoted item containing ':' is a candidate "key: value" pair.
            # The regex needs at least one character before the colon, so it
            # can still fail to match; such items fall back to scalars.
            m = None
            if ":" in item and not (item[0] in "'\""):
                m = re.match(r"^([^:]+):\s*(.*)$", item)
            if m is None:
                arr.append(_scalar(item))
                continue

            # list of mappings — first pair is inline, rest are indented deeper
            sub = {}
            key, val = m.group(1).strip(), m.group(2)
            sub[key] = _scalar(val) if val != "" else None
            child_indent = indent + 2
            while pos[0] < len(lines) and lines[pos[0]][0] >= child_indent \
                    and not lines[pos[0]][1].startswith("- "):
                ind2, strip2, _ = lines[pos[0]]
                m2 = re.match(r"^([^:]+):\s*(.*)$", strip2)
                if not m2:
                    pos[0] += 1
                    continue
                k2, v2 = m2.group(1).strip(), m2.group(2)
                pos[0] += 1
                sub[k2] = _scalar(v2) if v2 != "" else None
            arr.append(sub)
        return arr

    return parse_block(0) or {}
def load_file(path: str):
    """Parse the YAML file at ``path``.

    Uses the built-in subset parser unless the environment variable
    ``PQB_USE_PYYAML`` is exactly ``"1"``, in which case PyYAML's
    ``safe_load`` is used (and must be installed).
    """
    if os.environ.get("PQB_USE_PYYAML") != "1":
        with open(path) as src:
            return loads(src.read())
    # Imported lazily so the default path has no third-party dependency.
    import yaml  # type: ignore
    with open(path) as src:
        return yaml.safe_load(src)


if __name__ == "__main__":
    # CLI: dump the parsed form of the file named by the first argument.
    import json
    import sys
    parsed = load_file(sys.argv[1])
    print(json.dumps(parsed, indent=2))

View File

@ -0,0 +1,17 @@
# Build the TLS handshake harness against the OpenSSL that has oqs-provider.
#   make OPENSSL_PREFIX=...   (defaults to Homebrew openssl@3, else system)
#
# The default prefix is resolved once with an immediate assignment. A plain
# `?=` would create a recursively-expanded variable and re-run the `brew`
# probe every time $(OPENSSL_PREFIX) is referenced below. A value supplied on
# the command line or through the environment is left untouched.
ifeq ($(origin OPENSSL_PREFIX),undefined)
OPENSSL_PREFIX := $(shell brew --prefix openssl@3 2>/dev/null || echo /usr)
endif

CC      ?= cc
CFLAGS  := -O3 -std=c11 -Wall -Wextra -I$(OPENSSL_PREFIX)/include
# The rpath makes the binary load libssl/libcrypto from the chosen prefix at
# run time rather than whatever the dynamic loader finds first.
LDFLAGS := -L$(OPENSSL_PREFIX)/lib -Wl,-rpath,$(OPENSSL_PREFIX)/lib
LDLIBS  := -lssl -lcrypto -lm

bench_tls: bench_tls.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDLIBS)

clean:
	rm -f bench_tls

.PHONY: clean

View File

@ -0,0 +1,194 @@
/* ===========================================================================
* bench_tls.c TLS 1.3 handshake benchmark via the OpenSSL API.
*
* Performs full TLS 1.3 handshakes entirely in-process over a pair of memory
* BIOs (no sockets, no CLI scraping). This gives:
* - clean per-handshake wall-clock latency (no socket/scheduler noise)
* - exact bytes-on-the-wire each direction
* - the precise ClientHello flight size (with a fragmentation note vs MSS)
* - real server-cert signature verification cost (client verifies the chain),
* which is the whole point of sweeping PQ signature algorithms.
*
* bench_tls --group X25519MLKEM768 --ca ca.pem \
* --cert server.pem --key server.key --connections 1000 \
* --label "X25519MLKEM768+mldsa65"
*
* Emits one JSON object. PQ groups/sigs require oqs-provider to be loadable
* (point OPENSSL_MODULES at its directory); if it cannot load, we say so.
* ===========================================================================*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <math.h>
#include <time.h>
#include <openssl/ssl.h>
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/provider.h>
#define TYPICAL_MSS 1400 /* note fragmentation when ClientHello exceeds this */
/* Current CLOCK_MONOTONIC reading expressed in nanoseconds. */
static inline uint64_t now_ns(void) {
    struct timespec t;
    clock_gettime(CLOCK_MONOTONIC, &t);
    const uint64_t whole_seconds_ns = (uint64_t)t.tv_sec * 1000000000ull;
    return whole_seconds_ns + (uint64_t)t.tv_nsec;
}
/* qsort() comparator for uint64_t elements, ascending: returns -1, 0 or 1. */
static int cmp_u64(const void *a, const void *b) {
    const uint64_t *lhs = a;
    const uint64_t *rhs = b;
    if (*lhs < *rhs) return -1;
    if (*lhs > *rhs) return 1;
    return 0;
}
/* Percentile of an ascending-sorted array by linear interpolation.
 *   s - sorted samples, n - element count, p - fraction (0.5 = median).
 * Returns 0 for an empty array and the sole element when n == 1. The
 * fractional position p*(n-1) is interpolated between its two neighbours;
 * a position at or beyond the last element returns the last element. */
static double pct(const uint64_t *s, uint64_t n, double p) {
    if (n == 0) return 0;
    if (n == 1) return (double)s[0];

    const double position = p * (double)(n - 1);
    const uint64_t below = (uint64_t)position;
    if (below + 1 >= n) return (double)s[n - 1];

    const double frac = position - below;
    return (double)s[below] + frac * ((double)s[below + 1] - (double)s[below]);
}
/* Shuttle all pending bytes from src's mem BIO into dst's mem BIO.
 *
 * Returns the number of bytes actually delivered to dst. If a write to dst
 * fails or is short, the transfer stops and only the bytes dst accepted are
 * counted, so the wire-size totals never include data that was not handed
 * over. */
static size_t pump(BIO *src, BIO *dst) {
    char buf[16384];
    size_t total = 0;
    int n;
    while ((n = BIO_read(src, buf, sizeof buf)) > 0) {
        int written = BIO_write(dst, buf, n);
        if (written <= 0) break;          /* destination refused the data */
        total += (size_t)written;
        if (written < n) break;           /* short write: stop, do not spin */
    }
    return total;
}
/* One full handshake. Records bytes each way and ClientHello size (first flight).
 *
 * A fresh client and server SSL object are created from the given contexts
 * and wired together through four memory BIOs; the two sides are then stepped
 * alternately, moving each side's output into the other's input.
 *
 * Outputs (zeroed on entry):
 *   *c2s_bytes - total bytes the client produced
 *   *s2c_bytes - total bytes the server produced
 *   *chello    - size of the first non-empty client flight
 *
 * Returns 0 on success, -1 if an SSL object or BIO could not be allocated,
 * -2 if the handshake did not complete. A round in which neither side produced
 * any bytes while the handshake is still unfinished is treated as a stall and
 * fails immediately, since no later round could make progress either. */
static int one_handshake(SSL_CTX *cctx, SSL_CTX *sctx,
                         size_t *c2s_bytes, size_t *s2c_bytes, size_t *chello) {
    *c2s_bytes = *s2c_bytes = *chello = 0;

    SSL *cli = SSL_new(cctx), *srv = SSL_new(sctx);
    BIO *cli_rb = BIO_new(BIO_s_mem()), *cli_wb = BIO_new(BIO_s_mem());
    BIO *srv_rb = BIO_new(BIO_s_mem()), *srv_wb = BIO_new(BIO_s_mem());
    if (!cli || !srv || !cli_rb || !cli_wb || !srv_rb || !srv_wb) {
        /* Nothing has been attached yet, so each object is released
         * individually; the free functions accept NULL. */
        BIO_free(cli_rb); BIO_free(cli_wb);
        BIO_free(srv_rb); BIO_free(srv_wb);
        SSL_free(cli); SSL_free(srv);
        return -1;
    }
    SSL_set_bio(cli, cli_rb, cli_wb);   /* takes ownership */
    SSL_set_bio(srv, srv_rb, srv_wb);
    SSL_set_connect_state(cli);
    SSL_set_accept_state(srv);

    int first_flight = 1, rc = 0;
    for (int i = 0; i < 64; i++) {
        /* Return values are intentionally unused: completion is judged by
         * SSL_is_init_finished() and by whether any bytes moved. */
        (void)SSL_do_handshake(cli);
        size_t from_client = pump(cli_wb, srv_rb);
        if (first_flight && from_client > 0) { *chello = from_client; first_flight = 0; }
        *c2s_bytes += from_client;

        (void)SSL_do_handshake(srv);
        size_t from_server = pump(srv_wb, cli_rb);
        *s2c_bytes += from_server;

        if (SSL_is_init_finished(cli) && SSL_is_init_finished(srv)) break;
        /* if both stalled with nothing to transfer, it's a failure */
        if (from_client == 0 && from_server == 0) { rc = -2; break; }
    }
    if (!SSL_is_init_finished(cli) || !SSL_is_init_finished(srv)) rc = -2;

    SSL_free(cli); SSL_free(srv);       /* also frees the attached BIOs */
    return rc;
}
/* Entry point: run `--warmup` untimed and `--connections` timed in-process
 * TLS 1.3 handshakes for one (group, certificate) pair and print one JSON
 * object on stdout.
 *
 * Exit status: 2 for bad arguments; 1 for context/memory allocation failure
 * or when every timed handshake failed; 0 otherwise. A configuration that
 * cannot be set up (unsupported group, cert/CA load failure, warmup handshake
 * failure) still exits 0 and reports `"enabled":false` with a reason.
 *
 * NOTE: label and group are written into the JSON unescaped, so callers must
 * not pass values containing double quotes or backslashes. */
int main(int argc, char **argv) {
    const char *group = "X25519", *ca = NULL, *cert = NULL, *key = NULL, *label = "";
    uint64_t connections = 1000, warmup = 20;

    for (int i = 1; i < argc; i++) {
        if      (!strcmp(argv[i], "--group") && i+1<argc) group = argv[++i];
        else if (!strcmp(argv[i], "--ca") && i+1<argc) ca = argv[++i];
        else if (!strcmp(argv[i], "--cert") && i+1<argc) cert = argv[++i];
        else if (!strcmp(argv[i], "--key") && i+1<argc) key = argv[++i];
        else if (!strcmp(argv[i], "--connections") && i+1<argc) connections = strtoull(argv[++i],0,10);
        else if (!strcmp(argv[i], "--warmup") && i+1<argc) warmup = strtoull(argv[++i],0,10);
        else if (!strcmp(argv[i], "--label") && i+1<argc) label = argv[++i];
        else { fprintf(stderr,"bad arg %s\n",argv[i]); return 2; }
    }
    if (!cert || !key) { fprintf(stderr,"--cert and --key required\n"); return 2; }

    /* The latency buffer holds one uint64_t per connection: reject a count of
     * zero (nothing to measure) or one whose byte size would overflow size_t. */
    const size_t max_connections = SIZE_MAX / sizeof(uint64_t);
    if (connections == 0 || connections > max_connections) {
        fprintf(stderr,"--connections must be between 1 and %zu\n", max_connections);
        return 2;
    }

    /* Everything released at `done` is declared before the first jump to it. */
    int status = 0;
    uint64_t *lat = NULL;
    SSL_CTX *cctx = NULL, *sctx = NULL;
    size_t c2s = 0, s2c = 0, ch = 0;
    size_t ch_last = 0, c2s_last = 0, s2c_last = 0;
    uint64_t ok = 0;

    /* providers: default always; oqs if discoverable (OPENSSL_MODULES) */
    OSSL_PROVIDER_load(NULL, "default");
    int have_oqs = OSSL_PROVIDER_load(NULL, "oqsprovider") != NULL;

    cctx = SSL_CTX_new(TLS_client_method());
    sctx = SSL_CTX_new(TLS_server_method());
    if (!cctx || !sctx) { fprintf(stderr,"ctx alloc failed\n"); status = 1; goto done; }

    /* Pin both ends to TLS 1.3 only. */
    SSL_CTX_set_min_proto_version(cctx, TLS1_3_VERSION);
    SSL_CTX_set_max_proto_version(cctx, TLS1_3_VERSION);
    SSL_CTX_set_min_proto_version(sctx, TLS1_3_VERSION);
    SSL_CTX_set_max_proto_version(sctx, TLS1_3_VERSION);

    int group_ok = SSL_CTX_set1_groups_list(cctx, group) &&
                   SSL_CTX_set1_groups_list(sctx, group);
    int cert_ok  = SSL_CTX_use_certificate_chain_file(sctx, cert) == 1 &&
                   SSL_CTX_use_PrivateKey_file(sctx, key, SSL_FILETYPE_PEM) == 1;

    /* client verifies the server chain -> exercises PQ signature verify cost */
    int verify_setup = 1;
    if (ca) {
        if (SSL_CTX_load_verify_locations(cctx, ca, NULL) != 1) verify_setup = 0;
        SSL_CTX_set_verify(cctx, SSL_VERIFY_PEER, NULL);
    } else {
        SSL_CTX_set_verify(cctx, SSL_VERIFY_NONE, NULL);
    }

    if (!group_ok || !cert_ok || !verify_setup) {
        printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":false,\"have_oqs_provider\":%s,"
               "\"reason\":\"%s%s%s\"}\n",
               label, group, have_oqs?"true":"false",
               group_ok?"":"group-not-supported ",
               cert_ok?"":"cert-load-failed ",
               verify_setup?"":"ca-load-failed");
        ERR_print_errors_fp(stderr);
        goto done;                       /* reported as a disabled row, exit 0 */
    }

    /* warmup */
    for (uint64_t i = 0; i < warmup; i++) {
        if (one_handshake(cctx, sctx, &c2s, &s2c, &ch) != 0) {
            printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":false,"
                   "\"have_oqs_provider\":%s,\"reason\":\"handshake failed\"}\n",
                   label, group, have_oqs?"true":"false");
            ERR_print_errors_fp(stderr);
            goto done;                   /* reported as a disabled row, exit 0 */
        }
    }

    lat = malloc((size_t)connections * sizeof(uint64_t));
    if (!lat) { fprintf(stderr,"latency buffer alloc failed\n"); status = 1; goto done; }

    /* Timed loop: only successful handshakes contribute a latency sample. The
     * byte counts reported are those of the last successful handshake. */
    for (uint64_t i = 0; i < connections; i++) {
        uint64_t t0 = now_ns();
        int rc = one_handshake(cctx, sctx, &c2s, &s2c, &ch);
        uint64_t dt = now_ns() - t0;
        if (rc == 0) { lat[ok++] = dt; ch_last = ch; c2s_last = c2s; s2c_last = s2c; }
    }
    if (ok == 0) { fprintf(stderr,"all handshakes failed\n"); status = 1; goto done; }

    qsort(lat, ok, sizeof(uint64_t), cmp_u64);
    double median = pct(lat, ok, 0.5);
    double p95    = pct(lat, ok, 0.95);
    double mn = (double)lat[0], mx = (double)lat[ok-1];
    double sum = 0;
    for (uint64_t i = 0; i < ok; i++) sum += (double)lat[i];
    double mean = sum/ok;
    double ss = 0;
    for (uint64_t i = 0; i < ok; i++) { double d = (double)lat[i]-mean; ss += d*d; }
    double stddev = ok>1 ? sqrt(ss/(ok-1)) : 0;      /* sample std deviation */
    double hs_per_sec = median>0 ? 1e9/median : 0;   /* derived from the median */

    printf("{\"label\":\"%s\",\"group\":\"%s\",\"enabled\":true,\"have_oqs_provider\":%s,",
           label, group, have_oqs?"true":"false");
    printf("\"connections\":%llu,\"succeeded\":%llu,", (unsigned long long)connections,(unsigned long long)ok);
    printf("\"handshake_latency_ns\":{\"median\":%.1f,\"p95\":%.1f,\"min\":%.1f,\"max\":%.1f,\"mean\":%.1f,\"stddev\":%.1f},",
           median,p95,mn,mx,mean,stddev);
    printf("\"handshakes_per_sec\":%.1f,", hs_per_sec);
    printf("\"bytes_on_wire\":{\"client_to_server\":%zu,\"server_to_client\":%zu,\"total\":%zu},",
           c2s_last, s2c_last, c2s_last+s2c_last);
    printf("\"client_hello_bytes\":%zu,\"client_hello_fragmented\":%s,\"mss_assumed\":%d}\n",
           ch_last, ch_last>TYPICAL_MSS?"true":"false", TYPICAL_MSS);

done:
    /* free() and SSL_CTX_free() both accept NULL. */
    free(lat);
    SSL_CTX_free(cctx);
    SSL_CTX_free(sctx);
    return status;
}

View File

@ -0,0 +1,157 @@
#!/usr/bin/env bash
# =============================================================================
# run_tls.sh — generate PKI and run the TLS 1.3 (KEM-group x signature) matrix.
#
# Always benchmarks the classical Logos baseline (X25519 key exchange + Ed25519
# server auth) using stock OpenSSL. PQ rows additionally require oqs-provider to
# be loadable; if it is not present we record those rows as unavailable (with a
# reason) rather than failing — so this still smoke-tests cleanly on a dev box.
#
# ./run_tls.sh --out tls.json --connections 1000
#
# Honors $PQB_TASKSET (a taskset/numactl prefix) to pin bench_tls to a core.
# =============================================================================
# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail

# HERE = directory containing this script; ROOT = two levels above it.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$HERE/../.." && pwd)"
# The pqb_* helpers used below are not defined in this script; presumably they
# come from this sourced library (TODO confirm).
# shellcheck source=setup/lib_platform.sh
source "$ROOT/setup/lib_platform.sh"
LOCK="$ROOT/setup/versions.lock"
# The lock file is optional: it is sourced only if present, and `|| true`
# keeps `set -e` from aborting the script when it is absent.
# shellcheck disable=SC1090
[ -f "$LOCK" ] && source "$LOCK" || true
pqb_detect_platform

# ---- command line: every option takes exactly one value ---------------------
OUT=""; CONNS=1000; WARMUP=20
while [ $# -gt 0 ]; do
  case "$1" in
    --out) OUT="$2"; shift ;;
    --connections) CONNS="$2"; shift ;;
    --warmup) WARMUP="$2"; shift ;;
    *) pqb_err "unknown arg: $1"; exit 2 ;;
  esac
  shift
done
[ -n "$OUT" ] || { pqb_err "--out required"; exit 2; }
# Optional command prefix placed in front of the harness invocation; empty
# means the harness is run directly.
TASKSET="${PQB_TASKSET:-}"

# ---- choose the OpenSSL that has the provider ------------------------------
# Each setting can be overridden from the environment; otherwise the openssl
# on PATH and the Homebrew openssl@3 prefix (falling back to /usr) are used.
OSSL="${OPENSSL_BIN:-$(command -v openssl)}"
OSSL_PREFIX="${OPENSSL_PREFIX:-$(brew --prefix openssl@3 2>/dev/null || echo /usr)}"
PROV_MODULE="${OQSPROVIDER_MODULE:-}"
PROV_ARGS=""
HAVE_OQS=0
# oqs-provider counts as available only if the module file exists AND the
# chosen openssl can actually list it as a provider.
if [ -n "$PROV_MODULE" ] && [ -f "$PROV_MODULE" ]; then
  export OPENSSL_MODULES="$(dirname "$PROV_MODULE")"
  if "$OSSL" list -providers -provider oqsprovider -provider default >/dev/null 2>&1; then
    PROV_ARGS="-provider oqsprovider -provider default"
    HAVE_OQS=1
    pqb_log "oqs-provider available: $PROV_MODULE"
  fi
fi
[ "$HAVE_OQS" = 0 ] && pqb_warn "oqs-provider not available — PQ TLS rows will be marked unavailable (classical baseline still runs)"

# ---- build the harness -----------------------------------------------------
# make's stdout is discarded; a build failure still aborts via `set -e`.
make -C "$HERE" OPENSSL_PREFIX="$OSSL_PREFIX" >/dev/null
BENCH="$HERE/bench_tls"
# NOTE(review): when a pin prefix is set this (re-)exports OPENSSL_MODULES,
# as an empty string if it was unset. Intent is not evident from here — confirm.
[ -n "$TASKSET" ] && export OPENSSL_MODULES="${OPENSSL_MODULES:-}"

# ---- PKI workspace ---------------------------------------------------------
# Recreated from scratch on every run, so no material from a previous run
# survives.
PKI="$HERE/pki"; rm -rf "$PKI"; mkdir -p "$PKI"
# gen_cert <sig_alg> <out_prefix> [provider] -> CA + server cert/key of that alg
#
# Writes, under $PKI, a self-signed CA (<prefix>_ca.key/.pem) and a server key,
# CSR and CA-signed certificate (<prefix>_server.key/.csr/.pem), all using
# <sig_alg>. The optional third argument is a string of extra openssl options
# that is deliberately word-split. All openssl output is discarded.
# Returns 0 when every step succeeded, 1 as soon as one step fails.
gen_cert() {
  local sig_alg="$1" prefix="$2" prov_flags="${3:-}"
  local stem="$PKI/${prefix}"
  local ca_key="${stem}_ca.key" ca_crt="${stem}_ca.pem"
  local sv_key="${stem}_server.key" sv_csr="${stem}_server.csr" sv_crt="${stem}_server.pem"

  # The && chain stops at the first failing step.
  # shellcheck disable=SC2086
  {
    # CA
    "$OSSL" req -x509 -new -newkey "$sig_alg" -nodes $prov_flags \
      -keyout "$ca_key" -out "$ca_crt" -days 3650 \
      -subj "/CN=PQB Test CA ($sig_alg)" &&
    # server key + CSR + cert signed by CA
    "$OSSL" genpkey -algorithm "$sig_alg" $prov_flags -out "$sv_key" &&
    "$OSSL" req -new -key "$sv_key" $prov_flags -out "$sv_csr" -subj "/CN=localhost" &&
    "$OSSL" x509 -req -in "$sv_csr" -CA "$ca_crt" -CAkey "$ca_key" $prov_flags \
      -out "$sv_crt" -days 3650 -CAcreateserial
  } >/dev/null 2>&1 || return 1
  return 0
}
# ---- read config -----------------------------------------------------------
# The `tls` section of config.yaml, as one line of JSON.
TLS_JSON="$(python3 "$ROOT/bench/lib/list_algs.py" tls "$ROOT/config.yaml")"
# read_list <key>: print the list stored under <key> in the TLS section, one
# entry per line (nothing if the key is absent).
read_list() { python3 -c "import json,sys; print('\n'.join(json.loads(sys.argv[1]).get(sys.argv[2],[])))" "$TLS_JSON" "$1"; }
# Both baseline fields are mandatory: a missing key makes python raise, and
# `set -e` then aborts the script.
BASE_KEM="$(python3 -c "import json,sys;print(json.loads(sys.argv[1])['baseline']['kem_group'])" "$TLS_JSON")"
BASE_SIG="$(python3 -c "import json,sys;print(json.loads(sys.argv[1])['baseline']['sig_alg'])" "$TLS_JSON")"

# ---- generate certs --------------------------------------------------------
# SIG_OK_ALGS collects the PQ signature algorithms whose cert was generated;
# only those take part in the matrix later.
declare -a SIG_OK_ALGS=()
# classical baseline cert (always)
if gen_cert "$BASE_SIG" "base_$BASE_SIG"; then
  pqb_log "generated baseline cert ($BASE_SIG)"
else
  # Without the baseline cert nothing can be measured: record that in the
  # output file and exit 0 instead of failing the caller.
  pqb_err "failed to generate classical baseline cert ($BASE_SIG) — TLS layer cannot run"
  echo '{"available":false,"reason":"baseline cert generation failed"}' > "$OUT"; exit 0
fi
if [ "$HAVE_OQS" = 1 ]; then
  # One cert set per configured PQ signature algorithm; a failure only skips
  # that algorithm.
  while IFS= read -r s; do
    [ -z "$s" ] && continue
    if gen_cert "$s" "pq_$s" "$PROV_ARGS"; then
      SIG_OK_ALGS+=("$s"); pqb_log "generated PQ cert ($s)"
    else
      pqb_warn "could not generate cert for sig alg '$s' (skipping)"
    fi
  done < <(read_list sig_algs)
fi
# cert path helpers
# _pki_path <sig_alg> <suffix>: path of one PKI artifact for <sig_alg>. The
# baseline algorithm's files carry the "base_" prefix, every other
# algorithm's the "pq_" prefix.
_pki_path() {
  if [ "$1" = "$BASE_SIG" ]; then
    echo "$PKI/base_${BASE_SIG}_$2"
  else
    echo "$PKI/pq_${1}_$2"
  fi
}
# ca_for / crt_for / key_for <sig_alg>: CA cert, server cert, server key path.
ca_for()  { _pki_path "$1" ca.pem; }
crt_for() { _pki_path "$1" server.pem; }
key_for() { _pki_path "$1" server.key; }
# run_cell <kem_group> <sig_alg>: run the harness for one matrix cell.
# The harness's stdout is passed through to the caller (who redirects it);
# its stderr is appended to $PKI/bench_tls.err. The exit status is the
# harness's own.
run_cell() {
  local kem_group="$1" sig_alg="$2"
  local -a harness_args=(
    --group "$kem_group"
    --ca "$(ca_for "$sig_alg")"
    --cert "$(crt_for "$sig_alg")"
    --key "$(key_for "$sig_alg")"
    --connections "$CONNS"
    --warmup "$WARMUP"
    --label "${kem_group}+${sig_alg}"
  )
  # $TASKSET is left unquoted on purpose: it is an optional multi-word prefix
  # and expands to nothing when empty.
  # shellcheck disable=SC2086
  $TASKSET "$BENCH" "${harness_args[@]}" 2>>"$PKI/bench_tls.err"
}
# One JSON object per matrix cell is appended to this file (truncated first).
ROWS="$PKI/rows.jsonl"; : > "$ROWS"
# baseline row always first
pqb_log "TLS baseline: $BASE_KEM + $BASE_SIG ($CONNS handshakes)"
# A failing cell only produces a warning; the remaining cells still run.
run_cell "$BASE_KEM" "$BASE_SIG" >> "$ROWS" || pqb_warn "baseline TLS cell failed"
# PQ matrix: (kem_groups x sig_algs) — only cells whose cert exists
if [ "$HAVE_OQS" = 1 ] && [ "${#SIG_OK_ALGS[@]}" -gt 0 ]; then
  while IFS= read -r kem; do
    [ -z "$kem" ] && continue
    for sig in "${SIG_OK_ALGS[@]}"; do
      pqb_log "TLS cell: $kem + $sig"
      run_cell "$kem" "$sig" >> "$ROWS" || pqb_warn "cell $kem+$sig failed"
    done
  done < <(read_list kem_groups)
fi
# ---- assemble tls.json -----------------------------------------------------
# The inline Python reads the rows file, silently drops lines that are not
# valid JSON, and writes the combined document to $OUT. The quoted 'PY'
# delimiter stops the shell from expanding anything inside the script.
python3 - "$ROWS" "$OUT" "$HAVE_OQS" "$BASE_KEM" "$BASE_SIG" <<'PY'
import json,sys
rows_path, out_path, have_oqs, base_kem, base_sig = sys.argv[1:6]
rows=[]
with open(rows_path) as f:
    for line in f:
        line=line.strip()
        if line:
            try: rows.append(json.loads(line))
            except json.JSONDecodeError: pass
out={
    "available": True,
    "have_oqs_provider": have_oqs=="1",
    "baseline": {"kem_group": base_kem, "sig_alg": base_sig, "label": f"{base_kem}+{base_sig}"},
    "matrix": rows,
}
json.dump(out, open(out_path,"w"), indent=2)
print(f"wrote {out_path}: {len(rows)} cells (have_oqs={have_oqs})")
PY

163
pq-bench-rpi5/config.yaml Normal file
View File

@ -0,0 +1,163 @@
# =============================================================================
# pq-bench-rpi5 candidate configuration
#
# This file is the single place users edit to extend the benchmark. The harness
# reads it with a dependency-free YAML subset parser (bench/lib/miniyaml.py), so
# keep to the simple shape used below: top-level maps, lists of scalars, and
# lists of "- key: value" maps. Comments (#) and quotes are fine.
#
# Algorithm names must match liboqs / oqs-provider identifiers exactly. To see
# what your built liboqs supports, run ./vendor/liboqs/build/tests/test_kem (it
# lists the algorithm names), or check the dashboard's "available algorithms"
# note after a run.
# =============================================================================
# ---- measurement knobs ------------------------------------------------------
# Default sizing is PER-OPERATION AUTO-CALIBRATION: each op (keygen/encaps/sign/
# ...) is timed long enough to accumulate ~target_time_ms of work, clamped to
# [min_samples, max_iters]. This makes an 18 us ML-KEM keygen and a ~750 ms
# SLH-DSA sign both yield a stable median without a hand-tuned per-alg count:
# - fast ops are bounded by max_iters
# - slow ops are bounded by min_samples (so even ~1 op per target still gets
# enough samples for a stable median/MAD)
# To force FIXED counts instead, pass `./run.sh --iters N` — that disables
# calibration and uses the warmup_iters/timed_iters fallback below.
measurement:
target_time_ms: 250 # auto: aggregate timed work to put on each op
min_samples: 30 # auto: floor on timed iters/rep (stable median+MAD)
max_iters: 20000 # auto: ceiling on timed iters/rep (caps fast ops)
repetitions: 5 # independent repetitions (fresh process each time)
cycles_mode: auto # auto | on | off (PMU userspace cycle counting)
# ---- fixed-count fallback (only used when ./run.sh --iters is given) ----
warmup_iters: 1000 # untimed iterations to settle caches/branch predictors
timed_iters: 10000 # timed iterations per repetition
# ---- security_level (NIST category) ----------------------------------------
# Every candidate carries `security_level: N` — the NIST PQC category the
# parameter set targets (1/2/3/5) — used only to group algorithms on the charts.
# The mapping is PER-SCHEME, so different schemes reach the same security target
# with different-looking sets: a level 1 sitting next to a level 2 is the real
# NIST categorization, not a typo. Notably:
# - ML-KEM-512, Falcon-512, X25519 -> Category 1
# - ML-DSA-44 -> Category 2 (FIPS 204 defines NO
# Category-1 ML-DSA; -44 is its
# smallest set and lands at Cat 2)
# - ML-KEM-768 / ML-DSA-65 -> Category 3
# - ML-KEM-1024 / ML-DSA-87 / Falcon-1024 -> Category 5
# (NIST levels: 1 ~ AES-128, 2 ~ SHA-256 collision, 3 ~ AES-192, 5 ~ AES-256.)
# ---- KEMs ------------------------------------------------------------------
# baseline = the classical reference Logos uses TODAY (drawn on every chart).
kem:
baseline:
- name: X25519
classical: true
security_level: 1 # NIST category (see "security_level" note above)
candidates:
- name: ML-KEM-512
security_level: 1
- name: ML-KEM-768
security_level: 3
- name: ML-KEM-1024
security_level: 5
# Hybrids (classical + PQ) — these are oqs-provider TLS group names; for the
# raw KEM bench they are skipped unless liboqs exposes them as a KEM.
- name: X25519MLKEM768
security_level: 3
hybrid: true
- name: SecP256r1MLKEM768
security_level: 3
hybrid: true
# --- Code-based + conservative-LWE backup candidates (added 2026-06) -------
# Verified enabled in the linked liboqs 0.15.0 build via OQS_KEM_alg_is_enabled
# (oqsconfig.h + runtime cross-check). Keygen/encaps/decaps are reported as
# separate ops so the docs' "keygen is a one-time setup cost" argument can show
# the (slow) McEliece keygen explicitly next to its tiny ciphertext.
#
# Classic McEliece — code-based. Huge public key, TINY ciphertext, very slow
# keygen on the Pi (auto-calibration clamps keygen to min_samples=30, like
# SLH-DSA sign); encaps/decaps are fast and calibrate normally. Standard sets,
# plus the 460896f fast-keygen variant directly next to standard 460896: same
# params and same tiny ciphertext but a faster keygen, so the pair gives the
# keygen trade-off directly (the most useful McEliece data point for the
# migration doc). Other f-variants omitted as redundant.
- name: Classic-McEliece-348864
security_level: 1
- name: Classic-McEliece-460896
security_level: 3
- name: Classic-McEliece-460896f
security_level: 3
- name: Classic-McEliece-6688128
security_level: 5
- name: Classic-McEliece-6960119
security_level: 5
- name: Classic-McEliece-8192128
security_level: 5
# FrodoKEM — conservative (unstructured) LWE. ~100x slower encaps/decaps than
# ML-KEM and ~15x larger ciphertext. AES variant (uses ARM AES instructions,
# which this build compiles in); SHAKE variants also available if needed.
- name: FrodoKEM-640-AES
security_level: 1
- name: FrodoKEM-976-AES
security_level: 3
- name: FrodoKEM-1344-AES
security_level: 5
# HQC-128/192/256: NOT enabled in this liboqs 0.15.0 build (disabled upstream
# after the IND-CCA2 implementation issue; oqsconfig.h has it #undef and the
# runtime OQS_KEM_alg_is_enabled returns 0). Intentionally omitted rather than
# listed-and-disabled. Re-add once linked against a liboqs with HQC re-enabled.
# ---- Signatures ------------------------------------------------------------
sig:
baseline:
- name: Ed25519
classical: true
security_level: 1
candidates:
- name: ML-DSA-44
security_level: 2 # Cat 2, not 1 — FIPS 204 has no Cat-1 ML-DSA (see note above)
- name: ML-DSA-65
security_level: 3
- name: ML-DSA-87
security_level: 5
- name: Falcon-512
security_level: 1
- name: Falcon-1024
security_level: 5
# SLH-DSA (SPHINCS+) — many parameter sets; a representative spread.
- name: SPHINCS+-SHA2-128f-simple
security_level: 1
- name: SPHINCS+-SHA2-128s-simple
security_level: 1
- name: SPHINCS+-SHA2-192f-simple
security_level: 3
- name: SPHINCS+-SHA2-256f-simple
security_level: 5
# --- extend here ---
# ---- TLS 1.3 handshake matrix ----------------------------------------------
# The bench runs (kem_group x sig_alg). The classical baseline pair is ALWAYS
# included as the reference point regardless of what is listed here.
tls:
baseline:
kem_group: X25519
sig_alg: ed25519 # OpenSSL classical auth Logos uses today
# PQ key-exchange groups to test (oqs-provider TLS group names)
kem_groups:
- X25519MLKEM768
- SecP256r1MLKEM768
- mlkem512
- mlkem768
- mlkem1024
# PQ signature algorithms for the server/CA cert (oqs-provider names)
sig_algs:
- mldsa44
- mldsa65
- mldsa87
- falcon512
- sphincssha2128fsimple
connections: 1000 # handshakes timed against a persistent s_server
# ---- future phase (NOT implemented now — hooks only) -----------------------
# zk:
# snark: [] # e.g. groth16, plonk, halo2
# stark: [] # e.g. risc0, winterfell

View File

@ -0,0 +1,304 @@
/* pq-bench-rpi5 dashboard — pure client-side; reads a merged.json produced by
 * analyze/merge.py. No backend. Renders KEM/sig/TLS charts with the classical
 * Logos baseline as a reference line on each. Defaults to baseline-grade (RPi5)
 * runs only; a toggle includes macOS/dev smoke runs. */
const LEVEL_COLORS = { 1:"#3bd67a", 2:"#46c0c0", 3:"#3a7bff", 5:"#b06bff", 0:"#888" };
const BASE_COLOR = "#e0533d", PQ_COLOR = "#3a7bff";
let MERGED = null, CHARTS = [];
const $ = (id) => document.getElementById(id);
const nsToMs = (ns) => (ns || 0) / 1e6;
/* Compact millisecond label: the smaller the value, the more decimals it gets
 * (0 decimals from 100 ms, 1 from 10 ms, 2 from 1 ms, otherwise 3).
 * The input is guarded: a plain number is used as-is, an object contributes its
 * numeric `y` (objects are never passed through Number(), which can throw on a
 * null-prototype object), and anything else is coerced with Number(). Values
 * that are not finite produce "" so a label draw never throws. */
const fmtMs = (v) => {
  let ms;
  if (typeof v === "number") {
    ms = v;
  } else if (v && typeof v === "object") {
    ms = typeof v.y === "number" ? v.y : NaN;
  } else {
    ms = Number(v);
  }
  if (!Number.isFinite(ms)) return "";

  let decimals = 3;
  if (ms >= 100) decimals = 0;
  else if (ms >= 10) decimals = 1;
  else if (ms >= 1) decimals = 2;
  return ms.toFixed(decimals);
};
/* Inline Chart.js plugin that prints each element's value 3px above it.
 * It is opt-in per chart: nothing is drawn unless the chart sets
 * options.plugins.valueLabels.formatter. Hidden datasets and null/undefined
 * data points are skipped. Needs nothing beyond the Chart.js core. */
const valueLabels = {
  id: "valueLabels",
  afterDatasetsDraw(chart) {
    const settings = (chart.options.plugins || {}).valueLabels;
    if (!settings || !settings.formatter) return;

    const g = chart.ctx;
    g.save();
    g.fillStyle = "#e6e8ee";
    g.font = "10px sans-serif";
    g.textAlign = "center";
    g.textBaseline = "bottom";

    const datasets = chart.data.datasets;
    for (let di = 0; di < datasets.length; di++) {
      const meta = chart.getDatasetMeta(di);
      if (meta.hidden) continue;
      const values = datasets[di].data;
      for (let i = 0; i < meta.data.length; i++) {
        const value = values[i];
        if (value == null) continue;
        const el = meta.data[i];
        g.fillText(settings.formatter(value, i), el.x, el.y - 3);
      }
    }
    g.restore();
  }
};
if (window.Chart) Chart.register(valueLabels);
/* Entry point: wire the three controls, then try to auto-load
 * data/merged.json. A non-OK response or any thrown error ends in an
 * explanatory empty-state message instead of charts. */
async function boot() {
  const bindings = [
    ["fileInput", onFile],
    ["includeSmoke", render],
    ["runSelect", render],
  ];
  for (const [id, handler] of bindings) $(id).addEventListener("change", handler);

  try {
    const resp = await fetch("data/merged.json", { cache: "no-store" });
    if (!resp.ok) {
      showEmpty("No data/merged.json yet. Run a benchmark, then: " +
        "<code>python3 analyze/merge.py results/*.json -o dashboard/data/merged.json</code> " +
        "— or load a results file above.");
      return;
    }
    MERGED = await resp.json();
    afterLoad();
  } catch (e) {
    showEmpty("Could not auto-load data/merged.json (open via a local server or use the file picker).");
  }
}
/* File-picker handler: load a user-chosen JSON file as the dataset.
 * Accepts either a merged file (has `merged_schema`) or a single results file,
 * which is wrapped into the merged shape. A read failure, invalid JSON, or a
 * payload that is not a JSON object is reported in the banner instead of
 * surfacing as an uncaught exception; MERGED is only replaced on success. */
function onFile(ev) {
  const f = ev.target.files[0]; if (!f) return;

  // Report through the banner element; textContent keeps the file name and
  // parser message from being interpreted as markup.
  const fail = (why) => {
    const el = $("quality-banner");
    el.className = "banner-warn";
    el.textContent = `Could not load ${f.name}: ${why}`;
  };

  const rd = new FileReader();
  rd.onerror = () => fail((rd.error && rd.error.message) || "the file could not be read");
  rd.onload = () => {
    let d;
    try {
      d = JSON.parse(rd.result);
    } catch (e) {
      fail(`not valid JSON (${e.message})`);
      return;
    }
    if (!d || typeof d !== "object" || Array.isArray(d)) {
      fail("expected a JSON object (a results file or a merged file)");
      return;
    }
    // accept either a merged file or a single results file
    MERGED = d.merged_schema ? d : wrapSingle(d);
    afterLoad();
  };
  rd.readAsText(f);
}
/* Convert one results document into the merged shape so a single results file
 * can be loaded through the picker.
 * Produces one row per (enabled algorithm, operation) for KEMs and signatures
 * and one row per enabled TLS matrix cell; every row carries the run metadata.
 * Returns { merged_schema:"single", n_runs:1, runs:[…], kem, sig, tls }. */
function wrapSingle(d) {
  const host = d.host || {};
  const tlsDoc = d.tls || {};
  const thermal = d.thermal_trace || {};
  const rid = `${host.hostname}@${d.generated_utc}`;
  const meta = {
    run_id: rid,
    hostname: host.hostname,
    cpu_brand: host.cpu_brand,
    is_rpi: host.is_rpi,
    is_baseline_grade: d.is_baseline_grade,
  };

  // KEM and signature entries share one layout: flatten operations into rows.
  const perOperation = (entries) => (entries || [])
    .filter((entry) => entry.enabled)
    .flatMap((entry) => Object.entries(entry.operations || {}).map(([op, st]) => ({
      ...meta,
      alg: entry.alg,
      classical: !!entry.classical,
      nist_level: entry.claimed_nist_level,
      operation: op,
      median_ns: st.median,
      sizes: entry.sizes,
    })));

  const kem = perOperation(d.kem);
  const sig = perOperation(d.sig);

  const baselineLabel = (tlsDoc.baseline || {}).label;
  const tls = (tlsDoc.matrix || [])
    .filter((cell) => cell.enabled)
    .map((cell) => ({
      ...meta,
      label: cell.label,
      group: cell.group,
      is_baseline_pair: cell.label === baselineLabel,
      handshakes_per_sec: cell.handshakes_per_sec,
      median_ns: (cell.handshake_latency_ns || {}).median,
      bytes_total: (cell.bytes_on_wire || {}).total,
      client_hello_bytes: cell.client_hello_bytes,
      client_hello_fragmented: cell.client_hello_fragmented,
    }));

  const runEntry = {
    run_id: rid,
    host: d.host,
    is_baseline_grade: d.is_baseline_grade,
    baseline_grade_reasons: d.baseline_grade_reasons || [],
    toolchain: d.toolchain,
    cpu_features: d.cpu_features,
    run: d.run,
    thermal_summary: {
      temp_c: thermal.temp_c,
      throttling_detected: thermal.throttling_detected,
    },
    generated_utc: d.generated_utc,
  };

  return { merged_schema: "single", n_runs: 1, runs: [runEntry], kem, sig, tls };
}
/* Rebuild the run selector from MERGED.runs (one <option> per run, keyed by
 * run_id), then redraw everything. */
function afterLoad() {
  const picker = $("runSelect");
  picker.innerHTML = "";
  for (const r of MERGED.runs) {
    const opt = document.createElement("option");
    const brand = (r.host || {}).cpu_brand || "?";
    const grade = r.is_baseline_grade ? "✅ baseline" : "⚠ smoke";
    opt.value = r.run_id;
    opt.textContent = `${brand} — ${grade}${r.generated_utc||""}`;
    picker.appendChild(opt);
  }
  render();
}
/* The run whose run_id matches the selector's value, or the first run when
 * nothing matches. */
function currentRun() {
  const wanted = $("runSelect").value;
  for (const r of MERGED.runs) {
    if (r.run_id === wanted) return r;
  }
  return MERGED.runs[0];
}
/* Redraw the whole dashboard for the current selection.
 * Existing charts are destroyed first. Rows are limited to the selected run
 * when that run is allowed (baseline-grade, or any run when the smoke toggle is
 * on); otherwise rows from every allowed run are charted. The banner and the
 * environment chips always describe the selected run. */
function render() {
  if (!MERGED) return;
  for (const chart of CHARTS) chart.destroy();
  CHARTS = [];

  const selected = currentRun();
  const showSmoke = $("includeSmoke").checked;
  const allowedIds = new Set();
  for (const r of MERGED.runs) {
    if (showSmoke || r.is_baseline_grade) allowedIds.add(r.run_id);
  }

  // chart the selected run if allowed, else fall back to allowed set
  const pinnedId = allowedIds.has(selected.run_id) ? selected.run_id : null;
  const keep = pinnedId
    ? (row) => row.run_id === pinnedId
    : (row) => allowedIds.has(row.run_id);

  renderBanner(selected);
  renderEnv(selected);

  const kem = MERGED.kem.filter(keep);
  const sig = MERGED.sig.filter(keep);
  const tls = MERGED.tls.filter(keep);

  barByLevel("kem_keygen", kem, "keygen", "KEM keygen — median latency (ms)");
  barByLevel("kem_encaps", kem, "encaps", "KEM encaps — median latency (ms)", "derive");
  barByLevel("kem_decaps", kem, "decaps", "KEM decaps — median latency (ms)", "derive");
  scatter("kem_scatter", kem, "encaps", "public_key", "KEM size vs speed (encaps)", "public key (B)");

  barByLevel("sig_sign", sig, "sign", "Signature sign — median latency (ms)", "sign", true);
  barByLevel("sig_verify", sig, "verify", "Signature verify — median latency (ms)", "verify", true);
  scatter("sig_scatter", sig, "sign", "signature", "Signature size vs speed (sign)", "signature (B)", true);

  tlsThroughput("tls_hs", tls);
  tlsClientHello("tls_chello", tls);
}
/* Show the quality banner for `run`: a green notice for a baseline-grade run,
 * otherwise a warning followed by the list of reasons it is not baseline-grade.
 * The reasons come from the loaded JSON file, so they are inserted as text
 * nodes (textContent) and never parsed as HTML. */
function renderBanner(run) {
  const el = $("quality-banner");
  if (run.is_baseline_grade) {
    el.className = "banner-good";
    el.innerHTML = "✅ RPi5 baseline-grade run — performance governor, core-pinned, " +
      "cortex-a76 flags, no thermal throttling.";
    return;
  }

  el.className = "banner-warn";
  // Assigning textContent also clears whatever the banner showed before.
  el.textContent = "⚠ NOT RPi5 baseline-grade — treat as a pipeline smoke test, not measurement data.";
  const reasons = run.baseline_grade_reasons || [];
  if (reasons.length) {
    const list = document.createElement("ul");
    reasons.forEach((reason) => {
      const item = document.createElement("li");
      item.textContent = String(reason);
      list.appendChild(item);
    });
    el.appendChild(list);
  }
}
/* Render the environment summary chips (host, governor, pinning, build flags,
 * liboqs version, CPU crypto extensions, cycle counting, temperature) for
 * `run`. Every label and value is HTML-escaped before it reaches innerHTML,
 * because these strings come from the loaded results file. */
function renderEnv(run) {
  const h = run.host||{}, t = run.toolchain||{}, f = run.cpu_features||{}, rn = run.run||{};
  const entities = { "&":"&amp;", "<":"&lt;", ">":"&gt;", '"':"&quot;", "'":"&#39;" };
  const esc = (s) => String(s).replace(/[&<>"']/g, (c) => entities[c]);
  const chip = (label, val) => `<span class="chip"><b>${esc(label)}</b> ${esc(val)}</span>`;
  const sha3 = f.sha3 ? "SHA3 hw ✓" : "SHA3 hw ✗ (Keccak on NEON)";
  const thermal = run.thermal_summary;
  // Temperature is shown only when the run carries a thermal summary.
  const temp = thermal && thermal.temp_c
    ? `${thermal.temp_c.mean}°C` + (thermal.throttling_detected ? " ⚠throttled" : "")
    : "n/a";
  $("env-summary").innerHTML = [
    chip("host", `${h.cpu_brand||"?"} (${h.os_pretty||h.os||"?"})`),
    chip("governor", rn.governor_after||"?"),
    chip("pinned core", rn.pinned ? rn.bench_core : "no"),
    chip("flags", `${t.bench_cflags||"?"}${t.cflags_target||"?"}`),
    chip("liboqs", `${t.liboqs_ref||"?"} ${(t.liboqs_commit||"").slice(0,8)}`),
    chip("crypto-ext", `${f.neon?"NEON ":""}${f.sha2?"SHA2 ":""}${sha3}`),
    chip("cycles", rn.cycles_available ? "PMU ✓" : "time-based"),
    chip("temp", temp),
  ].join("");
}
/* ---- chart builders ----------------------------------------------------- */
/* Annotation-plugin options for a dashed horizontal reference line at `value`,
 * tagged with `label`. Returns {} when there is no value to draw. */
function baselineAnnotation(value, label) {
  if (value === null || value === undefined) return {};
  const tag = {
    display: true,
    content: label,
    position: "end",
    backgroundColor: BASE_COLOR,
    font: { size: 10 },
  };
  const line = {
    type: "line",
    yMin: value,
    yMax: value,
    borderColor: BASE_COLOR,
    borderWidth: 2,
    borderDash: [6, 4],
    label: tag,
  };
  return { annotations: { base: line } };
}
/* Bar chart of median latency (ms) for one operation, one bar per algorithm.
 *   canvasId   - id of the target <canvas>
 *   rows       - flattened KEM or signature rows
 *   op         - operation to plot (rows are filtered on r.operation === op)
 *   title      - chart title; also used as the empty-state message
 *   baselineOp - operation of the classical row used for the reference line
 *   logY       - logarithmic y axis when true
 * Bars are ordered by NIST level, then by median latency. Classical rows use
 * BASE_COLOR; other rows are coloured by NIST level, falling back to PQ_COLOR. */
function barByLevel(canvasId, rows, op, title, baselineOp = op, logY = false) {
const data = rows.filter(r => r.operation === op)
.sort((a,b)=>(a.nist_level||0)-(b.nist_level||0) || a.median_ns-b.median_ns);
// Nothing to plot for this operation: write the title on the canvas instead.
if (!data.length) return drawEmpty(canvasId, title);
/* Baseline reference line: the classical row for baselineOp (defaults to this
* chart's own op). KEM encaps/decaps have no classical encaps/decaps op, so
* they map to the X25519 key-agreement (derive) timing instead. */
const base = rows.find(r => r.classical && r.operation === baselineOp);
const ctx = $(canvasId).getContext("2d");
// Track the chart so render() can destroy it before the next redraw.
CHARTS.push(new Chart(ctx, {
type:"bar",
data:{ labels:data.map(r=>r.alg),
datasets:[{ label:title,
data:data.map(r=>nsToMs(r.median_ns)),
backgroundColor:data.map(r=> r.classical?BASE_COLOR:(LEVEL_COLORS[r.nist_level]||PQ_COLOR)) }] },
options:{ responsive:true, plugins:{
title:{display:true,text:title,color:"#e6e8ee"},
legend:{display:false},
valueLabels:{ formatter:(v)=>fmtMs(v) },
tooltip:{callbacks:{
label:(it)=>`median ${it.raw.toFixed(4)} ms (${Math.round(it.raw*1e6).toLocaleString()} ns)`,
afterLabel:(it)=>{
const r=data[it.dataIndex]; return `NIST L${r.nist_level} · ${r.classical?"classical baseline":"PQ"}`; }}},
// When the baseline comes from a different operation, name that operation in the label.
annotation: base ? baselineAnnotation(nsToMs(base.median_ns),
baselineOp === op ? `baseline ${base.alg}` : `baseline ${base.alg} ${base.operation}`) : {} },
scales:{ x:{ticks:{color:"#9aa3b2",maxRotation:50,minRotation:40}},
y:{ type: logY?"logarithmic":"linear",
title:{display:true,text:logY?"ms (log)":"ms",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}} } }
}));
}
/* Scatter plot of artifact size (x) against median latency in ms (y) for one
 * operation.
 *   canvasId - id of the target <canvas>
 *   rows     - flattened KEM or signature rows
 *   op       - operation to plot
 *   sizeKey  - key inside r.sizes that supplies the x value
 *   title    - chart title; also used as the empty-state message
 *   xlabel   - x-axis title
 *   logScale - both axes logarithmic when true
 * Rows without a truthy r.sizes[sizeKey] are left out. */
function scatter(canvasId, rows, op, sizeKey, title, xlabel, logScale = false) {
const data = rows.filter(r => r.operation === op && r.sizes && r.sizes[sizeKey]);
if (!data.length) return drawEmpty(canvasId, title);
// Each point carries alg/classical so the tooltip and colouring can use them.
const pts = data.map(r => ({ x:r.sizes[sizeKey], y:nsToMs(r.median_ns), alg:r.alg, classical:r.classical }));
const ctx = $(canvasId).getContext("2d");
CHARTS.push(new Chart(ctx, {
type:"scatter",
data:{ datasets:[{ label:title, data:pts, pointRadius:6,
backgroundColor:pts.map(p=>p.classical?BASE_COLOR:PQ_COLOR) }] },
options:{ responsive:true, plugins:{
title:{display:true,text:title,color:"#e6e8ee"}, legend:{display:false},
tooltip:{callbacks:{label:(it)=>`${it.raw.alg}: ${it.raw.x} B, ${it.raw.y.toFixed(3)} ms`}} },
scales:{ x:{ type: logScale?"logarithmic":"linear",
title:{display:true,text:logScale?`${xlabel} (log)`:xlabel,color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
y:{ type: logScale?"logarithmic":"linear",
title:{display:true,text:logScale?"median latency (ms, log)":"median latency (ms)",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}} } }
}));
}
/* Horizontal bar chart of TLS handshakes per second, one bar per matrix cell,
 * highest throughput first. The row flagged is_baseline_pair is drawn in
 * BASE_COLOR and, when present, also marked with a dashed vertical line.
 *   canvasId - id of the target <canvas>
 *   rows     - flattened TLS rows */
function tlsThroughput(canvasId, rows) {
if (!rows.length) return drawEmpty(canvasId, "TLS handshakes/sec — run the TLS layer");
// Sort a copy (descending) so the caller's array keeps its order.
const data = rows.slice().sort((a,b)=>(b.handshakes_per_sec||0)-(a.handshakes_per_sec||0));
const base = data.find(r => r.is_baseline_pair);
const ctx = $(canvasId).getContext("2d");
CHARTS.push(new Chart(ctx, {
type:"bar",
data:{ labels:data.map(r=>r.label),
datasets:[{ label:"handshakes/sec",
data:data.map(r=>r.handshakes_per_sec),
backgroundColor:data.map(r=>r.is_baseline_pair?BASE_COLOR:PQ_COLOR) }] },
options:{ indexAxis:"y", responsive:true, plugins:{
title:{display:true,text:"TLS 1.3 handshake throughput (higher = better)",color:"#e6e8ee"},
legend:{display:false},
// indexAxis is "y", so the baseline is a vertical line positioned with xMin/xMax.
annotation: base ? { annotations:{ base:{ type:"line",
xMin:base.handshakes_per_sec, xMax:base.handshakes_per_sec,
borderColor:BASE_COLOR, borderWidth:2, borderDash:[6,4],
label:{display:true,content:`baseline ${base.label}`,position:"end",
backgroundColor:BASE_COLOR,font:{size:10}} } } } : {} },
scales:{ x:{title:{display:true,text:"handshakes/sec",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
y:{ticks:{color:"#9aa3b2",font:{size:10}}} } }
}));
}
/* Horizontal bar chart of ClientHello size in bytes, one bar per matrix cell,
 * largest first. The baseline pair is drawn in BASE_COLOR, cells flagged
 * client_hello_fragmented in orange, everything else in PQ_COLOR. A dashed line
 * at 1400 bytes is always drawn; a second line marks the baseline pair's size
 * when that row is present.
 *   canvasId - id of the target <canvas>
 *   rows     - flattened TLS rows */
function tlsClientHello(canvasId, rows) {
if (!rows.length) return drawEmpty(canvasId, "ClientHello size — run the TLS layer");
// Sort a copy (descending) so the caller's array keeps its order.
const data = rows.slice().sort((a,b)=>(b.client_hello_bytes||0)-(a.client_hello_bytes||0));
const base = data.find(r => r.is_baseline_pair);
const ctx = $(canvasId).getContext("2d");
CHARTS.push(new Chart(ctx, {
type:"bar",
data:{ labels:data.map(r=>r.label),
datasets:[{ label:"ClientHello bytes",
data:data.map(r=>r.client_hello_bytes),
backgroundColor:data.map(r=> r.is_baseline_pair?BASE_COLOR
: (r.client_hello_fragmented?"#d98b2b":PQ_COLOR)) }] },
options:{ indexAxis:"y", responsive:true, plugins:{
title:{display:true,text:"ClientHello size (orange = exceeds ~1400B MSS → fragments)",color:"#e6e8ee"},
legend:{display:false},
// Fixed threshold line first; the baseline line is spread in only when a baseline row exists.
annotation:{ annotations:{ mss:{ type:"line", xMin:1400, xMax:1400,
borderColor:"#d98b2b", borderWidth:1, borderDash:[4,4],
label:{display:true,content:"~MSS 1400B",position:"start",backgroundColor:"#d98b2b",font:{size:9}} },
...(base?{base:{type:"line",xMin:base.client_hello_bytes,xMax:base.client_hello_bytes,
borderColor:BASE_COLOR,borderWidth:2,borderDash:[6,4],
label:{display:true,content:`baseline ${base.label}`,position:"end",backgroundColor:BASE_COLOR,font:{size:10}}}}:{}) } } },
scales:{ x:{title:{display:true,text:"bytes",color:"#9aa3b2"},ticks:{color:"#9aa3b2"}},
y:{ticks:{color:"#9aa3b2",font:{size:10}}} } }
}));
}
/* Clear the canvas `canvasId` and write `msg`, centred horizontally, at its
 * midpoint. Used when a chart has no rows to plot. */
function drawEmpty(canvasId, msg) {
  const canvas = $(canvasId);
  const g = canvas.getContext("2d");
  const { width, height } = canvas;
  g.clearRect(0, 0, width, height);
  g.fillStyle = "#9aa3b2";
  g.font = "13px sans-serif";
  g.textAlign = "center";
  g.fillText(msg, width / 2, height / 2);
}
/* Show an empty-state notice at the top of <main>.
 * `html` is trusted markup written by this script (it may contain <code>), so
 * it is assigned through innerHTML.
 * The notice is added alongside the chart canvases rather than replacing
 * <main>'s content: wiping <main> would remove every <canvas>, and the next
 * dataset loaded through the file picker would then fail on a missing canvas.
 * The notice removes itself the next time a file is chosen. */
function showEmpty(html) {
  const main = document.querySelector("main");
  let note = main.querySelector(":scope > .empty");
  if (!note) {
    note = document.createElement("div");
    note.className = "empty";
    main.prepend(note);
  }
  note.innerHTML = html;

  // Once the user picks a dataset the notice is stale; drop it at that point.
  const picker = document.getElementById("fileInput");
  if (picker) picker.addEventListener("change", () => note.remove(), { once: true });
}
boot();

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,68 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>PQ Benchmark — RPi5 baseline</title>
<link rel="stylesheet" href="style.css">
<!-- Chart.js + annotation plugin via CDN (works on GitHub Pages; charts still
render without the annotation line if the CDN is unreachable). -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation@3.0.1/dist/chartjs-plugin-annotation.min.js"></script>
</head>
<body>
<header>
<h1>Post-Quantum Crypto Benchmark <span class="sub">Raspberry Pi 5 baseline</span></h1>
<p class="framing">Migration cost: moving from what Logos uses <b>today</b>
(X25519 + Ed25519) to PQ candidates, on validator-grade hardware. The
classical baseline is drawn as a reference line on every chart.</p>
</header>
<section id="controls">
<label>Dataset:
<input type="file" id="fileInput" accept="application/json">
<span class="hint">or auto-loads <code>data/merged.json</code></span>
</label>
<label class="toggle">
<input type="checkbox" id="includeSmoke"> include non-baseline (smoke / dev) runs
</label>
<label>Run:
<select id="runSelect"></select>
</label>
</section>
<div id="quality-banner"></div>
<div id="env-summary"></div>
<main>
<h2>Key Encapsulation (KEM)</h2>
<div class="chart-grid">
<figure><canvas id="kem_keygen"></canvas></figure>
<figure><canvas id="kem_encaps"></canvas></figure>
<figure><canvas id="kem_decaps"></canvas></figure>
<figure><canvas id="kem_scatter"></canvas></figure>
</div>
<h2>Signatures</h2>
<div class="chart-grid">
<figure><canvas id="sig_sign"></canvas></figure>
<figure><canvas id="sig_verify"></canvas></figure>
<figure><canvas id="sig_scatter"></canvas></figure>
</div>
<h2>TLS 1.3 Handshakes <span class="sub">(KEM group × signature)</span></h2>
<div class="chart-grid">
<figure><canvas id="tls_hs"></canvas></figure>
<figure><canvas id="tls_chello"></canvas></figure>
</div>
</main>
<footer>
<p>Generated by <code>pq-bench-rpi5</code>. Numbers are only RPi5-baseline-grade
when the run banner says so (real Pi 5 · performance governor · core-pinned ·
cortex-a76 flags · no thermal throttling). Everything else is a pipeline smoke test.</p>
</footer>
<script src="app.js"></script>
</body>
</html>

View File

@ -0,0 +1,40 @@
:root {
--bg:#0f1115; --panel:#181b22; --ink:#e6e8ee; --muted:#9aa3b2;
--accent:#3bd67a; --base:#e0533d; --pq:#3a7bff; --line:#2a2f3a;
}
* { box-sizing:border-box; }
body { margin:0; font:15px/1.5 -apple-system,Segoe UI,Roboto,sans-serif;
background:var(--bg); color:var(--ink); }
header { padding:24px 28px 8px; }
h1 { margin:0; font-size:22px; }
h1 .sub, h2 .sub { color:var(--muted); font-weight:400; font-size:0.7em; }
.framing { color:var(--muted); max-width:760px; }
h2 { margin:30px 28px 8px; border-bottom:1px solid var(--line); padding-bottom:6px; }
#controls { display:flex; flex-wrap:wrap; gap:18px; align-items:center;
padding:12px 28px; background:var(--panel); margin:8px 0; }
#controls label { color:var(--muted); font-size:14px; }
#controls .hint { font-size:12px; opacity:.7; }
select, input[type=file] { color:var(--ink); background:#0c0e12;
border:1px solid var(--line); border-radius:6px; padding:4px 6px; }
#quality-banner { margin:8px 28px; padding:12px 16px; border-radius:8px; font-weight:600; }
.banner-good { background:#12331f; border:1px solid #1f7a44; color:#7df0a8; }
.banner-warn { background:#3a2410; border:1px solid #aa6a1f; color:#ffce8a; }
.banner-warn ul { font-weight:400; margin:6px 0 0; color:#ffd9a8; }
#env-summary { margin:0 28px 8px; color:var(--muted); font-size:13px;
display:flex; flex-wrap:wrap; gap:6px 18px; }
#env-summary b { color:var(--ink); }
#env-summary .chip { background:var(--panel); padding:3px 9px; border-radius:20px;
border:1px solid var(--line); }
.chart-grid { display:grid; grid-template-columns:repeat(auto-fit,minmax(420px,1fr));
gap:18px; padding:14px 28px; }
figure { margin:0; background:var(--panel); border:1px solid var(--line);
border-radius:10px; padding:12px; min-height:320px; }
canvas { max-height:340px; }
footer { color:var(--muted); font-size:12px; padding:18px 28px 40px; max-width:820px; }
code { background:#0c0e12; padding:1px 5px; border-radius:4px; }
.empty { color:var(--muted); padding:30px; text-align:center; }

View File

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

290
pq-bench-rpi5/run.sh Executable file
View File

@ -0,0 +1,290 @@
#!/usr/bin/env bash
# =============================================================================
# run.sh — measurement wrapper + orchestrator.
#
# Does the things that make a number credible:
# * sets the CPU governor to `performance` (Linux; warns elsewhere)
# * pins the benchmark to a single isolated core via taskset (core 3 on RPi5;
# core 3 stays clear of CPU0 where the kernel steers IRQs/RPS)
# * logs ARM clock + SoC temperature throughout, embeds the trace in results,
# and warns on thermal throttling
# * runs every candidate from config.yaml, then assembles one results JSON
# stamped with full host + toolchain provenance.
#
# Usage:
# ./run.sh # full run using config.yaml knobs
# ./run.sh --smoke # single repetition (reps=1): pipeline smoke test, not data
# ./run.sh --kemsig-only # skip the TLS layer
# ./run.sh --tls-only # only the TLS layer
# ./run.sh --iters N --warmup N --reps N # override measurement knobs
# sudo ./run.sh # needed on Linux to set the governor
# =============================================================================
# Strict mode, then load the platform helpers and the pinned versions.
set -euo pipefail
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TOOL_VERSION="0.1.0"
# shellcheck source=setup/lib_platform.sh
source "$ROOT/setup/lib_platform.sh"
# shellcheck source=setup/versions.env
source "$ROOT/setup/versions.env"
LOCK="$ROOT/setup/versions.lock"
# versions.lock is optional. A missing file and a file that failed to load are
# reported separately, so the warning always names the real problem.
if [ -f "$LOCK" ]; then
  # shellcheck source=/dev/null
  source "$LOCK" || pqb_warn "could not load $LOCK — re-run ./setup/setup.sh"
else
  pqb_warn "no versions.lock — run ./setup/setup.sh first"
fi
pqb_detect_platform
# ---- args ------------------------------------------------------------------
# Flags select which layers run; --iters/--warmup/--reps override the
# measurement knobs and each takes exactly one value. A value-taking option
# given as the last argument is rejected with a clear message; without the
# check, "$2" is unbound and `set -u` aborts with a cryptic error.
SMOKE=0; DO_KEMSIG=1; DO_TLS=1
OVR_ITERS=""; OVR_WARMUP=""; OVR_REPS=""
while [ $# -gt 0 ]; do
  case "$1" in
    --smoke) SMOKE=1 ;;
    --kemsig-only) DO_TLS=0 ;;
    --tls-only) DO_KEMSIG=0 ;;
    --no-tls) DO_TLS=0 ;;
    --iters|--warmup|--reps)
      if [ $# -lt 2 ]; then
        pqb_err "$1 requires a value"
        exit 2
      fi
      case "$1" in
        --iters) OVR_ITERS="$2" ;;
        --warmup) OVR_WARMUP="$2" ;;
        --reps) OVR_REPS="$2" ;;
      esac
      shift ;;
    -h|--help) grep '^#' "$0" | sed 's/^# \{0,1\}//'; exit 0 ;;
    *) pqb_err "unknown arg: $1"; exit 2 ;;
  esac
  shift
done
# ---- measurement knobs (config.yaml, overridable) --------------------------
# Sets TARGET_TIME_MS MIN_SAMPLES MAX_ITERS REPS CYCLES_MODE (auto-calibration,
# the default path) plus WARMUP ITERS (the fixed-count fallback).
# The helper's output is captured before it is eval'd. A failure inside
# `eval "$(…)"` would be masked (eval of an empty string succeeds) and would
# only show up later as an unbound-variable error.
if ! MEASUREMENT_ENV="$(python3 "$ROOT/bench/lib/list_algs.py" measurement "$ROOT/config.yaml")"; then
  pqb_err "could not read measurement knobs from $ROOT/config.yaml (bench/lib/list_algs.py failed)"
  exit 1
fi
eval "$MEASUREMENT_ENV"
# Mode: auto-calibrate per op (default) unless --iters forces a fixed count.
CALIB_MODE="auto"
if [ -n "$OVR_ITERS" ]; then CALIB_MODE="fixed"; ITERS="$OVR_ITERS"; fi
[ -n "$OVR_WARMUP" ] && WARMUP="$OVR_WARMUP"
[ -n "$OVR_REPS" ] && REPS="$OVR_REPS"
if [ "$SMOKE" = 1 ]; then
  # Keep auto-calibration (so each op still reaches target) but a single rep, so
  # the sweep stays short. This is a pipeline test, NOT measurement data.
  REPS=1
  pqb_warn "SMOKE MODE: reps=1 — pipeline test only, NOT measurement data"
fi
# Assemble the per-op sizing args passed to every bench_pq invocation.
if [ "$CALIB_MODE" = "fixed" ]; then
  BENCH_SIZE_ARGS=(--warmup "$WARMUP" --iters "$ITERS" --reps "$REPS")
  pqb_log "sizing: FIXED-count warmup=$WARMUP iters=$ITERS reps=$REPS"
else
  BENCH_SIZE_ARGS=(--target-time-ms "$TARGET_TIME_MS" --min-samples "$MIN_SAMPLES" \
    --max-iters "$MAX_ITERS" --reps "$REPS")
  pqb_log "sizing: AUTO-calibrate target=${TARGET_TIME_MS}ms min_samples=$MIN_SAMPLES max_iters=$MAX_ITERS reps=$REPS"
fi
BENCH_CORE="${BENCH_CORE:-3}"
export PQB_BENCH_CORE="$BENCH_CORE"
# ---- work directory --------------------------------------------------------
# One scratch directory per run, keyed by host id and UTC timestamp.
HOST=$(pqb_resolve_hostname)
TS=$(date -u +%Y%m%dT%H%M%SZ)
WORK="$ROOT/results/.work-$HOST-$TS"
mkdir -p "$WORK" "$ROOT/results"
KEMSIG_OUT="$WORK/kemsig.jsonl"
TLS_OUT="$WORK/tls.json"
THERMAL="$WORK/thermal.csv"
META="$WORK/meta.env"
FEATURES="$WORK/cpu_features.json"
# The two append-only outputs start out empty.
: > "$KEMSIG_OUT"
: > "$THERMAL"
# Warnings are both printed and accumulated ("||"-separated) so they can be
# written into the run metadata later.
WARN_ACC=""
add_warn() {
  if [ -n "$WARN_ACC" ]; then
    WARN_ACC="$WARN_ACC||$1"
  else
    WARN_ACC="$1"
  fi
  pqb_warn "$1"
}
# ---- governor --------------------------------------------------------------
# Record the governor before the run, then try to switch to 'performance'.
# pqb_set_governor_performance prints the governor it left the system in; its
# exit status is deliberately ignored here and the printed value is checked.
GOV_REQUESTED="performance"
GOV_BEFORE="$(pqb_get_governor)"
GOV_AFTER="$(pqb_set_governor_performance || true)"
case "$GOV_AFTER" in
  performance) ;;
  *) add_warn "governor is '$GOV_AFTER', not 'performance' (need root on Linux, or unsupported on macOS)" ;;
esac
# ---- core pinning ----------------------------------------------------------
# An empty prefix from pqb_taskset_prefix means no pinning tool is available.
TASKSET="$(pqb_taskset_prefix "$BENCH_CORE")"
if [ -z "$TASKSET" ]; then
  PINNED=0
  add_warn "core pinning unavailable (no taskset/numactl) — results will be noisier"
else
  PINNED=1
  pqb_log "pinning to core $BENCH_CORE via: $TASKSET"
fi
# ---- thermal sampler (background) ------------------------------------------
# Sampling is best-effort: a single failed sample must neither end the
# background loop nor abort the run (stderr is discarded here, so such an abort
# would be silent).
SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}"
pqb_log "starting thermal/clock sampler (every ${SAMPLE_INTERVAL}s) -> $THERMAL"
(
  while :; do
    pqb_sample_thermal >> "$THERMAL" 2>/dev/null || true
    sleep "$SAMPLE_INTERVAL"
  done
) &
SAMPLER_PID=$!
disown "$SAMPLER_PID" 2>/dev/null || true # suppress job-control "Terminated" noise on kill
# The PID is expanded now (double quotes) so the trap kills this exact sampler.
# shellcheck disable=SC2064
trap "kill $SAMPLER_PID 2>/dev/null || true" EXIT
pqb_sample_thermal >> "$THERMAL" 2>/dev/null || true # one immediate sample
# ---- CPU features ----------------------------------------------------------
pqb_cpu_features_json > "$FEATURES"
pqb_log "cpu features: $(cat "$FEATURES")"
# Report whether Keccak/SHA3 *instruction* acceleration is available + compiled.
# The file path is handed to Python as an argument rather than spliced into the
# program text, so a quote character in the path cannot break the snippet.
SHA3_HW="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["sha3"])' "$FEATURES" 2>/dev/null || echo unknown)"
# LIBOQS_OPT_DEFINES comes from versions.lock (empty when no lock was loaded).
case "${LIBOQS_OPT_DEFINES:-}" in
  *"OQS_USE_ARM_SHA3_INSTRUCTIONS 1"*) SHA3_COMPILED=1 ;;
  *) SHA3_COMPILED=0 ;;
esac
pqb_log "Keccak/SHA3: hw_instructions=$SHA3_HW liboqs_compiled_sha3=$SHA3_COMPILED (A76 has no SHA3 ext; Keccak runs on NEON there)"
# Wall-clock start of the measured part of the run (ISO-8601 UTC + epoch seconds).
TS_START="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
START_EPOCH="$(date +%s)"
# ---- build harness if needed -----------------------------------------------
# OpenSSL prefix for the harness build: the one recorded by setup if present,
# otherwise Homebrew's openssl@3, otherwise /usr.
if [ -n "${OPENSSL_PREFIX:-}" ]; then
  OSSL_PREFIX_FOR_BUILD="$OPENSSL_PREFIX"
else
  OSSL_PREFIX_FOR_BUILD="$(brew --prefix openssl@3 2>/dev/null || echo /usr)"
fi
# Rebuild when the binary is missing/non-executable or older than its source.
_pqb_harness_dir="$ROOT/bench/kem_sig"
if [ ! -x "$_pqb_harness_dir/bench_pq" ] || [ "$_pqb_harness_dir/bench_pq.c" -nt "$_pqb_harness_dir/bench_pq" ]; then
  pqb_log "building bench_pq harness"
  make -C "$_pqb_harness_dir" \
    LIBOQS_PREFIX="${PREFIX:-$ROOT/vendor/install}" \
    OPENSSL_PREFIX="$OSSL_PREFIX_FOR_BUILD" \
    BENCH_CFLAGS="${BENCH_CFLAGS:--O3}" >/dev/null
fi
# ---- KEM/sig sweep ---------------------------------------------------------
# Runs the bench_pq harness once per (kind, algorithm) row listed by
# list_algs.py, appending its stdout to $KEMSIG_OUT and keeping stderr per
# algorithm in $WORK/err.<kind>.<alg>.txt.
CYCLES_AVAILABLE=0; CYCLES_REASON="not probed"
if [ "$DO_KEMSIG" = 1 ]; then
  if [ "$CALIB_MODE" = "fixed" ]; then
    pqb_log "running KEM/sig sweep (fixed: warmup=$WARMUP iters=$ITERS reps=$REPS)"
  else
    pqb_log "running KEM/sig sweep (auto-calibrate: target=${TARGET_TIME_MS}ms min_samples=$MIN_SAMPLES max_iters=$MAX_ITERS reps=$REPS)"
  fi
  # Rows are tab-separated: kind, algorithm, and a third column that this loop
  # reads but does not use.
  while IFS=$'\t' read -r kind alg classical; do
    [ -z "$alg" ] && continue
    pqb_log " $kind $alg"
    ERRF="$WORK/err.$kind.$alg.txt"
    # stdin is redirected from /dev/null: the loop's own stdin is the algorithm
    # list, and anything the pinning tool or harness read from it would silently
    # drop the remaining algorithms.
    # shellcheck disable=SC2086
    if $TASKSET "$ROOT/bench/kem_sig/bench_pq" --kind "$kind" --alg "$alg" \
      "${BENCH_SIZE_ARGS[@]}" </dev/null >> "$KEMSIG_OUT" 2>"$ERRF"; then
      :
    else
      add_warn "harness failed for $kind $alg (see $ERRF)"
    fi
    # capture PMU availability from the harness's stderr (first occurrence)
    if grep -q 'cycles_available=1' "$ERRF" 2>/dev/null; then
      CYCLES_AVAILABLE=1; CYCLES_REASON="$(sed -n 's/.*cycles_available=1 (\(.*\))/\1/p' "$ERRF" | head -1)"
    elif [ "$CYCLES_AVAILABLE" = 0 ] && grep -q 'cycles_available=0' "$ERRF" 2>/dev/null; then
      CYCLES_REASON="$(sed -n 's/.*cycles_available=0 (\(.*\))/\1/p' "$ERRF" | head -1)"
    fi
  done < <(python3 "$ROOT/bench/lib/list_algs.py" kemsig "$ROOT/config.yaml")
fi
# ---- TLS layer -------------------------------------------------------------
# Runs bench/tls/run_tls.sh when TLS is enabled and the script is present.
# Whenever no TLS output is produced (disabled, harness missing, or harness
# failed) TLS_OUT is cleared, which is how the assemble step knows to omit --tls.
if [ "$DO_TLS" = 1 ]; then
  if [ -x "$ROOT/bench/tls/run_tls.sh" ]; then
    TLS_CONNS="$(python3 -c "import json,sys;print(json.loads(sys.argv[1]).get('connections',1000))" \
      "$(python3 "$ROOT/bench/lib/list_algs.py" tls "$ROOT/config.yaml")")"
    if [ "$SMOKE" = 1 ]; then TLS_CONNS=50; fi
    pqb_log "running TLS handshake matrix ($TLS_CONNS handshakes/cell)"
    if PQB_TASKSET="$TASKSET" "$ROOT/bench/tls/run_tls.sh" \
      --out "$TLS_OUT" --connections "$TLS_CONNS" >"$WORK/tls.log" 2>&1; then
      # `grep -c` already prints 0 (and exits 1) when nothing matches, so a
      # `|| echo 0` fallback would print a second 0; default only when grep
      # printed nothing at all (e.g. unreadable file).
      TLS_CELLS="$(grep -c '"label"' "$TLS_OUT" 2>/dev/null || true)"
      pqb_log "TLS layer done (${TLS_CELLS:-0} cells)"
    else
      add_warn "TLS layer failed or unavailable (see $WORK/tls.log) — continuing without it"
      TLS_OUT=""
    fi
  else
    pqb_warn "TLS harness not present yet — skipping (will be added)"
    TLS_OUT=""
  fi
else
  # TLS disabled on the command line: no tls.json is written, so do not leave
  # TLS_OUT pointing at a file that does not exist.
  TLS_OUT=""
fi
# ---- stop sampler, gather timing -------------------------------------------
# The sampler is stopped explicitly, so the EXIT trap that would kill it is
# removed afterwards.
kill "$SAMPLER_PID" 2>/dev/null || :
trap - EXIT
TS_END=$(date -u +%Y-%m-%dT%H:%M:%SZ)
_pqb_end_epoch=$(date +%s)
DURATION=$(( _pqb_end_epoch - START_EPOCH ))
GOV_AFTER_END=$(pqb_get_governor)
# ---- host facts ------------------------------------------------------------
# collect_host_facts
# Gathers CPU/RAM/OS facts for this host and writes them, together with the run
# settings already held in global variables, to $META as KEY=value lines.
# Facts that cannot be determined are written as empty values rather than
# aborting the run at its very end.
collect_host_facts() {
  local cpu_brand="" ncpu="" ram="" os_pretty="" kernel mem_kb
  kernel="$(uname -r)"
  if [ "$PQB_OS" = "macos" ]; then
    cpu_brand="$(sysctl -n machdep.cpu.brand_string 2>/dev/null)"
    ncpu="$(sysctl -n hw.ncpu 2>/dev/null)"
    ram="$(sysctl -n hw.memsize 2>/dev/null)"
    os_pretty="macOS $(sw_vers -productVersion 2>/dev/null) ($(sw_vers -buildVersion 2>/dev/null))"
  else
    # aarch64 /proc/cpuinfo has no 'model name' line, so this grep misses; the
    # trailing `|| true` keeps `set -o pipefail` from aborting the run (errexit)
    # before the PQB_RPI_MODEL fallback below can supply the brand.
    cpu_brand="$(grep -m1 'model name' /proc/cpuinfo 2>/dev/null | sed 's/.*: //' || true)"
    [ -z "$cpu_brand" ] && cpu_brand="$PQB_RPI_MODEL"
    ncpu="$( (command -v nproc >/dev/null && nproc) || grep -c ^processor /proc/cpuinfo)"
    # MemTotal is reported in kB. Convert only when a number was actually read:
    # an empty value would make the arithmetic expansion a syntax error.
    mem_kb="$(grep -m1 MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}' || true)"
    case "$mem_kb" in
      ''|*[!0-9]*) ram="" ;;
      *) ram="$(( mem_kb * 1024 ))" ;;
    esac
    # PRETTY_NAME gets an empty default so a missing /etc/os-release does not
    # raise an unbound-variable error under `set -u`.
    os_pretty="$( [ -r /etc/os-release ] && . /etc/os-release 2>/dev/null; echo "${PRETTY_NAME:-}" )"
  fi
  {
    echo "TOOL_VERSION=$TOOL_VERSION"
    echo "HOSTNAME=$HOST"
    echo "OS=$PQB_OS"
    echo "ARCH=$PQB_ARCH"
    echo "KERNEL=$kernel"
    echo "OS_PRETTY=\"$os_pretty\""
    echo "IS_RPI=$PQB_IS_RPI"
    echo "RPI_MODEL=\"$PQB_RPI_MODEL\""
    echo "CPU_BRAND=\"$cpu_brand\""
    echo "NCPU=$ncpu"
    echo "RAM_BYTES=$ram"
    echo "GOVERNOR_REQUESTED=$GOV_REQUESTED"
    echo "GOVERNOR_BEFORE=$GOV_BEFORE"
    echo "GOVERNOR_AFTER=$GOV_AFTER_END"
    echo "BENCH_CORE=$BENCH_CORE"
    echo "PINNED=$PINNED"
    echo "TASKSET_CMD=\"$TASKSET\""
    echo "CALIB_MODE=$CALIB_MODE"
    echo "TARGET_TIME_MS=$TARGET_TIME_MS"
    echo "MIN_SAMPLES=$MIN_SAMPLES"
    echo "MAX_ITERS=$MAX_ITERS"
    echo "REPS=$REPS"
    # warmup/timed_iters are single values only in fixed-count mode; in auto mode
    # they are chosen per-op and recorded in each operation's JSON instead.
    if [ "$CALIB_MODE" = "fixed" ]; then
      echo "WARMUP=$WARMUP"
      echo "ITERS=$ITERS"
    else
      echo "WARMUP="
      echo "ITERS="
    fi
    echo "CYCLES_MODE=$CYCLES_MODE"
    echo "CYCLES_AVAILABLE=$CYCLES_AVAILABLE"
    echo "CYCLES_REASON=\"$CYCLES_REASON\""
    echo "TS_START_UTC=$TS_START"
    echo "TS_END_UTC=$TS_END"
    echo "DURATION_S=$DURATION"
    echo "WARNINGS=\"$WARN_ACC\""
  } > "$META"
}
collect_host_facts
# ---- assemble final results JSON -------------------------------------------
# The assembler arguments are collected in an array; --tls is included only
# when TLS_OUT is non-empty.
OUT="$ROOT/results/${HOST}-${TS}.json"
ASSEMBLE_ARGS=(--meta "$META" --lock "$LOCK" --features "$FEATURES" --kemsig "$KEMSIG_OUT")
if [ -n "$TLS_OUT" ]; then
  ASSEMBLE_ARGS+=(--tls "$TLS_OUT")
fi
ASSEMBLE_ARGS+=(--thermal "$THERMAL" --config "$ROOT/config.yaml" --out "$OUT")
python3 "$ROOT/bench/lib/assemble.py" "${ASSEMBLE_ARGS[@]}" >/dev/null
# ---- summary ---------------------------------------------------------------
# Print a short human-readable digest of the results file just written.
echo
pqb_log "================ RUN COMPLETE ================"
python3 - "$OUT" <<'PY'
import json
import sys

path = sys.argv[1]
doc = json.load(open(path))
grade = doc["is_baseline_grade"]


def enabled_count(rows):
    """Number of entries whose 'enabled' field is truthy."""
    return sum(1 for row in rows if row.get('enabled'))


print(f" results: {path}")
print(f" host: {doc['host']['cpu_brand']} ({doc['host']['os_pretty']})")
print(f" baseline-grade (RPi5): {grade}")
if not grade:
    # list every reason the run did not qualify
    for reason in doc['baseline_grade_reasons']:
        print(f" - {reason}")
trace = doc['thermal_trace']
print(f" thermal: {trace.get('temp_c')} throttling={trace.get('throttling_detected')}")
kem = doc['kem']
print(f" kem algos: {enabled_count(kem)} enabled / {len(kem)}")
sig = doc['sig']
print(f" sig algos: {enabled_count(sig)} enabled / {len(sig)}")
print(f" cycles available: {doc['run']['cycles_available']}")
PY
pqb_log "keep raw work dir? -> $WORK (safe to delete)"

View File

@ -0,0 +1,227 @@
# shellcheck shell=bash
# =============================================================================
# lib_platform.sh — portable platform abstraction
#
# Sourced by setup/setup.sh and run.sh. Every operation that differs between the
# RPi5 (Debian/Ubuntu aarch64) measurement target and the macOS/Apple-Silicon
# dev box is funneled through one of these functions, so the *identical* codebase
# runs unchanged on both. Where a capability does not exist on a platform
# (governor control, core pinning, on-die thermal sensors), the function degrades
# gracefully and the caller records that it was unavailable — it never silently
# pretends the action happened.
# =============================================================================
# ---- platform detection ----------------------------------------------------
# pqb_detect_platform
# Exports PQB_OS (macos|linux|unknown, from `uname -s`), PQB_ARCH (`uname -m`),
# PQB_IS_RPI (1 when the device-tree model string contains "Raspberry Pi",
# else 0) and PQB_RPI_MODEL (that model string, empty when unreadable or not
# on Linux).
pqb_detect_platform() {
  PQB_ARCH="$(uname -m)"
  PQB_OS="unknown"
  case "$(uname -s)" in
    Linux) PQB_OS="linux" ;;
    Darwin) PQB_OS="macos" ;;
  esac
  PQB_RPI_MODEL=""
  PQB_IS_RPI=0
  if [ "$PQB_OS" = "linux" ] && [ -r /proc/device-tree/model ]; then
    # the model string is NUL-terminated; drop the NUL before comparing
    PQB_RPI_MODEL="$(tr -d '\0' < /proc/device-tree/model 2>/dev/null)"
    if [[ "$PQB_RPI_MODEL" == *"Raspberry Pi"* ]]; then
      PQB_IS_RPI=1
    fi
  fi
  export PQB_OS PQB_ARCH PQB_IS_RPI PQB_RPI_MODEL
}
# ---- friendly logging ------------------------------------------------------
# Each helper writes one line to stderr: a coloured tag followed by all
# arguments joined into a single string.
pqb_log() {
  printf '\033[1;34m[pqb]\033[0m %s\n' "$*" 1>&2
}
pqb_warn() {
  printf '\033[1;33m[pqb WARN]\033[0m %s\n' "$*" 1>&2
}
pqb_err() {
  printf '\033[1;31m[pqb ERR]\033[0m %s\n' "$*" 1>&2
}
# ---- hostname resolution ---------------------------------------------------
# _pqb_good_host <name>
# Succeeds (0) when <name> is usable as a host id. Rejected (1): the empty
# string, "localhost" or "localhost.<anything>", and placeholder names of the
# form "unknown"/"Unknown" followed by six or more hex digits.
_pqb_good_host() {
  local name="$1"
  if [ -z "$name" ]; then
    return 1
  fi
  case "$name" in
    localhost|localhost.*) return 1 ;;
  esac
  if printf '%s' "$name" | grep -Eq '^[Uu]nknown[0-9a-fA-F]{6,}$'; then
    return 1
  fi
  return 0
}
# pqb_resolve_hostname
# Prints a host identifier. Candidates are tried in order — $HOSTNAME, the
# output of `hostname`, then `hostnamectl --static` (PQB_OS=linux) or
# `scutil --get LocalHostName` (PQB_OS=macos) — each cut at its first dot; the
# first one accepted by _pqb_good_host wins. If none qualifies, prints
# "host-<id>", where <id> is the first 12 characters of /etc/machine-id or of
# the macOS IOPlatformUUID (dashes removed), or "unknown" when neither is
# available.
pqb_resolve_hostname() {
  local candidate short
  local -a candidates=("${HOSTNAME:-}" "$(hostname 2>/dev/null || true)")
  case "${PQB_OS:-}" in
    linux) candidates+=("$(hostnamectl --static 2>/dev/null || true)") ;;
    macos) candidates+=("$(scutil --get LocalHostName 2>/dev/null || true)") ;;
  esac
  for candidate in "${candidates[@]}"; do
    short="${candidate%%.*}" # keep only the part before the first dot
    if _pqb_good_host "$short"; then
      echo "$short"
      return 0
    fi
  done
  # No usable name: fall back to a short machine id.
  local machine_id=""
  if [ -r /etc/machine-id ]; then
    machine_id="$(cut -c1-12 /etc/machine-id 2>/dev/null)"
  elif [ "${PQB_OS:-}" = "macos" ]; then
    machine_id="$(ioreg -rd1 -c IOPlatformExpertDevice 2>/dev/null \
      | awk -F'"' '/IOPlatformUUID/{print $4}' | tr -d '-' | cut -c1-12)"
  fi
  echo "host-${machine_id:-unknown}"
}
# ---- CPU governor ----------------------------------------------------------
# pqb_set_governor_performance
# Tries to switch every CPU's cpufreq governor to 'performance'.
# stdout: the governor the system was left in ("unavailable" when there is no
#         cpufreq control, "unknown" when it cannot be read back).
# status: 0 when 'performance' was set (via sysfs, or via cpupower as a
#         fallback), 1 otherwise.
pqb_set_governor_performance() {
  local cpu0_governor=/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
  # macOS / other: no userspace governor control.
  if [ "$PQB_OS" != "linux" ] || [ ! -d /sys/devices/system/cpu/cpu0/cpufreq ]; then
    echo "unavailable"
    return 1
  fi
  local all_written=1 node
  for node in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
    if [ ! -w "$node" ]; then
      all_written=0
      continue
    fi
    echo performance > "$node" 2>/dev/null || all_written=0
  done
  if [ "$all_written" = 1 ]; then
    cat "$cpu0_governor" 2>/dev/null
    return 0
  fi
  # try cpupower as a fallback (may need sudo)
  if command -v cpupower >/dev/null 2>&1 && cpupower frequency-set -g performance >/dev/null 2>&1; then
    echo performance
    return 0
  fi
  pqb_warn "could not set governor to performance (need root? try: sudo ./run.sh)"
  cat "$cpu0_governor" 2>/dev/null || echo "unknown"
  return 1
}
# pqb_get_governor
# Prints cpu0's current cpufreq governor, or "unavailable" when not on Linux or
# when the sysfs node cannot be read.
pqb_get_governor() {
  local node=/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
  if [ "$PQB_OS" != "linux" ] || [ ! -r "$node" ]; then
    echo "unavailable"
  else
    cat "$node"
  fi
}
# ---- core pinning ----------------------------------------------------------
# pqb_taskset_prefix <core>
# Echoes a command prefix that pins a process to <core>, or an empty string if
# pinning is unavailable (caller warns). taskset is preferred, numactl is the
# fallback. A tool is only offered after it has successfully run `true` pinned
# to <core>: a core that does not exist, or that the current cpuset forbids,
# would otherwise make every pinned command fail later.
pqb_taskset_prefix() {
  local core="$1" prefix
  for prefix in "taskset -c $core" "numactl --physcpubind=$core"; do
    # first word of the prefix is the tool name
    command -v "${prefix%% *}" >/dev/null 2>&1 || continue
    # shellcheck disable=SC2086
    if $prefix true >/dev/null 2>&1; then
      echo "$prefix"
      return 0
    fi
  done
  echo "" # no usable pinning tool (e.g. macOS)
}
# ---- thermal / clock sampling ----------------------------------------------
# pqb_sample_thermal -> one CSV line: epoch_s,arm_clock_hz,temp_c,throttled_hex
# Fields that cannot be read on a platform are emitted as empty (no fake zeros).
# Every probe is best-effort: a failing tool or an unreadable/non-numeric sysfs
# value leaves its field empty instead of failing the function, so callers
# running under errexit/pipefail are not aborted by a transient read error.
pqb_sample_thermal() {
  local ts clk="" temp="" thr="" raw
  ts="$(date +%s)"
  if command -v vcgencmd >/dev/null 2>&1; then
    # Raspberry Pi: authoritative SoC sensors.
    clk="$(vcgencmd measure_clock arm 2>/dev/null | sed -n 's/.*=//p' || true)"
    temp="$(vcgencmd measure_temp 2>/dev/null | sed -n "s/temp=\([0-9.]*\).*/\1/p" || true)"
    thr="$(vcgencmd get_throttled 2>/dev/null | sed -n 's/.*=//p' || true)"
  elif [ "${PQB_OS:-}" = "linux" ]; then
    # Generic Linux fallback (cpufreq + thermal_zone).
    local f
    f=/sys/devices/system/cpu/cpu${PQB_BENCH_CORE:-0}/cpufreq/scaling_cur_freq
    if [ -r "$f" ]; then
      raw="$(cat "$f" 2>/dev/null || true)"
      case "$raw" in
        ''|*[!0-9]*) ;;
        *) clk="$(( 10#$raw * 1000 ))" ;; # kHz -> Hz
      esac
    fi
    if [ -r /sys/class/thermal/thermal_zone0/temp ]; then
      raw="$(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || true)"
      case "$raw" in
        ''|*[!0-9-]*) ;;
        *) temp="$(awk -v m="$raw" 'BEGIN{printf "%.1f", m/1000}')" ;; # millidegrees -> degrees
      esac
    fi
  fi
  # macOS: live per-core freq/temp require sudo powermetrics; we intentionally
  # leave them empty rather than emit misleading values. (Smoke test only.)
  printf '%s,%s,%s,%s\n' "$ts" "$clk" "$temp" "$thr"
}
# pqb_throttled_active <throttled_hex>
# Status 0 when the value (hex, optional "0x" prefix) has bit 2 ("currently
# throttled") or bit 18 (its "has occurred" latch) set; status 1 otherwise,
# including for an empty value. For reference, RPi get_throttled bits:
# bit 0 = under-voltage now, bit 1 = arm freq capped now, bit 2 = currently
# throttled, bit 3 = soft temp limit active, bits 16-19 = "has occurred" latches.
pqb_throttled_active() {
  local digits="${1#0x}"
  if [ -z "$digits" ]; then
    return 1
  fi
  local flags=$(( 16#$digits ))
  # 0x40004 = bit 2 | bit 18
  if (( (flags & 0x40004) != 0 )); then
    return 0
  fi
  return 1
}
# ---- CPU feature / crypto-extension detection ------------------------------
# pqb_cpu_features_json
# Echoes a single JSON object (no trailing newline) with the keys source, neon,
# sha2, sha3, sha512, aes, pmull. On Linux the flags come from the first
# "Features" line of /proc/cpuinfo; on macOS from hw.optional.arm.FEAT_*
# sysctls; on anything else every flag is false and source is "unknown".
pqb_cpu_features_json() {
  local neon=false sha2=false sha3=false sha512=false aes=false pmull=false src="unknown"
  if [ "$PQB_OS" = "linux" ] && [ -r /proc/cpuinfo ]; then
    src="/proc/cpuinfo"
    # A cpuinfo without a "Features" line makes grep exit 1; `|| true` keeps
    # that from failing the assignment under pipefail/errexit, leaving all
    # flags false instead.
    local feats
    feats="$(grep -m1 -i '^Features' /proc/cpuinfo | tr 'A-Z' 'a-z' || true)"
    case "$feats" in *" asimd"*|*"neon"*) neon=true;; esac
    case "$feats" in *" sha2"*) sha2=true;; esac
    case "$feats" in *" sha3"*) sha3=true;; esac
    case "$feats" in *" sha512"*) sha512=true;; esac
    case "$feats" in *" aes"*) aes=true;; esac
    case "$feats" in *" pmull"*) pmull=true;; esac
  elif [ "$PQB_OS" = "macos" ]; then
    src="sysctl"
    neon=true # all Apple Silicon has NEON/ASIMD
    [ "$(sysctl -n hw.optional.arm.FEAT_SHA256 2>/dev/null)" = 1 ] && sha2=true
    [ "$(sysctl -n hw.optional.arm.FEAT_SHA3 2>/dev/null)" = 1 ] && sha3=true
    [ "$(sysctl -n hw.optional.arm.FEAT_SHA512 2>/dev/null)" = 1 ] && sha512=true
    [ "$(sysctl -n hw.optional.arm.FEAT_AES 2>/dev/null)" = 1 ] && aes=true
    [ "$(sysctl -n hw.optional.arm.FEAT_PMULL 2>/dev/null)" = 1 ] && pmull=true
  fi
  printf '{"source":"%s","neon":%s,"sha2":%s,"sha3":%s,"sha512":%s,"aes":%s,"pmull":%s}' \
    "$src" "$neon" "$sha2" "$sha3" "$sha512" "$aes" "$pmull"
}
# ---- package installation --------------------------------------------------
# pqb_install_build_deps -> installs compiler/cmake/openssl headers per platform.
# macOS: Homebrew (returns 1 when brew is missing). Linux: apt-get, via sudo
# when not running as root; without apt-get only a warning is printed.
pqb_install_build_deps() {
  if [ "$PQB_OS" = "macos" ]; then
    command -v brew >/dev/null 2>&1 || { pqb_err "Homebrew required on macOS: https://brew.sh"; return 1; }
    pqb_log "installing build deps via Homebrew"
    brew install cmake ninja openssl@3 git python3 >/dev/null || true
  elif [ "$PQB_OS" = "linux" ]; then
    if command -v apt-get >/dev/null 2>&1; then
      pqb_log "installing build deps via apt"
      local SUDO=""
      if [ "$(id -u)" -ne 0 ]; then SUDO="sudo"; fi
      $SUDO apt-get update -qq
      # Required toolchain packages: a failure here is a real error.
      $SUDO apt-get install -y -qq \
        build-essential cmake ninja-build git python3 perl \
        libssl-dev pkg-config astyle doxygen \
        util-linux >/dev/null
      # linux-cpupower provides the `cpupower` binary used by
      # pqb_set_governor_performance. (Older releases shipped cpufrequtils, which
      # was dropped in Debian 13/trixie — cpupower is the supported replacement.)
      # cpupower is only the fallback path there (sysfs is tried first), so it is
      # installed separately: an apt repository without a package of this name
      # must not fail the install of the required packages above.
      if ! $SUDO apt-get install -y -qq linux-cpupower >/dev/null 2>&1; then
        pqb_warn "optional package linux-cpupower could not be installed; governor control will rely on sysfs only"
      fi
    else
      pqb_warn "no apt-get found; install cmake/ninja/gcc/libssl-dev manually"
    fi
  fi
}

205
pq-bench-rpi5/setup/setup.sh Executable file
View File

@ -0,0 +1,205 @@
#!/usr/bin/env bash
# =============================================================================
# setup.sh — build + pin the full PQ toolchain from scratch.
#
# ./setup/setup.sh # everything: deps, liboqs, openssl(if needed), oqs-provider
# ./setup/setup.sh liboqs # just liboqs
# ./setup/setup.sh openssl # just openssl (forced from source)
# ./setup/setup.sh provider # just oqs-provider
# ./setup/setup.sh deps # just OS packages
#
# Everything is installed under ./vendor/install (no system pollution). The exact
# resolved git commits + the optimization flags actually used are written to
# setup/versions.lock, which run.sh stamps into every results JSON.
#
# Identical flags for every candidate: -O3 -mcpu=cortex-a76 on the RPi5. On a
# non-A76 host (the macOS smoke box) we fall back to -O3 and RECORD that, so
# smoke-test numbers can never masquerade as the RPi5 baseline.
# =============================================================================
# Strict mode: abort on errors, on unset variables and on failures anywhere in
# a pipeline.
set -euo pipefail
# HERE = directory containing this script, ROOT = its parent directory.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$HERE/.." && pwd)"
# shellcheck source=setup/lib_platform.sh
source "$HERE/lib_platform.sh"
# shellcheck source=setup/versions.env
source "$HERE/versions.env"
pqb_detect_platform
# Sources are cloned under vendor/src and everything is installed into
# vendor/install.
VENDOR="$ROOT/vendor"
SRC="$VENDOR/src"
PREFIX="$VENDOR/install"
mkdir -p "$SRC" "$PREFIX"
# Parallel build jobs: nproc if available, else the macOS hw.ncpu sysctl, else 4.
JOBS="$( (command -v nproc >/dev/null && nproc) || sysctl -n hw.ncpu 2>/dev/null || echo 4)"
# ---- decide the real optimization flags for THIS host ----------------------
# choose_cflags
# Sets BENCH_CFLAGS and CFLAGS_TARGET. The RPi5 flags ($TARGET_CFLAGS_RPI5,
# target "cortex-a76") are chosen only when PQB_ARCH is aarch64 AND the compiler
# (${CC:-cc}) manages to build a trivial program with them; otherwise
# $TARGET_CFLAGS_FALLBACK with target "generic-fallback". The probe files are
# created under $SRC and removed again.
choose_cflags() {
  local compiler="${CC:-cc}"
  local probe_src="$SRC/.flagprobe.c"
  local probe_bin="$probe_src.out"
  local rpi5_flags_ok=0
  echo 'int main(void){return 0;}' > "$probe_src"
  if [ "$PQB_ARCH" = "aarch64" ]; then
    # flags are intentionally unquoted so they split into separate arguments
    if $compiler $TARGET_CFLAGS_RPI5 "$probe_src" -o "$probe_bin" 2>/dev/null; then
      rpi5_flags_ok=1
    fi
  fi
  if [ "$rpi5_flags_ok" = 1 ]; then
    BENCH_CFLAGS="$TARGET_CFLAGS_RPI5"
    CFLAGS_TARGET="cortex-a76"
  else
    BENCH_CFLAGS="$TARGET_CFLAGS_FALLBACK"
    CFLAGS_TARGET="generic-fallback"
  fi
  rm -f "$probe_src" "$probe_bin"
}
# cc_version_string
# Prints the first line of `${CC:-cc} --version` (nothing if the compiler
# cannot be run; its stderr is discarded).
cc_version_string() {
  "${CC:-cc}" --version 2>/dev/null | head -n 1
}
# git_pin <repo> <ref> <destdir>
# Makes <destdir> a checkout of <repo> at <ref> and prints the resolved commit
# hash on stdout. Callers capture stdout, so it carries ONLY the hash; progress
# and errors go to stderr.
# Returns non-zero when <ref> cannot be checked out, rather than continuing with
# whatever commit HEAD happens to point at.
git_pin() { # repo ref destdir
  local repo="$1" ref="$2" dest="$3" name
  name="$(basename "$dest")"
  if [ -d "$dest/.git" ]; then
    pqb_log "updating $name -> $ref"
    # Preferred: fetch exactly <ref> and move to what was fetched (FETCH_HEAD),
    # so an existing checkout follows a ref that changed upstream.
    if git -C "$dest" fetch -q --depth 1 origin "$ref" 2>/dev/null \
      && git -C "$dest" checkout -q --detach FETCH_HEAD 2>/dev/null; then
      :
    else
      # Fallback: refresh tags if possible, then use whatever is available
      # locally. A failed fetch is only a warning so an existing checkout can
      # still be rebuilt without network access.
      git -C "$dest" fetch -q --tags origin 2>/dev/null \
        || pqb_warn "could not fetch $name from origin; using objects already present"
      git -C "$dest" checkout -q "$ref" \
        || { pqb_err "cannot check out '$ref' in $dest"; return 1; }
    fi
  else
    pqb_log "cloning $name @ $ref"
    # Shallow clone works for branches and tags; other refs (e.g. a commit
    # hash) need the full clone.
    git clone -q --depth 1 --branch "$ref" "$repo" "$dest" 2>/dev/null \
      || git clone -q "$repo" "$dest" \
      || { pqb_err "cannot clone $repo"; return 1; }
    git -C "$dest" checkout -q "$ref" \
      || { pqb_err "cannot check out '$ref' in $dest"; return 1; }
  fi
  git -C "$dest" rev-parse HEAD
}
# ---------------------------------------------------------------------------
# build_liboqs
# Pins liboqs at $LIBOQS_REF, configures/builds/installs it into $PREFIX with
# the flags picked by choose_cflags, and sets LIBOQS_COMMIT and
# LIBOQS_OPT_DEFINES for write_lock.
build_liboqs() {
  choose_cflags
  local dest="$SRC/liboqs" commit
  commit="$(git_pin "$LIBOQS_REPO" "$LIBOQS_REF" "$dest")"
  pqb_log "building liboqs ($LIBOQS_REF @ ${commit:0:12}) flags: $BENCH_CFLAGS"
  local build_dir="$dest/build"
  # OQS_DIST_BUILD=OFF -> native build for the fixed target (no runtime CPU
  # dispatch), so -mcpu=cortex-a76 fully drives codegen. The AArch64-optimized
  # ML-KEM (mlkem-native) and AArch64 asm backends are enabled by default on
  # aarch64 when DIST_BUILD is OFF (compile-time CPU features); verified post-build.
  local -a configure_args=(-S "$dest" -B "$build_dir")
  if command -v ninja >/dev/null 2>&1; then
    configure_args+=(-G Ninja) # use Ninja when it is installed
  fi
  configure_args+=(
    -DCMAKE_INSTALL_PREFIX="$PREFIX"
    -DCMAKE_BUILD_TYPE=Release
    -DOQS_DIST_BUILD=OFF
    -DOQS_BUILD_ONLY_LIB=OFF
    -DBUILD_SHARED_LIBS=ON
    -DCMAKE_C_FLAGS="$BENCH_CFLAGS"
  )
  cmake "${configure_args[@]}" >/dev/null
  cmake --build "$build_dir" --parallel "$JOBS" >/dev/null
  cmake --install "$build_dir" >/dev/null
  # Record which optimized backends were compiled in: collect the matching
  # #define lines from the generated oqsconfig.h as one ';'-separated string.
  local generated_cfg="$build_dir/include/oqs/oqsconfig.h"
  LIBOQS_OPT_DEFINES="(oqsconfig.h not found)"
  if [ -r "$generated_cfg" ]; then
    # double quotes are removed so the value stays valid inside versions.lock
    LIBOQS_OPT_DEFINES="$(grep -Ei 'AARCH64|ARM|_ASM|MLKEM_NATIVE|OPT_TARGET|CPU_EXT' "$generated_cfg" \
      | grep -i 'define' | sed 's/^#define //' | tr -d '"' | tr '\n' ';' || true)"
  fi
  LIBOQS_COMMIT="$commit"
}
# ---------------------------------------------------------------------------
# locate_or_build_openssl [force]
# Sets OPENSSL_BIN, OPENSSL_PREFIX and OPENSSL_COMMIT.
# Unless "force" is passed or BUILD_OPENSSL=1, the first existing binary that
# identifies itself as OpenSSL >= 3.5 is used (OPENSSL_COMMIT="system:<ver>").
# Otherwise OpenSSL is built from $OPENSSL_REF into $PREFIX.
locate_or_build_openssl() {
  # Prefer an existing >= 3.5 openssl (Homebrew on macOS, distro on Linux) unless
  # BUILD_OPENSSL=1. PQ sig certs for TLS only need >= 3.5.0.
  local want_major=3 want_minor=5
  if [ "${1:-}" != "force" ] && [ "${BUILD_OPENSSL:-0}" != 1 ]; then
    local cand
    for cand in "$(command -v openssl || true)" /opt/homebrew/opt/openssl@3/bin/openssl /usr/bin/openssl; do
      [ -x "$cand" ] || continue
      # Only a banner whose first word is "OpenSSL" counts: other
      # implementations that answer to `openssl version` (e.g. LibreSSL) use
      # their own version numbers and must not pass the >= 3.5 test. `|| true`
      # makes a candidate that cannot even run be skipped instead of aborting
      # setup via pipefail.
      local v
      v="$("$cand" version 2>/dev/null | awk '$1 == "OpenSSL" {print $2}' || true)"
      # NB: assign on separate lines. A single `local a=.. b=.. c="${b..}"` makes
      # bash 5.2 declare all names (unset) *before* expanding any RHS, so the
      # reference to `rest` here trips `set -u` (unbound variable) on the Pi.
      local maj rest min
      maj="${v%%.*}"; rest="${v#*.}"; min="${rest%%.*}"
      if [ "${maj:-0}" -gt "$want_major" ] 2>/dev/null || \
        { [ "${maj:-0}" -eq "$want_major" ] && [ "${min:-0}" -ge "$want_minor" ]; } 2>/dev/null; then
        OPENSSL_BIN="$cand"
        # <prefix>/bin/openssl -> <prefix>
        OPENSSL_PREFIX="$(dirname "$(dirname "$cand")")"
        OPENSSL_COMMIT="system:$v"
        pqb_log "using existing OpenSSL $v at $cand"
        return 0
      fi
    done
  fi
  pqb_log "building OpenSSL $OPENSSL_REF from source"
  local dest="$SRC/openssl" commit
  commit="$(git_pin "$OPENSSL_REPO" "$OPENSSL_REF" "$dest")"
  ( cd "$dest" && ./Configure --prefix="$PREFIX" --openssldir="$PREFIX/ssl" shared \
    && make -j"$JOBS" >/dev/null && make install_sw >/dev/null )
  OPENSSL_BIN="$PREFIX/bin/openssl"
  OPENSSL_PREFIX="$PREFIX"
  OPENSSL_COMMIT="$commit"
}
# ---------------------------------------------------------------------------
# build_oqs_provider
# Pins oqs-provider at $OQSPROVIDER_REF, builds it against $OPENSSL_PREFIX and
# the liboqs installed in $PREFIX, and sets OQSPROVIDER_MODULE (path of the
# built module, empty if none was found) and OQSPROVIDER_COMMIT.
build_oqs_provider() {
  [ -n "${OPENSSL_PREFIX:-}" ] || locate_or_build_openssl
  # When this target runs on its own, no earlier step has chosen the flags yet.
  # Choose them here so the provider is compiled with the same flags that
  # write_lock records, instead of silently using the fallback.
  [ -n "${BENCH_CFLAGS:-}" ] || choose_cflags
  local dest="$SRC/oqs-provider" commit
  commit="$(git_pin "$OQSPROVIDER_REPO" "$OQSPROVIDER_REF" "$dest")"
  pqb_log "building oqs-provider ($OQSPROVIDER_REF @ ${commit:0:12})"
  local GEN=(); command -v ninja >/dev/null 2>&1 && GEN=(-G Ninja)
  cmake -S "$dest" -B "$dest/build" ${GEN[@]+"${GEN[@]}"} \
    -DCMAKE_INSTALL_PREFIX="$PREFIX" \
    -DCMAKE_BUILD_TYPE=Release \
    -DOPENSSL_ROOT_DIR="$OPENSSL_PREFIX" \
    -Dliboqs_DIR="$PREFIX/lib/cmake/liboqs" \
    -DCMAKE_C_FLAGS="${BENCH_CFLAGS:-$TARGET_CFLAGS_FALLBACK}" >/dev/null
  cmake --build "$dest/build" --parallel "$JOBS" >/dev/null
  # Install is best-effort; the module is looked up in the build tree as well.
  cmake --install "$dest/build" >/dev/null 2>&1 || true
  # Provider .so lands under .../lib/ossl-modules or .../oqsprovider
  OQSPROVIDER_MODULE="$(find "$PREFIX" "$dest/build" -name 'oqsprovider.*' \( -name '*.so' -o -name '*.dylib' \) 2>/dev/null | head -1)"
  OQSPROVIDER_COMMIT="$commit"
}
# ---------------------------------------------------------------------------
# _lock_carry <lockfile> <marker-var> <var>...
# If <marker-var> is unset/empty (its component was not built in this
# invocation) and <lockfile> is readable, loads each <var> from that lock. The
# old lock is sourced in a subshell so none of its other values leak into this
# shell.
_lock_carry() {
  local lockfile="$1" marker="$2" field
  shift 2
  [ -r "$lockfile" ] || return 0
  [ -z "${!marker:-}" ] || return 0
  for field in "$@"; do
    printf -v "$field" '%s' "$( set +u; . "$lockfile" >/dev/null 2>&1; printf '%s' "${!field:-}" )"
  done
}
# write_lock
# Writes setup/versions.lock (KEY="value" lines) describing the build host, the
# flags chosen by choose_cflags, the compiler and each component's ref/commit,
# then echoes the file to stderr.
# Single-target runs (liboqs|openssl|provider) rebuild only one component; the
# entries of the other components are carried over from the existing lock
# instead of being reset to "not-built"/empty.
write_lock() {
  choose_cflags 2>/dev/null || true
  local lock="$HERE/versions.lock"
  _lock_carry "$lock" LIBOQS_COMMIT LIBOQS_COMMIT LIBOQS_OPT_DEFINES
  _lock_carry "$lock" OPENSSL_COMMIT OPENSSL_COMMIT OPENSSL_BIN OPENSSL_PREFIX
  _lock_carry "$lock" OQSPROVIDER_COMMIT OQSPROVIDER_COMMIT OQSPROVIDER_MODULE
  {
    echo "# Auto-generated by setup.sh — exact toolchain provenance. Stamped into results JSON."
    echo "PQB_BUILD_HOST_OS=$PQB_OS"
    echo "PQB_BUILD_HOST_ARCH=$PQB_ARCH"
    echo "PQB_IS_RPI=$PQB_IS_RPI"
    echo "PQB_RPI_MODEL=\"${PQB_RPI_MODEL}\""
    echo "BENCH_CFLAGS=\"${BENCH_CFLAGS:-unknown}\""
    echo "CFLAGS_TARGET=\"${CFLAGS_TARGET:-unknown}\""
    echo "CC_VERSION=\"$(cc_version_string)\""
    echo "LIBOQS_REF=\"$LIBOQS_REF\""
    echo "LIBOQS_COMMIT=\"${LIBOQS_COMMIT:-not-built}\""
    echo "LIBOQS_OPT_DEFINES=\"${LIBOQS_OPT_DEFINES:-}\""
    echo "OPENSSL_BIN=\"${OPENSSL_BIN:-}\""
    echo "OPENSSL_PREFIX=\"${OPENSSL_PREFIX:-}\""
    echo "OPENSSL_COMMIT=\"${OPENSSL_COMMIT:-not-built}\""
    echo "OQSPROVIDER_REF=\"$OQSPROVIDER_REF\""
    echo "OQSPROVIDER_COMMIT=\"${OQSPROVIDER_COMMIT:-not-built}\""
    echo "OQSPROVIDER_MODULE=\"${OQSPROVIDER_MODULE:-}\""
    echo "PREFIX=\"$PREFIX\""
  } > "$lock"
  pqb_log "wrote $lock"
  cat "$lock" >&2
}
# ---- dispatch --------------------------------------------------------------
# main [deps|liboqs|openssl|provider|all]
# Runs the requested setup target (default: all). Every build target finishes
# by rewriting versions.lock. An unknown target prints an error and exits 2.
main() {
  local target="${1:-all}"
  if [ "$target" = "deps" ]; then
    pqb_install_build_deps
  elif [ "$target" = "liboqs" ]; then
    build_liboqs
    write_lock
  elif [ "$target" = "openssl" ]; then
    # an explicit "openssl" target always builds from source
    locate_or_build_openssl force
    write_lock
  elif [ "$target" = "provider" ]; then
    build_oqs_provider
    write_lock
  elif [ "$target" = "all" ]; then
    pqb_install_build_deps
    build_liboqs
    locate_or_build_openssl
    build_oqs_provider
    write_lock
    pqb_log "setup complete. Next: ./run.sh --smoke"
  else
    pqb_err "unknown target: $target (deps|liboqs|openssl|provider|all)"
    exit 2
  fi
}
# Entry point: pass the script's command-line arguments to main.
main "$@"

View File

@ -0,0 +1,40 @@
# =============================================================================
# Pinned upstream versions for the PQ benchmark toolchain.
#
# These are the *intended* refs. setup/setup.sh clones each at the tag below
# and then records the *actually resolved* commit hash into setup/versions.lock,
# which is stamped verbatim into every results JSON. That way a results file is
# always traceable to exact source, even if a tag is ever re-pointed upstream.
#
# Every value below keeps a value already present in the environment and only
# falls back to the default shown. Override any of these from the environment,
# e.g.:
# LIBOQS_REF=main ./setup/setup.sh
# =============================================================================
# liboqs: KEM/signature implementations. >= 0.15.0 ships the AArch64-optimized
# ML-KEM backend (mlkem-native). setup.sh passes no dedicated switch for it: it
# configures liboqs with OQS_DIST_BUILD=OFF and relies on the backend being
# enabled by default on aarch64, then records the resulting defines in
# versions.lock.
LIBOQS_REPO="${LIBOQS_REPO:-https://github.com/open-quantum-safe/liboqs.git}"
LIBOQS_REF="${LIBOQS_REF:-0.15.0}"
# OpenSSL >= 3.5.0 — required for PQ signatures in TLS 1.3 server auth.
# On macOS dev boxes we prefer the Homebrew openssl@3 if it is already >= 3.5;
# setup.sh only builds OpenSSL from source when the system one is too old or
# BUILD_OPENSSL=1 is set.
OPENSSL_REPO="${OPENSSL_REPO:-https://github.com/openssl/openssl.git}"
OPENSSL_REF="${OPENSSL_REF:-openssl-3.5.0}"
# oqs-provider >= 0.9.0 — wires liboqs algorithms into OpenSSL as a provider,
# giving us PQ KEM groups and PQ signature certs for the TLS layer.
OQSPROVIDER_REPO="${OQSPROVIDER_REPO:-https://github.com/open-quantum-safe/oqs-provider.git}"
OQSPROVIDER_REF="${OQSPROVIDER_REF:-0.9.0}"
# Identical optimization flags for every candidate (the credibility anchor).
# cortex-a76 is the RPi5 core. On non-A76 hosts (e.g. the macOS smoke box)
# setup.sh substitutes a safe fallback and records which flags were *actually*
# used in versions.lock + results JSON, so smoke numbers are never mistaken for
# the RPi5 baseline.
TARGET_CFLAGS_RPI5="${TARGET_CFLAGS_RPI5:--O3 -mcpu=cortex-a76}"
TARGET_CFLAGS_FALLBACK="${TARGET_CFLAGS_FALLBACK:--O3}"
# Pinned core for taskset on the RPi5 (4 cores: 0-3). Core 3 is chosen to stay
# away from CPU0 where the kernel tends to steer IRQs/RPS. Documented in README.
BENCH_CORE="${BENCH_CORE:-3}"