ci: add daily rln simulator e2e workflow (#3885)

This commit is contained in:
Darshan 2026-05-22 17:15:31 +05:30 committed by GitHub
parent 04ef12ccf3
commit 67eebe3a02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 674 additions and 0 deletions

View File

@ -77,3 +77,8 @@ jobs:
}" \
"$DISCORD_WEBHOOK_URL"
# RLN end-to-end against the simulator. Defaults from tests/simulator/rln-sim.env.
rln-simulator:
uses: ./.github/workflows/ci-rln-simulator.yml
secrets: inherit

271
.github/workflows/ci-rln-simulator.yml vendored Normal file
View File

@ -0,0 +1,271 @@
name: "RLN E2E — Simulator"

# Validates the full RLN flow end-to-end against logos-delivery-simulator:
# keystore generation, on-chain registration, gossipsub propagation,
# per-epoch rate-limit enforcement, and epoch-boundary recovery.
#
# Why this exists: logos-dev runs with RLN disabled, so there is no
# production traffic exercising RLN. Until RLN is enabled there, this is
# the only end-to-end coverage of the RLN + zerokit path.
#
# The image is built ON the runner and tested ON the same runner, so the
# AVX-512 portability issue in container-image.yml does not apply here.
#
# No own schedule: ci-daily.yml is the single daily entry point and calls
# this via workflow_call. workflow_dispatch allows manual runs.
# Run defaults live in tests/simulator/rln-sim.env; inputs override per-run.

on:
  # Reusable entry point (called from ci-daily.yml). Every input is optional;
  # a blank value means "use the default from tests/simulator/rln-sim.env".
  workflow_call:
    inputs:
      branch:
        type: string
        default: ""
      num_nodes:
        type: string
        default: ""
      msg_limit:
        type: string
        default: ""
      epoch_sec:
        type: string
        default: ""

  # Manual entry point; same four inputs, with UI descriptions.
  workflow_dispatch:
    inputs:
      branch:
        description: "logos-delivery branch to build & test (blank = use rln-sim.env)"
        type: string
        default: ""
      num_nodes:
        description: "Number of nwaku nodes (blank = use rln-sim.env)"
        type: string
        default: ""
      msg_limit:
        description: "RLN_RELAY_MSG_LIMIT, must be >= contract min ~20 (blank = use rln-sim.env)"
        type: string
        default: ""
      epoch_sec:
        description: "RLN_RELAY_EPOCH_SEC, large enough a burst cannot straddle an epoch (blank = use rln-sim.env)"
        type: string
        default: ""

# Build settings shared by every step of the job below.
env:
  NPROC: "2"
  MAKEFLAGS: "-j2"
  NIM_VERSION: "2.2.4"
  NIMBLE_VERSION: "0.22.3"
jobs:
  rln-e2e:
    runs-on: ubuntu-22.04
    timeout-minutes: 120
    name: rln-e2e
    steps:
      # First checkout: the ref that triggered this workflow (CI branch /
      # master). This is where the e2e test script and rln-sim.env live —
      # the build branch may not contain them.
      - name: Checkout CI ref (for the test script)
        uses: actions/checkout@v4
        with:
          submodules: false

      # Defaults come from tests/simulator/rln-sim.env (single source of truth);
      # a non-blank input (dispatch or workflow_call) overrides the matching value.
      - name: Resolve parameters
        id: cfg
        env:
          IN_BRANCH: ${{ inputs.branch }}
          IN_NUM_NODES: ${{ inputs.num_nodes }}
          IN_MSG_LIMIT: ${{ inputs.msg_limit }}
          IN_EPOCH_SEC: ${{ inputs.epoch_sec }}
        run: |
          set -euo pipefail
          set -a; . tests/simulator/rln-sim.env; set +a
          branch="${IN_BRANCH:-$BRANCH}"
          num_nodes="${IN_NUM_NODES:-$NUM_NODES}"
          msg_limit="${IN_MSG_LIMIT:-$MSG_LIMIT}"
          epoch_sec="${IN_EPOCH_SEC:-$EPOCH_SEC}"
          # The numeric values are later handed to shell tests and to the test
          # script's integer arguments, so reject anything that is not a
          # positive integer before it is published as a step output.
          for kv in "num_nodes=$num_nodes" "msg_limit=$msg_limit" "epoch_sec=$epoch_sec"; do
            case "${kv#*=}" in
              ''|0|*[!0-9]*)
                echo "::error::${kv%%=*} must be a positive integer, got '${kv#*=}'"
                exit 1
                ;;
            esac
          done
          {
            echo "branch=$branch"
            echo "num_nodes=$num_nodes"
            echo "msg_limit=$msg_limit"
            echo "epoch_sec=$epoch_sec"
          } >> "$GITHUB_OUTPUT"

      - name: Stash e2e test script outside the workspace
        run: |
          test -f tests/simulator/rln-e2e-test.py \
            || { echo "tests/simulator/rln-e2e-test.py missing on CI ref"; exit 1; }
          cp tests/simulator/rln-e2e-test.py "$RUNNER_TEMP/rln-e2e-test.py"

      # Second checkout: the branch to build & test. Overwrites the workspace;
      # the stashed test script in RUNNER_TEMP survives.
      - name: Checkout logos-delivery (${{ steps.cfg.outputs.branch }})
        uses: actions/checkout@v4
        with:
          ref: ${{ steps.cfg.outputs.branch }}
          submodules: false
          clean: true

      - name: Get submodules hash
        id: submodules
        run: |
          echo "hash=$(git submodule status | awk '{print $1}' | sort | shasum -a 256 | sed 's/[ -]*//g')" >> "$GITHUB_OUTPUT"

      - name: Cache submodules
        uses: actions/cache@v3
        with:
          path: |
            vendor/
            .git/modules
          key: ${{ runner.os }}-vendor-modules-${{ steps.submodules.outputs.hash }}

      - name: Install Nim ${{ env.NIM_VERSION }}
        uses: jiro4989/setup-nim-action@v2
        with:
          nim-version: ${{ env.NIM_VERSION }}
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install Nimble ${{ env.NIMBLE_VERSION }}
        run: |
          cd /tmp && nimble install "nimble@${{ env.NIMBLE_VERSION }}" -y
          echo "$HOME/.nimble/bin" >> "$GITHUB_PATH"

      - name: Cache nimble deps
        id: cache-nimbledeps
        uses: actions/cache@v3
        with:
          path: |
            nimbledeps/
            nimble.paths
          key: ${{ runner.os }}-nimbledeps-nimble${{ env.NIMBLE_VERSION }}-${{ hashFiles('nimble.lock', 'BearSSL.mk', 'Nat.mk') }}

      - name: Install nimble deps
        if: steps.cache-nimbledeps.outputs.cache-hit != 'true'
        run: |
          nimble setup --localdeps -y
          make rebuild-nat-libs-nimbledeps
          make rebuild-bearssl-nimbledeps
          touch nimbledeps/.nimble-setup

      - name: Build wakunode2
        run: |
          make -j${NPROC} V=1 POSTGRES=1 \
            NIMFLAGS="-d:disableMarchNative -d:chronicles_colors:none" \
            wakunode2

      - name: Build local Docker image
        run: |
          docker build -t nwaku-rln-ci:test -f docker/binaries/Dockerfile.bn.amd64 .

      - name: Clone logos-delivery-simulator
        run: |
          git clone --depth 1 https://github.com/logos-messaging/logos-delivery-simulator.git "$RUNNER_TEMP/logos-delivery-simulator"

      # Resolved parameters reach the script through env vars rather than
      # being spliced into the script text by ${{ }} expansion.
      - name: Write simulator .env
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        env:
          NUM_NODES: ${{ steps.cfg.outputs.num_nodes }}
          EPOCH_SEC: ${{ steps.cfg.outputs.epoch_sec }}
          MSG_LIMIT: ${{ steps.cfg.outputs.msg_limit }}
        run: |
          cat > .env <<EOF
          LD_IMAGE=nwaku-rln-ci:test
          NUM_LD_NODES=${NUM_NODES}
          MSG_SIZE_KBYTES=1
          TRAFFIC_DELAY_SECONDS=5
          RLN_RELAY_EPOCH_SEC=${EPOCH_SEC}
          RLN_RELAY_MSG_LIMIT=${MSG_LIMIT}
          MAX_MESSAGE_LIMIT=100
          RPC_URL=http://foundry:8545
          PRIVATE_KEY=0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
          ETH_FROM=0xf39fd6e51aad88f6f4ce6ab8827279cfffb92266
          RLN_CONTRACT_REPO_COMMIT=e75ac913e579ad872f54b2225eec35d1de3d98b0
          WATCHTOWER_ENABLED=false
          EOF

      - name: Bring up simulator (RLN subset)
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        run: |
          docker compose up -d foundry contract-repo-deployer nwaku-token-init bootstrap nwaku

      - name: Wait for contract deployer
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        run: |
          deployer=logos-delivery-simulator-contract-repo-deployer-1
          st=missing
          # Poll for up to 60 x 15s = 15 minutes.
          for _ in $(seq 1 60); do
            st=$(docker inspect "$deployer" --format='{{.State.Status}}' 2>/dev/null || echo missing)
            [ "$st" = "exited" ] && break
            echo "deployer status: $st"; sleep 15
          done
          # A container that is still running reports ExitCode 0, so a timeout
          # must be detected from the status, not from the exit code.
          if [ "$st" != "exited" ]; then
            echo "deployer did not finish in time (last status: $st)"
            docker logs "$deployer" 2>&1 | tail -50 || true
            exit 1
          fi
          ec=$(docker inspect "$deployer" --format='{{.State.ExitCode}}')
          echo "deployer exit code: $ec"
          if [ "$ec" != "0" ]; then
            docker logs "$deployer" 2>&1 | tail -50
            exit 1
          fi

      - name: Wait for nwaku fleet to register
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        env:
          NUM_NODES: ${{ steps.cfg.outputs.num_nodes }}
        run: |
          N="$NUM_NODES"
          up=0
          for _ in $(seq 1 60); do
            up=$(docker ps --filter 'name=logos-delivery-simulator-nwaku-' --filter 'status=running' --format '{{.Names}}' | wc -l)
            echo "nwaku running: $up/$N"
            [ "$up" -ge "$N" ] && break
            sleep 15
          done
          # Fail fast instead of carrying a partial fleet into the e2e run.
          if [ "$up" -lt "$N" ]; then
            echo "only $up/$N nwaku containers running after waiting"
            exit 1
          fi
          # nwaku-1 must reach the "registered + started" marker
          timeout 300 docker logs -f logos-delivery-simulator-nwaku-1 2>&1 \
            | grep -m1 -E "Segmentation fault|Illegal instruction|Failed to register on-chain|I am a nwaku node" \
            | tee /tmp/nwaku1.verdict
          grep -q "I am a nwaku node" /tmp/nwaku1.verdict

      - name: Run RLN e2e scenarios
        env:
          NUM_NODES: ${{ steps.cfg.outputs.num_nodes }}
          MSG_LIMIT: ${{ steps.cfg.outputs.msg_limit }}
          EPOCH_SEC: ${{ steps.cfg.outputs.epoch_sec }}
        run: |
          TEST_SCRIPT="$RUNNER_TEMP/rln-e2e-test.py"
          test -f "$TEST_SCRIPT" \
            || { echo "stashed test script missing at $TEST_SCRIPT"; exit 1; }
          # The values are expanded by the runner's shell (validated as
          # integers in "Resolve parameters") before the container starts.
          docker run --rm \
            --network logos-delivery-simulator_simulation \
            -v "$TEST_SCRIPT:/test.py:ro" \
            python:3.11-slim \
            sh -c "pip install --quiet --disable-pip-version-check requests && \
              python /test.py \
                --hostname-prefix logos-delivery-simulator-nwaku- \
                --num-nodes ${NUM_NODES} \
                --msg-limit ${MSG_LIMIT} \
                --epoch-sec ${EPOCH_SEC} \
                --health-deadline-sec 600"

      - name: Collect logs on failure
        if: failure()
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        run: |
          mkdir -p "$RUNNER_TEMP/logs"
          for c in $(docker ps -a --filter 'name=logos-delivery-simulator-' --format '{{.Names}}'); do
            docker logs "$c" > "$RUNNER_TEMP/logs/$c.log" 2>&1 || true
          done

      - name: Upload logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: simulator-logs
          path: ${{ runner.temp }}/logs
          retention-days: 7

      - name: Tear down
        if: always()
        working-directory: ${{ runner.temp }}/logos-delivery-simulator
        run: docker compose down -v || true

      - name: Notify Discord
        if: always()
        env:
          DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }}
          STATUS: ${{ job.status }}
          BRANCH: ${{ steps.cfg.outputs.branch }}
          RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          # No webhook configured: nothing to notify.
          [ -z "$DISCORD_WEBHOOK_URL" ] && exit 0
          if [ "$STATUS" = "success" ]; then COLOR=3066993; TITLE="✅ RLN E2E passed"; else COLOR=15158332; TITLE="❌ RLN E2E failed"; fi
          curl -H "Content-Type: application/json" -X POST -d "{
            \"embeds\":[{\"title\":\"$TITLE\",\"color\":$COLOR,
            \"fields\":[
            {\"name\":\"Branch\",\"value\":\"$BRANCH\",\"inline\":true},
            {\"name\":\"Status\",\"value\":\"$STATUS\",\"inline\":true}],
            \"url\":\"$RUN_URL\",
            \"footer\":{\"text\":\"Daily RLN simulator E2E\"}}]}" \
            "$DISCORD_WEBHOOK_URL"

4
.gitignore vendored
View File

@ -86,3 +86,7 @@ nimbledeps
**/anvil_state/state-deployed-contracts-mint-and-approved.json
.gitnexus
# Python bytecode from tests/simulator
__pycache__/
*.pyc

388
tests/simulator/rln-e2e-test.py Executable file
View File

@ -0,0 +1,388 @@
#!/usr/bin/env python3
"""
RLN end-to-end test against a running logos-delivery-simulator stack.
Designed to run as a sidecar container on the simulator's Docker network so
hostnames like `logos-delivery-simulator-nwaku-1` resolve via Docker DNS.
Scenarios covered (in order):
1. HEALTH - every node responds to /debug/v1/info with an enrUri
2. SUBSCRIBE - every node REST-subscribes to the pubsub topic
3. WARMUP - every node retries a publish until it gets its first 200
4. PROPAGATION - one sender's message lands in all peers' inboxes
5. RATE_LIMIT - every node bursts msg_limit+3 messages in one epoch:
at most msg_limit return 200, the rest 500 (rate-limit hit)
6. EPOCH_RESET - after epoch_sec, every node can send 1 more -> 200
Not covered yet:
SAME_MESSAGE_ID - sending same message_id twice in same epoch is the
slashable signal (would be verified by checking node logs)
Exit code:
0 = all scenarios passed
N = number of scenarios that failed
Usage (typical):
docker run --rm \\
--network logos-delivery-simulator_simulation \\
-v /path/to/rln-e2e-test.py:/test.py \\
python:3.11-slim \\
sh -c 'pip install --quiet requests && python /test.py \\
--hostname-prefix logos-delivery-simulator-nwaku- \\
--num-nodes 30 --msg-limit 30 --epoch-sec 15'
"""
import argparse
import base64
import concurrent.futures as cf
import json
import os
import sys
import time
import urllib.parse
from dataclasses import dataclass
from typing import Optional
import requests
# Pubsub topic that every node is subscribed to and that all test messages
# are published on (URL-encoded into the relay REST paths below).
PUBSUB_TOPIC = "/waku/2/rs/66/0"
# Content topic stamped on every message the test publishes.
CONTENT_TOPIC = "/rln-test/1/probe/proto"
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def url_of(host: str, port: int = 8645) -> str:
    """Return the base HTTP URL (``http://host:port``) for a node.

    ``port`` defaults to 8645, the port this script uses for every REST call.
    """
    return "http://{}:{}".format(host, port)
def waku_publish(node_url: str, payload: bytes, timeout: float = 5.0) -> int:
    """POST one message to the node's relay REST endpoint.

    The payload is base64-encoded and sent on PUBSUB_TOPIC / CONTENT_TOPIC
    with the current time (ns) as timestamp.

    Returns the HTTP status code, or -1 when the request itself failed
    (connection error, timeout, ...).
    """
    topic_path = urllib.parse.quote(PUBSUB_TOPIC, safe="")
    endpoint = f"{node_url}/relay/v1/messages/{topic_path}"
    message = {
        "payload": base64.b64encode(payload).decode("ascii"),
        "contentTopic": CONTENT_TOPIC,
        "version": 1,
        "timestamp": time.time_ns(),
    }
    try:
        response = requests.post(
            endpoint,
            json=message,
            timeout=timeout,
            headers={"content-type": "application/json"},
        )
    except requests.RequestException:
        # Transport-level failure: report it as a sentinel, not an exception.
        return -1
    return response.status_code
def waku_subscribe(node_url: str, timeout: float = 5.0) -> int:
    """Ask the node (via REST) to subscribe to PUBSUB_TOPIC.

    Returns the HTTP status code, or -1 when the request itself failed.
    """
    endpoint = f"{node_url}/relay/v1/subscriptions"
    try:
        response = requests.post(
            endpoint,
            json=[PUBSUB_TOPIC],
            timeout=timeout,
            headers={"content-type": "application/json"},
        )
    except requests.RequestException:
        return -1
    return response.status_code
def waku_get_messages(node_url: str, timeout: float = 5.0) -> Optional[list]:
    """Fetch the node's REST inbox for PUBSUB_TOPIC.

    Returns the decoded JSON body on HTTP 200, or None when the request
    failed, the status was not 200, or the body was not valid JSON.
    """
    topic_path = urllib.parse.quote(PUBSUB_TOPIC, safe="")
    endpoint = f"{node_url}/relay/v1/messages/{topic_path}"
    try:
        response = requests.get(endpoint, timeout=timeout)
        return response.json() if response.status_code == 200 else None
    except (requests.RequestException, json.JSONDecodeError):
        return None
def node_healthy(node_url: str, timeout: float = 3.0) -> bool:
    """Return True when /debug/v1/info answers 200 and its JSON has "enrUri".

    Any request failure or undecodable body counts as unhealthy.
    """
    try:
        response = requests.get(f"{node_url}/debug/v1/info", timeout=timeout)
        if response.status_code != 200:
            return False
        return "enrUri" in response.json()
    except (requests.RequestException, json.JSONDecodeError):
        return False
# ---------------------------------------------------------------------------
# scenarios
# ---------------------------------------------------------------------------
@dataclass
class Result:
    """Outcome of a single scenario, printable as one report line."""

    # Scenario label shown in the report (e.g. "HEALTH").
    name: str
    # True when the scenario passed.
    ok: bool
    # Optional human-readable explanation appended to the report line.
    detail: str = ""

    def __str__(self) -> str:
        """Render as "[PASS] NAME" / "[FAIL] NAME", plus ": detail" if set."""
        status = "PASS" if self.ok else "FAIL"
        line = f"[{status}] {self.name}"
        if self.detail:
            # Separate the detail from the name; without a separator the two
            # run together (e.g. "HEALTH6/6 healthy").
            line += f": {self.detail}"
        return line
def scenario_health(nodes: list[str], deadline_sec: int = 120) -> Result:
    """Poll every node until all are healthy or deadline_sec has elapsed.

    Only the nodes that are still failing are re-probed on each round, with a
    3 s pause between rounds. Passes when no node is left unhealthy.
    """
    started = time.time()
    pending = list(nodes)
    while pending and time.time() - started < deadline_sec:
        pool_size = min(32, len(pending))
        with cf.ThreadPoolExecutor(max_workers=pool_size) as pool:
            verdicts = list(pool.map(node_healthy, [url_of(name) for name in pending]))
        pending = [name for name, healthy in zip(pending, verdicts) if not healthy]
        if pending:
            time.sleep(3)

    summary = f"{len(nodes) - len(pending)}/{len(nodes)} healthy"
    if pending:
        summary += f"; failing: {pending[:5]}"
    return Result("HEALTH", not pending, summary)
def scenario_subscribe(nodes: list[str]) -> Result:
    """REST-subscribe every node to the pubsub topic so GETs return cached msgs.

    Passes only when every subscribe call returns HTTP 200.
    """
    targets = [url_of(name) for name in nodes]
    with cf.ThreadPoolExecutor(max_workers=min(32, len(nodes))) as pool:
        statuses = list(pool.map(waku_subscribe, targets))

    rejected = [(name, status) for name, status in zip(nodes, statuses) if status != 200]
    summary = f"{len(nodes) - len(rejected)}/{len(nodes)} subscribed"
    if rejected:
        summary += f"; failing: {rejected[:5]}"
    return Result("SUBSCRIBE", not rejected, summary)
def _send_n(node_url: str, n: int) -> list[int]:
    """Publish n messages ("probe-0" .. "probe-<n-1>") sequentially.

    Returns the status code of each publish, in send order.
    """
    return [waku_publish(node_url, f"probe-{seq}".encode()) for seq in range(n)]
def _burst_until_blocked(node_url: str, msg_limit: int, overshoot: int = 3):
    """Send msg_limit+overshoot messages back-to-back, fast, recording codes.

    Designed to complete inside a single epoch; keep epoch_sec large enough
    that this burst can't straddle an epoch boundary.

    Returns (n_200, n_500, n_transport_err, two_hundred_after_block) where
    n_transport_err counts every code other than 200/500 and
    two_hundred_after_block flags a 200 appearing AFTER the first 500 (i.e.
    quota reset mid-burst => epoch straddle).
    """
    statuses = [
        waku_publish(node_url, f"burst-{seq}".encode(), timeout=10.0)
        for seq in range(msg_limit + overshoot)
    ]
    accepted = statuses.count(200)
    blocked = statuses.count(500)
    # Everything else: -1 (request failed), 4xx, etc.
    other = len(statuses) - accepted - blocked
    try:
        first_block = statuses.index(500)
    except ValueError:
        # Never blocked, so there is no "after the block" to inspect.
        ok_after_block = False
    else:
        ok_after_block = 200 in statuses[first_block + 1:]
    return accepted, blocked, other, ok_after_block
def _publish_until_ok(node_url: str, attempts: int = 20, spacing: float = 5.0) -> bool:
    """Retry a single publish until it returns 200 or attempts run out.

    Tolerates the post-startup window where discv5/gossipsub mesh is still
    forming and the RLN publish path transiently 500s. Sleeps `spacing`
    seconds after every unsuccessful attempt.
    """
    remaining = attempts
    while remaining > 0:
        status = waku_publish(node_url, b"warmup", timeout=10.0)
        if status == 200:
            return True
        time.sleep(spacing)
        remaining -= 1
    return False
def scenario_warmup(nodes: list[str], attempts: int = 20) -> Result:
    """Readiness gate: every node must successfully publish at least once.

    This absorbs mesh-formation churn so PROPAGATION/RATE_LIMIT aren't
    judging a not-yet-connected fleet. Consumes 1 nonce/node, well within
    msg_limit, and RATE_LIMIT's tolerance accounts for it.
    """
    def warm(name: str) -> bool:
        return _publish_until_ok(url_of(name), attempts)

    with cf.ThreadPoolExecutor(max_workers=min(8, len(nodes))) as pool:
        outcomes = list(pool.map(warm, nodes))

    stragglers = [name for name, ready in zip(nodes, outcomes) if not ready]
    summary = f"{len(nodes) - len(stragglers)}/{len(nodes)} nodes publishing"
    if stragglers:
        summary += f"; never ready: {stragglers[:5]}"
    return Result("WARMUP", not stragglers, summary)
def scenario_rate_limit(nodes: list[str], msg_limit: int, tolerance: int = 3) -> Result:
    """Per-node burst of msg_limit+3 messages within one epoch.

    The RLN invariant being checked:
    (a) a node must NEVER publish more than msg_limit in one epoch, and
    (b) the node must enforce a 500 ceiling once the quota is exhausted.

    Transient HTTP errors under concurrent load can lower the accepted count
    below msg_limit; that does NOT violate the invariant, so we accept
    successes in [msg_limit - tolerance, msg_limit]. successes > msg_limit OR
    a 200 after the first 500 means the epoch rolled mid-burst (raise
    RLN_RELAY_EPOCH_SEC); it is reported as a timing skew, not an RLN failure.
    """
    def burst(name: str):
        return _burst_until_blocked(url_of(name), msg_limit)

    # Cap concurrency: firing len(nodes)*(msg_limit+3) publishes all at once
    # saturates small CI runners (2 vCPU) and causes publish-path timeouts
    # that masquerade as rate-limit failures.
    with cf.ThreadPoolExecutor(max_workers=min(5, len(nodes))) as pool:
        outcomes = list(pool.map(burst, nodes))

    rate_failures = []  # genuine RLN misbehaviour
    timing_skews = []   # epoch straddled mid-burst, inconclusive
    for name, outcome in zip(nodes, outcomes):
        accepted, blocked, errors, ok_after_block = outcome
        if accepted > msg_limit or ok_after_block:
            timing_skews.append(
                (name, f"{accepted} ok, epoch rolled mid-burst (raise epoch_sec)")
            )
            continue
        if blocked == 0:
            rate_failures.append(
                (name, f"no 500 ceiling ({accepted} ok, {errors} err)")
            )
            continue
        if accepted < msg_limit - tolerance:
            rate_failures.append(
                (name, f"only {accepted}/{msg_limit} ok ({errors} transport err)")
            )

    # Only timing skews and no real failures: report as inconclusive.
    if timing_skews and not rate_failures:
        return Result(
            "RATE_LIMIT",
            False,
            "INCONCLUSIVE (timing) — raise RLN_RELAY_EPOCH_SEC; "
            f"{len(timing_skews)} node(s) straddled an epoch: {timing_skews[:3]}",
        )

    compliant = len(nodes) - len(rate_failures) - len(timing_skews)
    summary = (
        f"{compliant}/{len(nodes)} nodes enforced <= {msg_limit} then 500 "
        f"(tolerance {tolerance} for transport noise)"
    )
    if rate_failures:
        summary += f"; rate failures: {rate_failures[:3]}"
    if timing_skews:
        summary += f"; timing skews: {timing_skews[:3]}"
    return Result("RATE_LIMIT", not rate_failures and not timing_skews, summary)
def scenario_propagation(
    sender: str, receivers: list[str], settle_sec: int = 5
) -> Result:
    """Send one message on `sender`, expect it visible in every receiver's
    REST inbox after settle_sec.

    Args:
        sender: hostname of the node that publishes the marker message.
        receivers: hostnames whose inboxes are checked; may be empty.
        settle_sec: seconds to wait between the publish and the inbox reads.

    Returns:
        A PROPAGATION Result. With no receivers the scenario is skipped and
        reported as passed, without publishing anything.
    """
    if not receivers:
        # A single-node run has nobody to propagate to. Bail out before the
        # thread pool below, which rejects max_workers=0 with ValueError.
        return Result("PROPAGATION", True, "skipped: no receivers to check")

    marker = f"propagation-marker-{time.time_ns()}".encode()
    code = waku_publish(url_of(sender), marker)
    if code != 200:
        return Result("PROPAGATION", False, f"sender publish returned {code}")

    time.sleep(settle_sec)

    with cf.ThreadPoolExecutor(max_workers=min(32, len(receivers))) as ex:
        inboxes = list(ex.map(waku_get_messages, [url_of(r) for r in receivers]))

    # Compare with base64 padding stripped on both sides so a node that
    # returns the payload unpadded still matches.
    encoded_marker = base64.b64encode(marker).decode().rstrip("=")
    missing = []
    for r, inbox in zip(receivers, inboxes):
        if inbox is None:
            missing.append((r, "GET failed"))
            continue
        # Look for our marker payload in any message
        found = any(
            (m.get("payload") or "").rstrip("=") == encoded_marker
            for m in inbox
        )
        if not found:
            missing.append((r, f"{len(inbox)} msgs, marker not present"))

    return Result(
        "PROPAGATION",
        not missing,
        f"{len(receivers) - len(missing)}/{len(receivers)} receivers got the message"
        + (f"; missing on {missing[:3]}" if missing else ""),
    )
def scenario_epoch_reset(nodes: list[str], epoch_sec: int) -> Result:
    """After epoch_sec + slack, each node can send 1 more message — expect 200.

    Sleeps epoch_sec + 3 seconds, then publishes one message per node
    concurrently; passes when every publish returns 200.
    """
    wait_for = epoch_sec + 3
    print(f" sleeping {wait_for}s for epoch reset...")
    time.sleep(wait_for)

    def publish_one(name: str) -> int:
        return waku_publish(url_of(name), b"post-epoch")

    with cf.ThreadPoolExecutor(max_workers=len(nodes)) as pool:
        statuses = list(pool.map(publish_one, nodes))

    rejected = [(name, status) for name, status in zip(nodes, statuses) if status != 200]
    summary = f"{statuses.count(200)}/{len(nodes)} returned 200 after epoch reset"
    if rejected:
        summary += f"; failing: {rejected[:3]}"
    return Result("EPOCH_RESET", not rejected, summary)
# ---------------------------------------------------------------------------
# main
# ---------------------------------------------------------------------------
def main() -> int:
    """Parse CLI arguments, run the scenarios in order and print a summary.

    HEALTH, SUBSCRIBE and WARMUP are gates: if one fails the run stops there.
    PROPAGATION, RATE_LIMIT and EPOCH_RESET always run once the gates pass.

    Returns:
        The number of failed scenarios (0 = everything passed).
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--hostname-prefix", default="logos-delivery-simulator-nwaku-")
    ap.add_argument("--num-nodes", type=int, default=30)
    ap.add_argument("--msg-limit", type=int, default=30,
                    help="Must match RLN_RELAY_MSG_LIMIT in simulator .env")
    ap.add_argument("--epoch-sec", type=int, default=15,
                    help="Must match RLN_RELAY_EPOCH_SEC in simulator .env")
    ap.add_argument("--health-deadline-sec", type=int, default=180)
    args = ap.parse_args()

    # With no nodes there is nothing to test, and nodes[0] below would raise
    # IndexError; reject it as a usage error instead.
    if args.num_nodes < 1:
        ap.error("--num-nodes must be >= 1")

    nodes = [f"{args.hostname_prefix}{i}" for i in range(1, args.num_nodes + 1)]
    print(f"Testing {len(nodes)} nodes: {nodes[0]} .. {nodes[-1]}")
    print(f"Config: msg_limit={args.msg_limit}, epoch_sec={args.epoch_sec}")
    print()

    results: list[Result] = []

    def run(scenario_fn, *fn_args, **fn_kwargs) -> bool:
        """Run one scenario, record and print its Result, return its verdict."""
        r = scenario_fn(*fn_args, **fn_kwargs)
        results.append(r)
        print(r)
        return r.ok

    if not run(scenario_health, nodes, deadline_sec=args.health_deadline_sec):
        print("\nABORTING — nodes never reached healthy state.")
        return _summarize(results)

    if not run(scenario_subscribe, nodes):
        print("\nABORTING — could not subscribe nodes to pubsub topic.")
        return _summarize(results)

    # Readiness gate: wait out mesh-formation churn before judging behaviour.
    if not run(scenario_warmup, nodes):
        print("\nABORTING — fleet never reached a publishable state.")
        return _summarize(results)

    run(scenario_propagation, nodes[0], nodes[1:])

    # Rate limit: per-node burst; asserts at most msg_limit accepted (within
    # the scenario's tolerance) and then a 500 ceiling.
    # Requires epoch_sec large enough that the burst can't straddle an epoch.
    run(scenario_rate_limit, nodes, args.msg_limit)

    run(scenario_epoch_reset, nodes, args.epoch_sec)

    return _summarize(results)
def _summarize(results: list[Result]) -> int:
    """Print the pass count and one line per scenario between two rulers.

    Returns the number of scenarios that did not pass, which the caller uses
    as the process exit code.
    """
    ruler = "=" * 64
    passed = sum(entry.ok for entry in results)
    print()
    print(ruler)
    print(f" {passed}/{len(results)} scenarios passed")
    for entry in results:
        print(f" {entry}")
    print(ruler)
    return len(results) - passed
if __name__ == "__main__":
    # Exit status = number of failed scenarios returned by main() (0 = success).
    raise SystemExit(main())

View File

@ -0,0 +1,6 @@
# Source of truth for the RLN simulator E2E run (ci-rln-simulator.yml).
# A non-blank workflow_dispatch or workflow_call input overrides the matching
# value here for that run (blank input = use this file).
# The workflow sources this file with the shell, so keep it to plain
# KEY=value lines.

# logos-delivery branch that the workflow checks out, builds and tests.
BRANCH=master
# Number of nwaku nodes; written to NUM_LD_NODES in the simulator .env and
# passed to the test script as --num-nodes.
NUM_NODES=6
# RLN_RELAY_MSG_LIMIT (messages per node per epoch). The workflow's input
# description notes it must be >= the contract minimum (~20).
MSG_LIMIT=30
# RLN_RELAY_EPOCH_SEC. Must be large enough that the rate-limit burst cannot
# straddle an epoch boundary.
EPOCH_SEC=120