Mirror of https://github.com/logos-blockchain/logos-blockchain-pocs.git (synced 2026-01-02 13:13:09 +00:00)

storage benchmarks infrastructure

This commit is contained in:
parent 73441efb69
commit 4de3e1e68e

26  .gitignore  vendored
@@ -1,3 +1,25 @@
# Generated by Cargo
# will have compiled files and executables
/target/
*/target/

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
target/
.vscode

# These are backup files generated by rustfmt
**/*.rs.bk

# Files generated by build processes or applications
config.yml
store.*
*.txt
.env
.idea/
.vscode/

# Integration test temp dirs
tests/.tmp*

# Wildcard for any file that contains ignore
*ignore*
244  storage-benchmarks/Cargo.toml  Normal file
@@ -0,0 +1,244 @@
[package]
edition = "2021"
name = "storage-benchmarks"
version = "0.1.0"

[dependencies]
# Storage APIs - using public nomos repository
cryptarchia-engine = { git = "https://github.com/logos-co/nomos-node", package = "cryptarchia-engine" }
nomos-core = { git = "https://github.com/logos-co/nomos-node", package = "nomos-core" }
nomos-storage = { git = "https://github.com/logos-co/nomos-node", package = "nomos-storage", features = ["rocksdb-backend"] }

# Database
rocksdb = { version = "0.24", features = ["bindgen-runtime"] }

# Async runtime
tokio = { features = ["macros", "rt-multi-thread", "time"], version = "1" }
async-trait = "0.1"

# Data structures
bincode = "1.0"
bytes = "1.3"
chrono = { version = "0.4", features = ["serde"] }
env_logger = "0.10"
log = "0.4"
num_cpus = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3"
toml = "0.8"
rand = "0.8"
rand_chacha = "0.3"
ed25519-dalek = "2.0"
groth16 = { git = "https://github.com/logos-co/nomos-node", package = "groth16" }
pol = { git = "https://github.com/logos-co/nomos-node", package = "pol" }
futures = "0.3"
rayon = "1.0"
rand_distr = "0.4"
hdrhistogram = "7.5"
md5 = "0.7"
clap = { version = "4.0", features = ["derive"] }
thiserror = "1.0"
smallvec = "1.0"

# Optional allocator features
mimalloc = { version = "0.1", optional = true }
jemallocator = { version = "0.5", optional = true }

[features]
default = []
allocator-mimalloc = ["mimalloc"]
allocator-jemalloc = ["jemallocator"]

[dev-dependencies]
divan = { default-features = false, version = "0.1" }

# Production binaries
[[bin]]
name = "dataset_generator"
path = "src/bin/dataset_generator.rs"

[[bin]]
name = "verify_dataset_integrity"
path = "src/bin/verify_dataset_integrity.rs"

[[bin]]
name = "storage_bench_runner"
path = "src/bin/storage_bench_runner.rs"

[[bin]]
name = "verify_rocksdb_properties"
path = "src/bin/verify_rocksdb_properties.rs"

[[bin]]
name = "dataset_builder"
path = "src/bin/dataset_builder.rs"

# Educational examples
[[example]]
name = "storage_capacity_calculator"
path = "examples/storage_capacity_calculator.rs"

[lints.clippy]

# Nursery and allowed nursery warnings (new lints will warn by default)
nursery = { level = "warn", priority = -1 }

# Pedantic and allowed pedantic warnings (new lints will warn by default)
pedantic = { level = "warn", priority = -1 }

similar_names = { level = "allow" }

# Restriction and allowed restriction warnings (new lints will warn by default)
restriction = { level = "warn", priority = -1 }

absolute_paths = { level = "allow" }
alloc_instead_of_core = { level = "allow" }
arbitrary_source_item_ordering = { level = "allow" }
big_endian_bytes = { level = "allow" }
blanket_clippy_restriction_lints = { level = "allow" }
decimal_literal_representation = { level = "allow" }
default_numeric_fallback = { level = "allow" }
deref_by_slicing = { level = "allow" }
else_if_without_else = { level = "allow" }
exhaustive_enums = { level = "allow" }
exhaustive_structs = { level = "allow" }
exit = { level = "allow" }
expect_used = { level = "allow" }
field_scoped_visibility_modifiers = { level = "allow" }
float_arithmetic = { level = "allow" }
get_unwrap = { level = "allow" }
host_endian_bytes = { level = "allow" }
implicit_return = { level = "allow" }
integer_division_remainder_used = { level = "allow" }
iter_over_hash_type = { level = "allow" }
let_underscore_must_use = { level = "allow" }
let_underscore_untyped = { level = "allow" }
little_endian_bytes = { level = "allow" }
map_err_ignore = { level = "allow" }
min_ident_chars = { level = "allow" }
missing_asserts_for_indexing = { level = "allow" }
missing_docs_in_private_items = { level = "allow" }
missing_inline_in_public_items = { level = "allow" }
missing_trait_methods = { level = "allow" }
mixed_read_write_in_expression = { level = "allow" }
mod_module_files = { level = "allow" }
module_name_repetitions = { level = "allow" }
modulo_arithmetic = { level = "allow" }
panic = { level = "allow" }
panic_in_result_fn = { level = "allow" }
partial_pub_fields = { level = "allow" }
print_stderr = { level = "allow" }
print_stdout = { level = "allow" }
pub_use = { level = "allow" }
pub_with_shorthand = { level = "allow" }
question_mark_used = { level = "allow" }
self_named_module_files = { level = "allow" }
semicolon_inside_block = { level = "allow" }
single_call_fn = { level = "allow" }
single_char_lifetime_names = { level = "allow" }
std_instead_of_alloc = { level = "allow" }
std_instead_of_core = { level = "allow" }
struct_field_names = { level = "allow" }
unseparated_literal_suffix = { level = "allow" }
use_debug = { level = "allow" }
wildcard_enum_match_arm = { level = "allow" }

arithmetic_side_effects = { level = "allow" }
as_conversions = { level = "allow" }
as_pointer_underscore = { level = "allow" }
as_underscore = { level = "allow" }
assertions_on_result_states = { level = "allow" }
cast_possible_truncation = { level = "allow" }
cast_possible_wrap = { level = "allow" }
cast_precision_loss = { level = "allow" }
cast_sign_loss = { level = "allow" }
doc_broken_link = { level = "allow" }
string_slice = { level = "allow" }
future_not_send = { level = "allow" }
unused_self = { level = "allow" }
unnecessary_wraps = { level = "allow" }
single_match_else = { level = "allow" }
option_if_let_else = { level = "allow" }
uninlined_format_args = { level = "allow" }
needless_borrow = { level = "allow" }
str_to_string = { level = "allow" }
new_without_default = { level = "allow" }
must_use_candidate = { level = "allow" }
missing_const_for_fn = { level = "allow" }
large_stack_arrays = { level = "allow" }
unnecessary_to_owned = { level = "allow" }
undocumented_unsafe_blocks = { level = "allow" }
ref_as_ptr = { level = "allow" }
unused_async = { level = "allow" }
items_after_statements = { level = "allow" }
ok_expect = { level = "allow" }
map_with_unused_argument_over_ranges = { level = "allow" }
ignored_unit_patterns = { level = "allow" }
too_many_lines = { level = "allow" }
not_unsafe_ptr_arg_deref = { level = "allow" }
type_complexity = { level = "allow" }
single_match = { level = "allow" }
error_impl_error = { level = "allow" }
impl_trait_in_params = { level = "allow" }
indexing_slicing = { level = "allow" }
infinite_loop = { level = "allow" }
integer_division = { level = "allow" }
large_stack_frames = { level = "allow" }
missing_assert_message = { level = "allow" }
missing_errors_doc = { level = "allow" }
missing_panics_doc = { level = "allow" }
pattern_type_mismatch = { level = "allow" }
redundant_test_prefix = { level = "allow" }
ref_patterns = { level = "allow" }
renamed_function_params = { level = "allow" }
same_name_method = { level = "allow" }
shadow_reuse = { level = "allow" }
shadow_same = { level = "allow" }
shadow_unrelated = { level = "allow" }
tests_outside_test_module = { level = "allow" }
todo = { level = "allow" }
unimplemented = { level = "allow" }
unreachable = { level = "allow" }
unwrap_in_result = { level = "allow" }
unwrap_used = { level = "allow" }

[lints.rust]

unused_crate_dependencies = { level = "allow" }
unused_results = { level = "allow" }

impl_trait_redundant_captures = { level = "warn" }
missing_unsafe_on_extern = { level = "warn" }
redundant_imports = { level = "warn" }
redundant_lifetimes = { level = "warn" }
single_use_lifetimes = { level = "warn" }
trivial_numeric_casts = { level = "warn" }
unsafe_attr_outside_unsafe = { level = "warn" }
unsafe_op_in_unsafe_fn = { level = "warn" }
unstable_features = { level = "warn" }
unused_extern_crates = { level = "warn" }
unused_import_braces = { level = "warn" }
unused_lifetimes = { level = "warn" }
unused_macro_rules = { level = "warn" }
unused_qualifications = { level = "warn" }

absolute_paths_not_starting_with_crate = { level = "allow" }
ambiguous_negative_literals = { level = "allow" }
closure_returning_async_block = { level = "allow" }
deref_into_dyn_supertrait = { level = "allow" }
elided_lifetimes_in_paths = { level = "allow" }
ffi_unwind_calls = { level = "allow" }
impl_trait_overcaptures = { level = "allow" }
let_underscore_drop = { level = "allow" }
linker_messages = { level = "allow" }
macro_use_extern_crate = { level = "allow" }
missing_copy_implementations = { level = "allow" }
missing_debug_implementations = { level = "allow" }
missing_docs = { level = "allow" }
tail_expr_drop_order = { level = "allow" }
trivial_casts = { level = "allow" }
unit_bindings = { level = "allow" }
unreachable_pub = { level = "allow" }
unsafe_code = { level = "allow" }
variant_size_differences = { level = "allow" }
181  storage-benchmarks/README.md  Normal file
@@ -0,0 +1,181 @@
# Nomos Storage Benchmarks

Goal: tune RocksDB for Nomos validator workloads using realistic data and sizes. The approach is to run benchmarks with different parameters and settings, then compare the results.

## What it does

- Generates datasets that approximate realistic sizes and access patterns.
- Runs mixed read/write validator-style workloads against RocksDB.
- Varies RocksDB parameters (cache, write buffer, compaction, block size, compression).
- Records throughput and basic variability across repeated runs.

## Quick start

1) Generate a dataset
```bash
POL_PROOF_DEV_MODE=true RUST_LOG=info cargo run --bin dataset_generator -- --config dataset_configs/annual_mainnet.toml
```

2) Run a baseline
```bash
RUST_LOG=info cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120
```

3) Try parameters and compare
```bash
# Cache size
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 25
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 55

# Write buffer (use the best cache size observed)
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 256

# Compaction jobs
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128 --compaction-jobs 8
```

## How to evaluate

- One warmup and at least three measured runs per setting.
- Fixed seed when exact reproducibility is required.
- Compare mean ops/sec and variability across runs (see the sketch after this list).
- Change one setting at a time.
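The console summary reduces each setting to the mean ops/sec over the measured runs plus the min–max spread expressed as a percentage of the mean (the "variability" figure). A minimal sketch of that summary calculation, with illustrative numbers:

```rust
/// Mean ops/sec and min–max spread (% of mean) over repeated measured runs,
/// mirroring how the runner summarizes its raw measurements.
fn summarize(measurements: &[f64]) -> (f64, f64) {
    let mean = measurements.iter().sum::<f64>() / measurements.len() as f64;
    let min = measurements.iter().copied().fold(f64::INFINITY, f64::min);
    let max = measurements.iter().copied().fold(f64::NEG_INFINITY, f64::max);
    let variability_percent = if mean > 0.0 { (max - min) / mean * 100.0 } else { 0.0 };
    (mean, variability_percent)
}

fn main() {
    // Three measured runs for one parameter setting (illustrative values).
    let runs = [41_250.0, 43_100.0, 42_300.0];
    let (mean, variability) = summarize(&runs);
    println!("mean: {mean:.1} ops/sec, variability: {variability:.1}%");
}
```

A setting only counts as an improvement if its mean is higher and its variability stays in an acceptable range relative to the baseline.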
## Parameter ranges under evaluation

- Block cache: 25–55% of RAM
- Write buffer: 64–256 MB
- Compaction jobs: 4–12
- Block size: 16–64 KB
- Compression: none, lz4, snappy, zstd

## Profiles and datasets

Validator profiles:
- light (~100 validators)
- mainnet (~2000 validators)
- testnet (~1000 validators)

Datasets:
- quick_test.toml: ~27 MB (fast checks)
- testnet_sim.toml: ~1 GB
- annual_mainnet.toml: ~40 GB (see the sizing sketch below)
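For orientation, the ~40 GB figure for annual_mainnet.toml follows from its own values (30 s blocks, 34,371-byte average blocks, 365 days) plus DA shares and commitments for the validator's assigned blobs. The storage_capacity_calculator example performs the full calculation; the sketch below only reproduces the block-data part and treats the DA remainder as a note:

```rust
// Rough sizing for annual_mainnet.toml, using the values from that config.
fn main() {
    let block_time_s = 30u64;
    let days = 365u64;
    let block_size_bytes = 34_371u64;

    let blocks = days * 24 * 60 * 60 / block_time_s; // 1,051,200 blocks per year
    let block_bytes = blocks * block_size_bytes;
    println!(
        "blocks: {blocks}, block data: {:.1} GiB", // ≈ 33.6 GiB
        block_bytes as f64 / (1024.0 * 1024.0 * 1024.0)
    );
    // DA shares (1024 B each) and commitments (220 kB per assigned blob) for the
    // validator's assigned subnets account for the rest of the ~40 GB estimate.
}
```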
## CLI

```bash
cargo run --bin storage_bench_runner -- [OPTIONS]

--profile            light | mainnet | testnet
--memory             RAM limit in GB (default: 8)
--duration           Benchmark duration in seconds (default: 120)
--cache-size         Block cache size as % of RAM (20–60)
--write-buffer       Write buffer size in MB (64–512)
--compaction-jobs    Background compaction jobs (4–16)
--block-size         Table block size in KB (8–64)
--compression        none | lz4 | snappy | zstd
--seed               RNG seed
--warmup-runs        Warmup iterations (default: 1)
--measurement-runs   Measurement iterations (default: 3)
--read-only          Read-only mode
```

Reproducible run:
```bash
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --seed 12345
```
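The runner binary parses these flags with clap's derive API (clap with the derive feature is listed in Cargo.toml). The struct below is an illustration based on the flag list above, not the binary's actual definition:

```rust
use clap::Parser;

/// Illustrative options struct matching the documented flags; the real
/// storage_bench_runner may organize its CLI differently.
#[derive(Parser, Debug)]
struct Options {
    /// Validator profile: light | mainnet | testnet
    #[arg(long, default_value = "mainnet")]
    profile: String,
    /// RAM limit in GB
    #[arg(long, default_value_t = 8)]
    memory: u32,
    /// Benchmark duration in seconds
    #[arg(long, default_value_t = 120)]
    duration: u64,
    /// Block cache size as % of RAM (20–60)
    #[arg(long)]
    cache_size: Option<u32>,
    /// Write buffer size in MB (64–512)
    #[arg(long)]
    write_buffer: Option<u32>,
    /// Background compaction jobs (4–16)
    #[arg(long)]
    compaction_jobs: Option<u32>,
    /// RNG seed for reproducible runs
    #[arg(long)]
    seed: Option<u64>,
    /// Open the database read-only
    #[arg(long)]
    read_only: bool,
}

fn main() {
    let opts = Options::parse();
    println!("{opts:?}");
}
```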
## Test plan

Purpose: verify that benchmarks run, produce results, and that parameter changes have measurable effects.

### Scope

- Dataset generation at different sizes.
- Benchmark runs across profiles.
- Parameter sweeps for cache, write buffer, compaction, block size, compression.
- Result capture (JSON) and basic summary output.

### Environments

- Memory limits: 4 GB, 8 GB, 16 GB.
- Datasets: small (quick), medium, large.
- Duration: short for exploration (60–120 s), longer to confirm (180–300 s).

### Test cases

1. Dataset generation
   - Small dataset completes.
   - Large dataset resumes if partially present.
   - Outputs stored in the expected path.

2. Baseline benchmark
   - Runs with the selected profile and memory limit.
   - Produces JSON results and a console summary.

3. Cache size
   - 25%, 40%, 55%.
   - Compare mean ops/sec and variability.
   - Record the chosen value.

4. Write buffer
   - Keep the chosen cache size.
   - 128 MB, 256 MB (and 64/512 MB if needed).
   - Record the impact, pick a value.

5. Compaction jobs
   - 4, 8, 12 (or within system limits).
   - Check for stalls or CPU saturation.

6. Block size
   - 16 KB, 32 KB, 64 KB.
   - Evaluate read performance and variability.

7. Compression
   - none, lz4, snappy, zstd.
   - Compare throughput; consider disk footprint if relevant.

8. Reproducibility
   - Repeat a chosen run with a fixed seed (see the seeding sketch after this list).
   - Confirm similar results across iterations.

9. Memory sensitivity
   - Re-run the chosen settings at lower and higher memory limits.
   - Check for regressions.
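rand_chacha is one of the crate's dependencies, so a fixed --seed can drive a deterministic ChaCha stream for the generated access pattern; whether the runner seeds exactly this way is not shown in this commit. A minimal sketch of the idea:

```rust
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

// With a fixed seed, every run draws the same sequence of block indices,
// so repeated runs touch identical keys.
fn main() {
    let seed = 12_345u64; // e.g. the value passed via --seed
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    let block_indices: Vec<u32> = (0..5).map(|_| rng.gen_range(0..1_051_200)).collect();
    println!("{block_indices:?}"); // identical output on every run with this seed
}
```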
### Acceptance criteria

- All runs complete without errors.
- Results are saved (JSON present).
- Chosen settings show a measurable improvement over the baseline.
- Variability remains acceptable for this use case.

### Reporting

- Log the command lines and seeds used.
- Note the dataset, profile, memory, and duration for each run.
- Store JSON result files together for comparison.

## Outputs

- Datasets: ~/.nomos_storage_benchmarks/rocksdb_data
- Results (JSON): ~/.nomos_storage_benchmarks/results/
- The console summary shows mean ops/sec and variability.

## Requirements

- Rust 1.90.0 (pinned in rust-toolchain.toml)
- 8+ GB RAM (more for larger datasets)
- ~50+ GB disk for the largest dataset

## Notes

- Establish a baseline first, then change one parameter at a time.
- Keep runs short while exploring; confirm with longer runs when needed.

## Why no general-purpose benchmarking library

- Workloads require long-running mixed operations (reads, range scans, writes) against a prebuilt dataset; typical micro-benchmark frameworks focus on short, isolated functions.
- We need control over dataset size/layout, memory limits, and external RocksDB options; this is easier with a purpose-built runner.
- Results include per-run JSON with config and summary metrics; integrating this into a generic harness would add overhead without benefit here.
19  storage-benchmarks/dataset_configs/annual_mainnet.toml  Normal file
@@ -0,0 +1,19 @@
[dataset]
block_time_seconds = 30
days = 365
name = "annual_mainnet_conservative"

[network]
blobs_per_block = 50
load_name = "annual_mainnet"
total_subnets = 2048

[validator]
assigned_subnets = 10

[blocks]
size_bytes = 34371

[da]
commitment_size_bytes = 220000
share_size_bytes = 1024
19  storage-benchmarks/dataset_configs/quick_test.toml  Normal file
@@ -0,0 +1,19 @@
[dataset]
days = 1
block_time_seconds = 30
name = "quick_test"

[network]
load_name = "light_testnet"
blobs_per_block = 2
total_subnets = 2048

[validator]
assigned_subnets = 1

[blocks]
size_bytes = 10000

[da]
share_size_bytes = 512
commitment_size_bytes = 50000
19  storage-benchmarks/dataset_configs/testnet_sim.toml  Normal file
@@ -0,0 +1,19 @@
[dataset]
days = 7
block_time_seconds = 30
name = "testnet_simulation"

[network]
load_name = "medium_testnet"
blobs_per_block = 15
total_subnets = 2048

[validator]
assigned_subnets = 5

[blocks]
size_bytes = 34371

[da]
share_size_bytes = 1024
commitment_size_bytes = 220000
55  storage-benchmarks/dataset_configs/validator_profiles.toml  Normal file
@@ -0,0 +1,55 @@
# Spec-accurate validator operation profiles

[light]
name = "light"
description = "Light validator with minimal resources and spec-accurate patterns"
# Read frequencies
block_read_rate_hz = 2.0          # Block validation reads
da_share_read_rate_hz = 0.67      # DA sampling reads (20 samples per 30s block)
range_scan_rate_hz = 0.01         # Occasional sync serving
# Write frequencies
block_write_rate_hz = 0.033       # New block finalization (30s blocks)
da_share_write_rate_hz = 0.5      # New DA share storage
commitment_write_rate_hz = 0.5    # New commitments
# Access patterns (spec-accurate temporal distribution)
recent_access_ratio = 0.80        # 80% reads from recent data (Zipfian)
historical_access_ratio = 0.20    # 20% reads from historical data (uniform)
# Network scaling
total_validators = 100            # Small testnet
assigned_subnets = 20             # High subnet assignment for small network

[mainnet]
name = "mainnet"
description = "Mainnet validator with high activity and spec-accurate patterns"
# Read frequencies (higher validation load)
block_read_rate_hz = 10.0         # High block validation rate
da_share_read_rate_hz = 5.0       # Higher DA sampling frequency
range_scan_rate_hz = 0.1          # More frequent sync serving
# Write frequencies (mainnet load)
block_write_rate_hz = 0.033       # Same block time
da_share_write_rate_hz = 5.0      # High DA write activity
commitment_write_rate_hz = 5.0    # Matching commitment writes
# Access patterns (more recent focus)
recent_access_ratio = 0.90        # 90% recent access (heavy tip bias)
historical_access_ratio = 0.10    # 10% historical access
# Network scaling
total_validators = 2000           # Mainnet scale
assigned_subnets = 10             # Medium subnet assignment

[testnet]
name = "testnet"
description = "Testnet network with heavy sync activity and range scanning"
# Read frequencies (sync serving dominates)
block_read_rate_hz = 50.0         # Heavy block serving for sync
da_share_read_rate_hz = 10.0      # Moderate DA validation
range_scan_rate_hz = 10.0         # Continuous range scans for sync
# Write frequencies (reduced during sync)
block_write_rate_hz = 0.01        # Minimal new blocks
da_share_write_rate_hz = 0.1      # Reduced DA writes
commitment_write_rate_hz = 0.1    # Reduced commitments
# Access patterns (historical focus for sync)
recent_access_ratio = 0.20        # Mostly historical data
historical_access_ratio = 0.80    # Heavy historical access
# Network scaling
total_validators = 1000           # Medium network during sync
assigned_subnets = 10             # Standard subnet assignment
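The *_rate_hz values above are turned into per-operation pacing intervals by the benchmark workloads; `safe_interval_from_hz` in src/benchmark/utilities.rs (added later in this commit) performs the conversion. A short usage-style sketch of the same arithmetic:

```rust
use std::time::Duration;

// Same conversion as safe_interval_from_hz in src/benchmark/utilities.rs:
// a frequency in Hz becomes the delay between issued operations.
fn interval_from_hz(frequency_hz: f64) -> Result<Duration, String> {
    if frequency_hz <= 0.0 {
        return Err(format!("Invalid frequency {frequency_hz} Hz"));
    }
    Ok(Duration::from_millis((1000.0 / frequency_hz) as u64))
}

fn main() {
    // da_share_read_rate_hz = 0.67 in the [light] profile above
    // -> roughly one DA sampling read every 1.5 s.
    println!("{:?}", interval_from_hz(0.67)); // Ok(1.492s)
}
```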
440  storage-benchmarks/examples/storage_capacity_calculator.rs  Normal file
@@ -0,0 +1,440 @@
//! Storage capacity estimator
//!
//! Computes block and DA storage requirements for various time periods and
//! network scenarios. Produces summaries, time breakdowns, and simple hardware
//! recommendations.

use std::{collections::HashMap, fs};

use serde::{Deserialize, Serialize};
use storage_benchmarks::BenchConfig;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimePeriod {
    /// Number of days represented by the period
    pub days: u64,
    /// Human-readable label (e.g., "1 year")
    pub description: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkConfig {
    /// Block time in seconds
    pub block_time_seconds: u64,
    /// Average block size in bytes
    pub avg_block_size_bytes: u64,
    /// Total DA subnets
    pub total_subnets: u64,
    /// DA share size in bytes
    pub da_share_size_bytes: u64,
    /// DA commitment size in bytes
    pub da_commitment_size_bytes: u64,
    /// Shares per blob
    pub shares_per_blob: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkScenario {
    /// Scenario name
    pub name: String,
    /// Blobs per block
    pub blobs_per_block: u64,
    /// Total validators used to estimate DA responsibility
    pub total_validators: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalculationConfig {
    /// Time window for the calculation
    pub time_period: TimePeriod,
    /// Network parameters used across scenarios
    pub network: NetworkConfig,
    /// Scenarios to evaluate
    pub scenarios: Vec<NetworkScenario>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockDataResults {
    /// Blocks produced per day
    pub blocks_per_day: u64,
    /// Total blocks in the period
    pub blocks_for_period: u64,
    /// Average block size in KiB
    pub avg_block_size_kb: u64,
    /// Total block data size in GiB for the period
    pub total_block_data_gb: f64,
    /// Period label
    pub time_period_description: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioResults {
    /// Scenario label
    pub scenario_name: String,
    /// Blobs per block for this scenario
    pub blobs_per_block: u64,
    /// Total validators
    pub total_validators: u64,
    /// Typical subnets assigned per validator
    pub typical_subnets_per_validator: u64,
    /// Percent of subnets likely assigned to a validator
    pub subnet_assignment_percent: f64,
    /// Count of DA shares stored by the validator over the period
    pub shares_stored_count: u64,
    /// Count of blobs assigned over the period
    pub blobs_assigned_count: u64,
    /// DA shares size in GiB
    pub da_shares_gb: f64,
    /// DA commitments size in GiB
    pub da_commitments_gb: f64,
    /// Total DA data size in GiB
    pub total_da_gb: f64,
    /// Total validator storage in GiB (blocks + DA)
    pub total_validator_storage_gb: f64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeBreakdown {
    /// Sequential period index
    pub period_number: u64,
    /// Label (Month/Week/Day N)
    pub period_description: String,
    /// Cumulative storage at this step in GiB
    pub cumulative_gb: f64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRecommendation {
    /// Scenario label
    pub scenario: String,
    /// Required storage in GiB for the period
    pub storage_gb_for_period: u64,
    /// Recommended device size
    pub recommended_storage: String,
}

// `Clone` is derived here (and on the result structs above) because
// `save_capacity_results` clones these results into the saved report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageCalculationResults {
    /// Input config used to compute results
    pub calculation_config: CalculationConfig,
    /// Aggregate block data for the period
    pub block_data: BlockDataResults,
    /// Per-scenario storage summaries
    pub scenarios: Vec<ScenarioResults>,
    /// Time-based accumulation for visualization
    pub time_breakdown: Vec<TimeBreakdown>,
    /// Simple hardware sizing suggestions
    pub hardware_recommendations: Vec<HardwareRecommendation>,
    /// Notes for stress testing considerations
    pub stress_testing_notes: Vec<String>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct CapacityCalculationReport {
    pub calculation_results: std::collections::HashMap<String, StorageCalculationResults>,
    pub summary: CalculationSummary,
    pub metadata: ReportMetadata,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct CalculationSummary {
    pub scenarios_calculated: usize,
    pub total_time_periods: usize,
    pub calculation_timestamp: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ReportMetadata {
    pub tool: String,
    pub version: String,
    pub description: String,
}

impl Default for NetworkConfig {
    fn default() -> Self {
        Self {
            block_time_seconds: 30,
            avg_block_size_bytes: 34_371,
            total_subnets: 2048,
            da_share_size_bytes: 1_024,
            da_commitment_size_bytes: 220_000,
            shares_per_blob: 512,
        }
    }
}

impl Default for CalculationConfig {
    fn default() -> Self {
        Self {
            time_period: TimePeriod {
                days: 365,
                description: "1 year".to_string(),
            },
            network: NetworkConfig::default(),
            scenarios: vec![
                NetworkScenario {
                    name: "Conservative".to_string(),
                    blobs_per_block: 50,
                    total_validators: 2000,
                },
                NetworkScenario {
                    name: "Active".to_string(),
                    blobs_per_block: 100,
                    total_validators: 2000,
                },
                NetworkScenario {
                    name: "High Activity".to_string(),
                    blobs_per_block: 200,
                    total_validators: 3000,
                },
                NetworkScenario {
                    name: "Peak".to_string(),
                    blobs_per_block: 500,
                    total_validators: 5000,
                },
            ],
        }
    }
}

/// Compute storage with blob/share separation for DA
///
/// - Includes blocks, DA shares, and commitments
/// - Returns summaries, breakdowns, and recommendations
fn calculate_storage_requirements(config: &CalculationConfig) -> StorageCalculationResults {
    let blocks_per_day = (24 * 60 * 60) / config.network.block_time_seconds;
    let total_blocks_for_period = config.time_period.days * blocks_per_day;

    let block_data_for_period_gb = (total_blocks_for_period as f64
        * config.network.avg_block_size_bytes as f64)
        / (1024.0 * 1024.0 * 1024.0);

    let block_data = BlockDataResults {
        blocks_per_day,
        blocks_for_period: total_blocks_for_period,
        avg_block_size_kb: config.network.avg_block_size_bytes / 1024,
        total_block_data_gb: block_data_for_period_gb,
        time_period_description: config.time_period.description.clone(),
    };

    let mut scenarios = Vec::new();
    let mut scenario_storage_map = HashMap::new();

    for scenario in &config.scenarios {
        let typical_subnets_per_validator =
            config.network.total_subnets / (scenario.total_validators / 10).max(1);
        let subnet_assignment_probability =
            typical_subnets_per_validator as f64 / config.network.total_subnets as f64;

        let total_blobs_for_period = total_blocks_for_period * scenario.blobs_per_block;

        let validator_assigned_blobs =
            (total_blobs_for_period as f64 * subnet_assignment_probability) as u64;

        let shares_per_assigned_blob =
            config.network.shares_per_blob / config.network.total_subnets;
        let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob.max(1);
        let da_shares_size_gb = (total_shares_stored * config.network.da_share_size_bytes) as f64
            / (1024.0 * 1024.0 * 1024.0);

        let da_commitments_size_gb = (validator_assigned_blobs
            * config.network.da_commitment_size_bytes) as f64
            / (1024.0 * 1024.0 * 1024.0);

        let total_da_size_gb = da_shares_size_gb + da_commitments_size_gb;
        let total_storage_for_period = block_data_for_period_gb + total_da_size_gb;

        scenario_storage_map.insert(scenario.name.clone(), total_da_size_gb);

        scenarios.push(ScenarioResults {
            scenario_name: scenario.name.clone(),
            blobs_per_block: scenario.blobs_per_block,
            total_validators: scenario.total_validators,
            typical_subnets_per_validator,
            subnet_assignment_percent: subnet_assignment_probability * 100.0,
            shares_stored_count: total_shares_stored,
            blobs_assigned_count: validator_assigned_blobs,
            da_shares_gb: da_shares_size_gb,
            da_commitments_gb: da_commitments_size_gb,
            total_da_gb: total_da_size_gb,
            total_validator_storage_gb: total_storage_for_period,
        });
    }

    let breakdown_periods = if config.time_period.days >= 365 {
        12
    } else if config.time_period.days >= 30 {
        config.time_period.days / 7
    } else {
        config.time_period.days
    };

    let first_scenario_da_gb = scenario_storage_map.values().next().copied().unwrap_or(0.0);
    let total_gb_per_period = block_data_for_period_gb + first_scenario_da_gb;
    let increment_gb = total_gb_per_period / breakdown_periods as f64;

    let mut time_breakdown = Vec::new();
    for period in 1..=breakdown_periods {
        let cumulative_gb = increment_gb * period as f64;
        let period_desc = if config.time_period.days >= 365 {
            format!("Month {}", period)
        } else if config.time_period.days >= 30 {
            format!("Week {}", period)
        } else {
            format!("Day {}", period)
        };

        time_breakdown.push(TimeBreakdown {
            period_number: period,
            period_description: period_desc,
            cumulative_gb,
        });
    }

    let mut hardware_recommendations = Vec::new();
    for scenario in &scenarios {
        let storage_gb = scenario.total_validator_storage_gb as u64;
        let recommended = if storage_gb < 50 {
            "100GB+ storage"
        } else if storage_gb < 100 {
            "200GB+ storage"
        } else if storage_gb < 200 {
            "500GB+ storage"
        } else if storage_gb < 500 {
            "1TB+ storage"
        } else {
            "2TB+ storage"
        };

        hardware_recommendations.push(HardwareRecommendation {
            scenario: scenario.scenario_name.clone(),
            storage_gb_for_period: storage_gb,
            recommended_storage: recommended.to_string(),
        });
    }

    let stress_testing_notes = vec![
        "Memory pressure increases with database size".to_string(),
        "Cache efficiency decreases as dataset grows beyond memory".to_string(),
        "Compaction overhead increases with write frequency".to_string(),
        "Range scan performance degrades with database size".to_string(),
        "Storage benchmarks should test multi-GB datasets for realism".to_string(),
        format!(
            "Test with datasets representing {}-{} days of operation",
            config.time_period.days / 4,
            config.time_period.days / 2
        ),
    ];

    StorageCalculationResults {
        calculation_config: config.clone(),
        block_data,
        scenarios,
        time_breakdown,
        hardware_recommendations,
        stress_testing_notes,
    }
}

fn main() {
    let default_config = CalculationConfig::default();

    let monthly_config = CalculationConfig {
        time_period: TimePeriod {
            days: 30,
            description: "30 days".to_string(),
        },
        network: NetworkConfig::default(),
        scenarios: vec![
            NetworkScenario {
                name: "Testnet Conservative".to_string(),
                blobs_per_block: 25,
                total_validators: 100,
            },
            NetworkScenario {
                name: "Testnet Active".to_string(),
                blobs_per_block: 50,
                total_validators: 100,
            },
        ],
    };

    let weekly_config = CalculationConfig {
        time_period: TimePeriod {
            days: 7,
            description: "1 week".to_string(),
        },
        network: NetworkConfig {
            block_time_seconds: 15,
            shares_per_blob: 256,
            ..NetworkConfig::default()
        },
        scenarios: vec![NetworkScenario {
            name: "Development".to_string(),
            blobs_per_block: 10,
            total_validators: 10,
        }],
    };

    let configs = vec![
        ("annual", default_config),
        ("monthly", monthly_config),
        ("weekly", weekly_config),
    ];

    let mut all_results = HashMap::new();

    for (name, config) in configs {
        let results = calculate_storage_requirements(&config);
        all_results.insert(name, results);
    }

    save_capacity_results(&all_results);

    match serde_json::to_string_pretty(&all_results) {
        Ok(json) => println!("{}", json),
        Err(e) => eprintln!("Error serializing results: {}", e),
    }
}

fn save_capacity_results(all_results: &HashMap<&str, StorageCalculationResults>) {
    let results_dir = BenchConfig::results_path();
    let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
    let filename = format!("storage_capacity_calculation_{}.json", timestamp);
    let filepath = results_dir.join(filename);

    let calculation_results: std::collections::HashMap<String, StorageCalculationResults> =
        all_results
            .iter()
            .map(|(k, v)| (k.to_string(), v.clone()))
            .collect();

    let report = CapacityCalculationReport {
        calculation_results,
        summary: CalculationSummary {
            scenarios_calculated: all_results.len(),
            total_time_periods: all_results
                .values()
                .map(|r| r.scenarios.len())
                .sum::<usize>(),
            calculation_timestamp: chrono::Utc::now().to_rfc3339(),
        },
        metadata: ReportMetadata {
            tool: "storage_capacity_calculator".to_string(),
            version: env!("CARGO_PKG_VERSION").to_string(),
            description: "Storage capacity estimates for Nomos validator scenarios".to_string(),
        },
    };

    match fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
        Ok(_) => eprintln!(
            "Capacity calculation results saved to: {}",
            filepath.display()
        ),
        Err(e) => eprintln!(
            "Failed to save capacity results to {}: {}",
            filepath.display(),
            e
        ),
    }
}
12  storage-benchmarks/rust-toolchain.toml  Normal file
@@ -0,0 +1,12 @@
[toolchain]
# Keep this version in sync in the following places as well:
# * Dockerfile
# * flake.nix
# * testnet/Dockerfile
# Also update the nightly toolchain to the latest nightly of the new version in the following places:
# * .github/workflows/code-check.yml (fmt job)
# * .pre-commit-config.yml (fmt hook)
# Then, if any new allow-by-default rustc lint has been introduced or stabilized, add it to the respective entry in our `config.toml`.
channel = "1.90.0"
# Clippy should be included in the default profile, but in some cases it is not installed, so we force it with an explicit declaration.
components = ["clippy"]
5  storage-benchmarks/rustfmt.toml  Normal file
@@ -0,0 +1,5 @@
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
reorder_imports = true
reorder_modules = true
wrap_comments = true
71  storage-benchmarks/src/benchmark/analysis.rs  Normal file
@@ -0,0 +1,71 @@
use nomos_storage::{
    api::{chain::StorageChainApi as _, da::StorageDaApi as _},
    backends::rocksdb::RocksBackend,
};

use super::{create_blob_id, create_header_id};

pub async fn analyze_dataset(
    storage: &mut RocksBackend,
) -> Result<(usize, usize), Box<dyn std::error::Error>> {
    log::info!("Analyzing dataset size with adaptive probing...");

    let mut upper_bound = 10000;
    while upper_bound < 10_000_000 {
        let header_id = create_header_id(upper_bound);
        let block_result = storage.get_block(header_id).await;
        match block_result {
            Ok(Some(_)) => {}
            Ok(None) | Err(_) => {
                break;
            }
        }
        upper_bound *= 2;
    }

    let mut low = upper_bound / 2;
    let mut high = upper_bound;
    let mut block_count = low;

    while low <= high {
        let mid = usize::midpoint(low, high);
        let header_id = create_header_id(mid);

        match storage.get_block(header_id).await {
            Ok(Some(_)) => {
                block_count = mid;
                low = mid + 1;
            }
            _ => {
                high = mid - 1;
            }
        }
    }

    let mut share_count = 0;
    let da_sample_size = std::cmp::min(1000, block_count / 100);

    for blob_idx in 0..da_sample_size {
        for subnet in 0..50 {
            let blob_id = create_blob_id(blob_idx, 0);
            let share_idx = [subnet as u8, 0u8];
            if let Ok(Some(_)) = storage.get_light_share(blob_id, share_idx).await {
                share_count += 1;
            }
        }
    }

    let estimated_da_total = if da_sample_size > 0 {
        share_count * (block_count / da_sample_size)
    } else {
        share_count
    };

    log::info!("DA estimation: sampled {share_count} objects from {da_sample_size} blocks, extrapolated to {estimated_da_total} total (assumes uniform distribution)");

    log::info!(
        "Dataset analysis complete: {block_count} blocks, ~{estimated_da_total} DA objects (sampled)"
    );

    Ok((block_count, estimated_da_total))
}
11  storage-benchmarks/src/benchmark/mod.rs  Normal file
@@ -0,0 +1,11 @@
pub mod analysis;
pub mod runner;
pub mod types;
pub mod utilities;
pub mod workloads;

pub use analysis::*;
pub use runner::*;
pub use types::*;
pub use utilities::*;
pub use workloads::*;
250  storage-benchmarks/src/benchmark/runner.rs  Normal file
@@ -0,0 +1,250 @@
use std::time::Duration;

use log::info;
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use serde::{Deserialize, Serialize};

use super::{
    analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary, BenchmarkReport,
    BenchmarkResultsSummary, ConcurrentBenchmarkResult, ReportMetadata, StatisticsSummary,
};
use crate::{
    config::{ProductionBenchConfig, ValidatorProfile, ValidatorProfiles},
    BenchConfig,
};

pub struct BenchmarkRunner {
    config: ProductionBenchConfig,
    profile: ValidatorProfile,
    storage_config: BenchConfig,
    execution_state: ExecutionState,
    results: BenchmarkResults,
}

#[derive(Debug, Clone, Default)]
struct ExecutionState {
    warmup_completed: usize,
    measurements_completed: usize,
    dataset_size: Option<(usize, usize)>,
}

#[derive(Debug, Clone, Default)]
pub struct BenchmarkResults {
    pub raw_measurements: Vec<f64>,
    pub warmup_results: Vec<f64>,
    pub detailed_results: Vec<ConcurrentBenchmarkResult>,
    pub mean_ops_sec: f64,
    pub variability_percent: f64,
    pub best_result: Option<ConcurrentBenchmarkResult>,
    pub stats_summary: Option<RocksDbStatsSummary>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RocksDbStatsSummary {
    pub cache_hit_rate_improvement: f64,
    pub l0_file_growth: i64,
    pub compaction_activity: u64,
    pub memory_usage_change: i64,
}

impl BenchmarkRunner {
    pub fn new(config: ProductionBenchConfig) -> Result<Self, Box<dyn std::error::Error>> {
        config.validate()?;

        let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
        let profile = profiles
            .get_profile(&config.profile.to_string())
            .ok_or_else(|| format!("Profile '{}' not found", config.profile))?
            .clone();

        let storage_config = BenchConfig::production();
        if !storage_config.settings.db_path.exists() {
            return Err("No dataset found - run dataset_generator first".into());
        }

        Ok(Self {
            config,
            profile,
            storage_config,
            execution_state: ExecutionState::default(),
            results: BenchmarkResults::default(),
        })
    }

    pub async fn execute_benchmark(
        &mut self,
    ) -> Result<BenchmarkResults, Box<dyn std::error::Error>> {
        self.setup_memory_limits();
        self.analyze_dataset().await?;

        info!("Starting warmup phase: {} runs", self.config.warmup_runs);
        for i in 1..=self.config.warmup_runs {
            info!("Warmup run {}/{}", i, self.config.warmup_runs);
            let result = self.run_single_iteration().await?;
            self.results.warmup_results.push(result);
            self.execution_state.warmup_completed = i;
        }

        info!(
            "Starting measurement phase: {} runs",
            self.config.measurement_runs
        );
        for i in 1..=self.config.measurement_runs {
            info!("Measurement run {}/{}", i, self.config.measurement_runs);
            let result = self.run_single_iteration().await?;
            info!("Run {i} result: {result:.1} ops/sec");
            self.results.raw_measurements.push(result);
            self.execution_state.measurements_completed = i;
        }

        self.calculate_final_statistics();
        self.save_results();

        Ok(self.results.clone())
    }

    fn setup_memory_limits(&self) {
        info!("Setting memory limit to {}GB", self.config.memory);
    }

    async fn analyze_dataset(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        let mut storage_settings = self.storage_config.settings.clone();
        storage_settings.read_only = self.config.read_only;

        let mut storage = RocksBackend::new(storage_settings)?;
        let dataset_size = analyze_dataset(&mut storage).await?;

        self.execution_state.dataset_size = Some(dataset_size);
        info!(
            "Dataset analysis: {} blocks, {} shares",
            dataset_size.0, dataset_size.1
        );

        Ok(())
    }

    async fn run_single_iteration(&mut self) -> Result<f64, Box<dyn std::error::Error>> {
        let mut storage_settings = self.storage_config.settings.clone();
        storage_settings.read_only = self.config.read_only;

        let storage = RocksBackend::new(storage_settings)?;
        let dataset_size = self.execution_state.dataset_size.unwrap_or((0, 0));

        match run_concurrent_validator_benchmark(
            storage,
            Duration::from_secs(self.config.duration),
            &self.profile,
            dataset_size,
            self.config.read_only,
        )
        .await
        {
            Ok(detailed_result) => {
                let throughput = detailed_result.combined_throughput();
                self.results.detailed_results.push(detailed_result);
                Ok(throughput)
            }
            Err(e) => {
                log::error!("Benchmark iteration failed: {e}");
                Ok(0.0)
            }
        }
    }

    fn calculate_final_statistics(&mut self) {
        if self.results.raw_measurements.is_empty() {
            return;
        }

        let mean = self.results.raw_measurements.iter().sum::<f64>()
            / self.results.raw_measurements.len() as f64;
        let min = self
            .results
            .raw_measurements
            .iter()
            .fold(f64::INFINITY, |a, &b| a.min(b));
        let max = self
            .results
            .raw_measurements
            .iter()
            .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
        let variability = if mean > 0.0 {
            (max - min) / mean * 100.0
        } else {
            0.0
        };

        self.results.mean_ops_sec = mean;
        self.results.variability_percent = variability;

        if let Some(best_idx) = self
            .results
            .raw_measurements
            .iter()
            .enumerate()
            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
            .map(|(idx, _)| idx)
        {
            self.results.best_result = self.results.detailed_results.get(best_idx).cloned();
        }
    }

    fn save_results(&self) {
        let results_dir = BenchConfig::results_path();
        let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
        let filename = format!(
            "bench_{}_{}_{}gb_{}.json",
            self.config.profile, timestamp, self.config.memory, self.config.duration
        );
        let filepath = results_dir.join(filename);

        let report = BenchmarkReport {
            config_summary: BenchConfigSummary {
                profile: format!("{:?}", self.config.profile),
                memory_gb: self.config.memory,
                duration_seconds: self.config.duration,
                warmup_runs: self.config.warmup_runs,
                measurement_runs: self.config.measurement_runs,
            },
            results: BenchmarkResultsSummary {
                raw_measurements: self.results.raw_measurements.clone(),
                warmup_results: self.results.warmup_results.clone(),
                statistics: StatisticsSummary {
                    mean_ops_sec: self.results.mean_ops_sec,
                    min_ops_sec: 0.0,
                    max_ops_sec: 0.0,
                    variability_percent: self.results.variability_percent,
                    sample_count: self.results.raw_measurements.len(),
                },
            },
            metadata: ReportMetadata {
                timestamp: chrono::Utc::now().to_rfc3339(),
                tool_version: env!("CARGO_PKG_VERSION").to_owned(),
                runner_type: "batch".to_owned(),
            },
        };

        match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
            Ok(()) => log::info!(
                "Stateful benchmark results saved to: {}",
                filepath.display()
            ),
            Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
        }
    }

    #[must_use]
    pub const fn execution_progress(&self) -> (usize, usize, usize, usize) {
        (
            self.execution_state.warmup_completed,
            self.config.warmup_runs,
            self.execution_state.measurements_completed,
            self.config.measurement_runs,
        )
    }

    #[must_use]
    pub const fn current_results(&self) -> &BenchmarkResults {
        &self.results
    }
}
266
storage-benchmarks/src/benchmark/types.rs
Normal file
266
storage-benchmarks/src/benchmark/types.rs
Normal file
@ -0,0 +1,266 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
config::types::WorkloadType,
|
||||
metrics::{LatencyPercentiles, RocksDbStats},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct WorkloadStreamResult {
|
||||
pub workload_type: WorkloadType,
|
||||
|
||||
pub executed: bool,
|
||||
|
||||
pub operations_total: u64,
|
||||
|
||||
pub operations_success: u64,
|
||||
|
||||
pub bytes_read: u64,
|
||||
|
||||
pub bytes_written: u64,
|
||||
|
||||
pub duration: Duration,
|
||||
|
||||
pub errors: u64,
|
||||
|
||||
pub cache_misses: u64,
|
||||
|
||||
pub latency_percentiles: Option<LatencyPercentiles>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ConcurrentBenchmarkResult {
|
||||
pub block_validation: WorkloadStreamResult,
|
||||
pub da_sampling: WorkloadStreamResult,
|
||||
|
||||
pub da_commitments: WorkloadStreamResult,
|
||||
pub ibd_serving: WorkloadStreamResult,
|
||||
pub block_storage: WorkloadStreamResult,
|
||||
pub da_storage: WorkloadStreamResult,
|
||||
|
||||
pub total_duration: Duration,
|
||||
|
||||
pub peak_memory_mb: f64,
|
||||
|
||||
pub resource_contention_factor: f64,
|
||||
|
||||
pub concurrent_operations_peak: u64,
|
||||
|
||||
pub rocksdb_stats_before: RocksDbStats,
|
||||
|
||||
pub rocksdb_stats_after: RocksDbStats,
|
||||
}
|
||||
|
||||
impl ConcurrentBenchmarkResult {
|
||||
#[must_use]
|
||||
pub const fn total_operations(&self) -> u64 {
|
||||
let mut total = 0;
|
||||
if self.block_validation.executed {
|
||||
total += self.block_validation.operations_total;
|
||||
}
|
||||
if self.da_sampling.executed {
|
||||
total += self.da_sampling.operations_total;
|
||||
}
|
||||
if self.da_commitments.executed {
|
||||
total += self.da_commitments.operations_total;
|
||||
}
|
||||
if self.ibd_serving.executed {
|
||||
total += self.ibd_serving.operations_total;
|
||||
}
|
||||
if self.block_storage.executed {
|
||||
total += self.block_storage.operations_total;
|
||||
}
|
||||
if self.da_storage.executed {
|
||||
total += self.da_storage.operations_total;
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn total_success(&self) -> u64 {
|
||||
let mut total = 0;
|
||||
if self.block_validation.executed {
|
||||
total += self.block_validation.operations_success;
|
||||
}
|
||||
if self.da_sampling.executed {
|
||||
total += self.da_sampling.operations_success;
|
||||
}
|
||||
if self.da_commitments.executed {
|
||||
total += self.da_commitments.operations_success;
|
||||
}
|
||||
if self.ibd_serving.executed {
|
||||
total += self.ibd_serving.operations_success;
|
||||
}
|
||||
if self.block_storage.executed {
|
||||
total += self.block_storage.operations_success;
|
||||
}
|
||||
if self.da_storage.executed {
|
||||
total += self.da_storage.operations_success;
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn combined_throughput(&self) -> f64 {
|
||||
self.total_success() as f64 / self.total_duration.as_secs_f64()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn success_rate(&self) -> f64 {
|
||||
if self.total_operations() > 0 {
|
||||
self.total_success() as f64 / self.total_operations() as f64
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn total_data_throughput_mbps(&self) -> f64 {
|
||||
let mut total_bytes = 0;
|
||||
if self.block_validation.executed {
|
||||
total_bytes += self.block_validation.bytes_read;
|
||||
}
|
||||
if self.da_sampling.executed {
|
||||
total_bytes += self.da_sampling.bytes_read;
|
||||
}
|
||||
if self.da_commitments.executed {
|
||||
total_bytes += self.da_commitments.bytes_read;
|
||||
}
|
||||
if self.ibd_serving.executed {
|
||||
total_bytes += self.ibd_serving.bytes_read;
|
||||
}
|
||||
if self.block_storage.executed {
|
||||
total_bytes += self.block_storage.bytes_written;
|
||||
}
|
||||
if self.da_storage.executed {
|
||||
total_bytes += self.da_storage.bytes_written;
|
||||
}
|
||||
total_bytes as f64 / 1024.0 / 1024.0 / self.total_duration.as_secs_f64()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct StorageBenchReport {
|
||||
pub benchmark_config: BenchConfigSummary,
|
||||
pub results: BenchResultsSummary,
|
||||
pub timestamp: String,
|
||||
pub tool_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BenchConfigSummary {
|
||||
pub profile: String,
|
||||
pub memory_gb: u32,
|
||||
pub duration_seconds: u64,
|
||||
pub warmup_runs: usize,
|
||||
pub measurement_runs: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BenchResultsSummary {
|
||||
pub raw_measurements: Vec<f64>,
|
||||
pub statistics: StatisticsSummary,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct StatisticsSummary {
|
||||
pub mean_ops_sec: f64,
|
||||
pub min_ops_sec: f64,
|
||||
pub max_ops_sec: f64,
|
||||
pub variability_percent: f64,
|
||||
pub sample_count: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DatasetGenerationReport {
|
||||
pub generation_summary: GenerationSummary,
|
||||
pub performance: GenerationPerformance,
|
||||
pub timestamp: String,
|
||||
pub tool_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GenerationSummary {
|
||||
pub blocks_generated: usize,
|
||||
pub da_objects_generated: usize,
|
||||
pub total_objects: usize,
|
||||
pub duration_seconds: u64,
|
||||
pub duration_minutes: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GenerationPerformance {
|
||||
pub total_rate_objects_per_sec: f64,
|
||||
pub block_rate_per_sec: f64,
|
||||
pub da_rate_per_sec: f64,
|
||||
pub cpu_cores_used: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DatasetVerificationReport {
|
||||
pub verification_summary: VerificationSummary,
|
||||
pub data_sizes: DataSizesSummary,
|
||||
pub completeness_estimates: CompletenessSummary,
|
||||
pub performance: VerificationPerformance,
|
||||
pub warnings: WarningsSummary,
|
||||
pub timestamp: String,
|
||||
pub tool_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct VerificationSummary {
|
||||
pub blocks_found: usize,
|
||||
pub da_shares_found: usize,
|
||||
pub da_commitments_found: usize,
|
||||
pub total_objects_found: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DataSizesSummary {
|
||||
pub total_block_size_bytes: u64,
|
||||
pub total_share_size_bytes: u64,
|
||||
pub total_commitment_size_bytes: u64,
|
||||
pub total_verified_size_bytes: u64,
|
||||
pub total_verified_size_gb: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CompletenessSummary {
|
||||
pub block_completeness_percent: f64,
|
||||
pub da_completeness_percent: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct VerificationPerformance {
|
||||
pub verification_time_seconds: f64,
|
||||
pub objects_verified_per_sec: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct WarningsSummary {
|
||||
pub block_generation_incomplete: bool,
|
||||
pub data_size_smaller_than_expected: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BenchmarkReport {
|
||||
pub config_summary: BenchConfigSummary,
|
||||
pub results: BenchmarkResultsSummary,
|
||||
pub metadata: ReportMetadata,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BenchmarkResultsSummary {
|
||||
pub raw_measurements: Vec<f64>,
|
||||
pub warmup_results: Vec<f64>,
|
||||
pub statistics: StatisticsSummary,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ReportMetadata {
|
||||
pub timestamp: String,
|
||||
pub tool_version: String,
|
||||
pub runner_type: String,
|
||||
}
|
||||
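These report types all derive Serialize/Deserialize, so they round-trip through serde_json exactly the way the binaries below persist them. A small sketch with made-up numbers (the profile name and all values are placeholders, not real output):

// Illustration only: write a StorageBenchReport as pretty-printed JSON, the
// same pattern the runner binaries below use for their reports.
fn write_example_report(path: &std::path::Path) -> std::io::Result<()> {
    let report = StorageBenchReport {
        benchmark_config: BenchConfigSummary {
            profile: "validator".to_string(), // placeholder profile name
            memory_gb: 8,
            duration_seconds: 60,
            warmup_runs: 2,
            measurement_runs: 5,
        },
        results: BenchResultsSummary {
            raw_measurements: vec![1250.0, 1310.5],
            statistics: StatisticsSummary {
                mean_ops_sec: 1280.25,
                min_ops_sec: 1250.0,
                max_ops_sec: 1310.5,
                variability_percent: 2.4,
                sample_count: 2,
            },
        },
        timestamp: chrono::Utc::now().to_rfc3339(),
        tool_version: env!("CARGO_PKG_VERSION").to_string(),
    };
    std::fs::write(path, serde_json::to_string_pretty(&report)?)
}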
37 storage-benchmarks/src/benchmark/utilities.rs Normal file
@@ -0,0 +1,37 @@
use std::time::Duration;

use crate::config::ValidatorProfile;

#[must_use]
pub fn create_header_id(index: usize) -> nomos_core::header::HeaderId {
    let mut id = [0u8; 32];
    id[0..4].copy_from_slice(&(index as u32).to_be_bytes());
    nomos_core::header::HeaderId::from(id)
}

#[must_use]
pub fn create_blob_id(block: usize, blob_in_block: usize) -> nomos_core::da::BlobId {
    let mut id = [0u8; 32];
    id[0..4].copy_from_slice(&(block as u32).to_be_bytes());
    id[4..8].copy_from_slice(&(blob_in_block as u32).to_be_bytes());
    nomos_core::da::BlobId::from(id)
}

pub fn safe_interval_from_hz(frequency_hz: f64, workload_type: &str) -> Result<Duration, String> {
    if frequency_hz <= 0.0 {
        return Err(format!(
            "Invalid frequency {frequency_hz} Hz for {workload_type}"
        ));
    }

    // Clamp to at least 1 ms: `tokio::time::interval` panics on a zero period,
    // which the plain cast would produce for rates above 1000 Hz.
    let interval_ms = ((1000.0 / frequency_hz) as u64).max(1);
    Ok(Duration::from_millis(interval_ms))
}

#[must_use]
pub fn estimate_sequential_performance(profile: &ValidatorProfile) -> f64 {
    profile.range_scan_rate_hz.mul_add(
        10.0,
        profile.block_read_rate_hz + profile.da_share_read_rate_hz + profile.da_share_write_rate_hz,
    )
}
|
||||
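A short usage sketch for the helpers above; the byte layouts are exactly what the functions define, everything else is illustrative:

// Illustration only: deterministic IDs let the read workloads below hit the
// same keys that the dataset generators wrote.
fn id_examples() {
    let header = create_header_id(42); // header key for block index 42
    let blob = create_blob_id(42, 3); // 4th blob of block 42
    let tick = safe_interval_from_hz(20.0, "block_validation")
        .expect("positive frequency"); // 20 Hz -> 50 ms between operations
    let _ = (header, blob, tick);
}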
9 storage-benchmarks/src/benchmark/workloads/mod.rs Normal file
@@ -0,0 +1,9 @@
pub mod orchestrator;
pub mod reads;
pub mod runners;
pub mod writes;

pub use orchestrator::*;
pub use reads::*;
pub use runners::*;
pub use writes::*;
126 storage-benchmarks/src/benchmark/workloads/orchestrator.rs Normal file
@@ -0,0 +1,126 @@
|
||||
use std::{sync::Arc, time::Instant};
|
||||
|
||||
use log::info;
|
||||
use nomos_storage::backends::rocksdb::RocksBackend;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::{
|
||||
super::{estimate_sequential_performance, ConcurrentBenchmarkResult},
|
||||
reads::{
|
||||
run_block_validation_workload, run_da_commitments_workload, run_da_sampling_workload,
|
||||
run_ibd_serving_workload,
|
||||
},
|
||||
writes::{run_conditional_block_storage_workload, run_conditional_da_storage_workload},
|
||||
};
|
||||
use crate::{config::ValidatorProfile, metrics::StatsCollector};
|
||||
|
||||
pub async fn run_concurrent_validator_benchmark(
|
||||
storage: RocksBackend,
|
||||
duration: std::time::Duration,
|
||||
profile: &ValidatorProfile,
|
||||
dataset_size: (usize, usize),
|
||||
is_read_only: bool,
|
||||
) -> Result<ConcurrentBenchmarkResult, Box<dyn std::error::Error>> {
|
||||
if is_read_only && (profile.block_write_rate_hz > 0.0 || profile.da_share_write_rate_hz > 0.0) {
|
||||
log::warn!("Storage is read-only but profile has write operations. Write workloads will be skipped.");
|
||||
}
|
||||
|
||||
let storage = Arc::new(Mutex::new(storage));
|
||||
|
||||
let mut stats_collector = StatsCollector::new();
|
||||
stats_collector.collect_before(&*storage.lock().await);
|
||||
|
||||
let start_time = Instant::now();
|
||||
|
||||
info!(
|
||||
"Starting concurrent validator simulation for {:.1}s",
|
||||
duration.as_secs_f64()
|
||||
);
|
||||
info!(
|
||||
"Network-aware concurrency: {} validators \u{2192} {} IBD streams, {} DA streams",
|
||||
profile.total_validators,
|
||||
profile.ibd_concurrent_streams(),
|
||||
profile.da_concurrent_streams()
|
||||
);
|
||||
|
||||
let (
|
||||
block_validation_result,
|
||||
da_sampling_result,
|
||||
da_commitments_result,
|
||||
ibd_serving_result,
|
||||
block_storage_result,
|
||||
da_storage_result,
|
||||
) = tokio::join!(
|
||||
run_block_validation_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.block_read_rate_hz,
|
||||
dataset_size.0,
|
||||
profile
|
||||
),
|
||||
run_da_sampling_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.da_share_read_rate_hz,
|
||||
dataset_size.0,
|
||||
profile
|
||||
),
|
||||
run_da_commitments_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.da_share_read_rate_hz * 0.3,
|
||||
dataset_size.0,
|
||||
profile
|
||||
),
|
||||
run_ibd_serving_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.range_scan_rate_hz,
|
||||
dataset_size.0
|
||||
),
|
||||
run_conditional_block_storage_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.block_write_rate_hz,
|
||||
dataset_size.0,
|
||||
is_read_only
|
||||
),
|
||||
run_conditional_da_storage_workload(
|
||||
Arc::clone(&storage),
|
||||
duration,
|
||||
profile.da_share_write_rate_hz,
|
||||
dataset_size.1,
|
||||
is_read_only
|
||||
)
|
||||
);
|
||||
|
||||
let total_duration = start_time.elapsed();
|
||||
|
||||
stats_collector.collect_after(&*storage.lock().await);
|
||||
|
||||
let sequential_estimated_throughput = estimate_sequential_performance(profile);
|
||||
let actual_concurrent_throughput = (block_validation_result.operations_success
|
||||
+ da_sampling_result.operations_success
|
||||
+ da_commitments_result.operations_success
|
||||
+ ibd_serving_result.operations_success
|
||||
+ block_storage_result.operations_success
|
||||
+ da_storage_result.operations_success) as f64
|
||||
/ total_duration.as_secs_f64();
|
||||
|
||||
let contention_factor = actual_concurrent_throughput / sequential_estimated_throughput;
|
||||
|
||||
Ok(ConcurrentBenchmarkResult {
|
||||
block_validation: block_validation_result,
|
||||
da_sampling: da_sampling_result,
|
||||
da_commitments: da_commitments_result,
|
||||
ibd_serving: ibd_serving_result,
|
||||
block_storage: block_storage_result,
|
||||
da_storage: da_storage_result,
|
||||
total_duration,
|
||||
peak_memory_mb: 0.0,
|
||||
resource_contention_factor: contention_factor,
|
||||
concurrent_operations_peak: 6,
|
||||
rocksdb_stats_before: stats_collector.before.clone(),
|
||||
rocksdb_stats_after: stats_collector.after.clone(),
|
||||
})
|
||||
}
|
||||
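A hedged sketch of driving the orchestrator directly; imports mirror the use lines of orchestrator.rs, and profile/backend construction follows the pattern in storage_bench_runner.rs further down:

// Sketch only: one 60-second concurrent pass over an already-populated,
// read-only dataset of (blocks, da_objects), printing the headline numbers.
async fn one_pass(
    storage: RocksBackend,
    profile: &ValidatorProfile,
    dataset_size: (usize, usize),
) -> Result<(), Box<dyn std::error::Error>> {
    let result = run_concurrent_validator_benchmark(
        storage,
        std::time::Duration::from_secs(60),
        profile,
        dataset_size,
        true, // read-only: the two write workloads return unexecuted results
    )
    .await?;
    println!(
        "{:.1} ops/s combined, contention factor {:.2}",
        result.combined_throughput(),
        result.resource_contention_factor
    );
    Ok(())
}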
302 storage-benchmarks/src/benchmark/workloads/reads.rs Normal file
@@ -0,0 +1,302 @@
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use nomos_storage::{
|
||||
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
|
||||
backends::rocksdb::RocksBackend,
|
||||
};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
|
||||
use crate::{
|
||||
config::{types::WorkloadType, ValidatorProfile},
|
||||
data::{select_block_spec_accurate, select_da_spec_accurate},
|
||||
metrics::LatencyTracker,
|
||||
};
|
||||
|
||||
pub async fn run_block_validation_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
max_blocks: usize,
|
||||
profile: &ValidatorProfile,
|
||||
) -> WorkloadStreamResult {
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::BlockValidation,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let block_index = select_block_spec_accurate(result.operations_total, max_blocks, profile);
|
||||
let header_id = create_header_id(block_index);
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let get_result = storage_guard.get_block(header_id).await;
|
||||
drop(storage_guard);
|
||||
get_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(Some(data)) => {
|
||||
result.operations_success += 1;
|
||||
result.bytes_read += data.len() as u64;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn run_da_sampling_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
max_blocks: usize,
|
||||
profile: &ValidatorProfile,
|
||||
) -> WorkloadStreamResult {
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::DaSampling,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
|
||||
let blob_id = create_blob_id(blob_index, 0);
|
||||
let share_idx = [(result.operations_total % 20) as u8, 0u8];
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let get_result = storage_guard.get_light_share(blob_id, share_idx).await;
|
||||
drop(storage_guard);
|
||||
get_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(Some(data)) => {
|
||||
result.operations_success += 1;
|
||||
result.bytes_read += data.len() as u64;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn run_ibd_serving_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
max_blocks: usize,
|
||||
) -> WorkloadStreamResult {
|
||||
const IBD_CHUNK_SIZE: usize = 1000;
|
||||
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::IbdServing,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let max_safe_blocks = max_blocks.saturating_sub(IBD_CHUNK_SIZE).max(1);
|
||||
let start_block = (result.operations_total as usize * IBD_CHUNK_SIZE) % max_safe_blocks;
|
||||
let start_slot = cryptarchia_engine::Slot::from(start_block as u64);
|
||||
let end_slot = cryptarchia_engine::Slot::from((start_block + IBD_CHUNK_SIZE) as u64);
|
||||
let Some(limit) = std::num::NonZeroUsize::new(IBD_CHUNK_SIZE) else {
|
||||
log::error!("Invalid IBD chunk size: {IBD_CHUNK_SIZE}");
|
||||
result.errors += 1;
|
||||
continue;
|
||||
};
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let scan_result = storage_guard
|
||||
.scan_immutable_block_ids(start_slot..=end_slot, limit)
|
||||
.await;
|
||||
|
||||
if let Ok(header_ids) = &scan_result {
|
||||
for header_id in header_ids.iter().take(IBD_CHUNK_SIZE) {
|
||||
let _ = storage_guard.get_block(*header_id).await;
|
||||
}
|
||||
}
|
||||
|
||||
drop(storage_guard);
|
||||
scan_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(header_ids) => {
|
||||
result.operations_success += 1;
|
||||
let estimated_bytes = header_ids.len() as u64 * 34371;
|
||||
result.bytes_read += estimated_bytes;
|
||||
}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn run_da_commitments_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
max_blocks: usize,
|
||||
profile: &ValidatorProfile,
|
||||
) -> WorkloadStreamResult {
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::DaCommitments,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
|
||||
let blob_id = create_blob_id(blob_index, 0);
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let get_result = storage_guard.get_shared_commitments(blob_id).await;
|
||||
drop(storage_guard);
|
||||
get_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(Some(data)) => {
|
||||
result.operations_success += 1;
|
||||
result.bytes_read += data.len() as u64;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
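All four read workloads above share one shape: convert the target frequency into a tick interval, then loop until a wall-clock deadline, recording each storage call through the latency tracker. Distilled into a sketch (not an additional API in the crate):

// The common skeleton behind the run_*_workload functions above: rate-limit
// with a tokio interval and stop at a deadline rather than after N operations.
async fn rate_limited_loop<F, Fut>(duration: Duration, frequency_hz: f64, mut op: F)
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = ()>,
{
    let Ok(interval) = safe_interval_from_hz(frequency_hz, "generic") else {
        return; // the real workloads log the error and return an empty result
    };
    let mut ticker = tokio::time::interval(interval);
    let end_time = Instant::now() + duration;
    while Instant::now() < end_time {
        ticker.tick().await;
        op().await;
    }
}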
293 storage-benchmarks/src/benchmark/workloads/runners.rs Normal file
@@ -0,0 +1,293 @@
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use nomos_storage::{
|
||||
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
|
||||
backends::rocksdb::RocksBackend,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
|
||||
use crate::{
|
||||
config::{types::WorkloadType, ValidatorProfile},
|
||||
data::{select_block_spec_accurate, select_da_spec_accurate},
|
||||
metrics::LatencyTracker,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait WorkloadRunner {
|
||||
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult;
|
||||
fn workload_type(&self) -> WorkloadType;
|
||||
fn is_read_only(&self) -> bool;
|
||||
}
|
||||
|
||||
pub struct BlockValidationRunner {
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
profile: ValidatorProfile,
|
||||
max_blocks: usize,
|
||||
frequency_hz: f64,
|
||||
latency_tracker: LatencyTracker,
|
||||
execution_stats: WorkloadExecutionStats,
|
||||
}
|
||||
|
||||
pub struct DaSamplingRunner {
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
profile: ValidatorProfile,
|
||||
max_blocks: usize,
|
||||
frequency_hz: f64,
|
||||
latency_tracker: LatencyTracker,
|
||||
execution_stats: WorkloadExecutionStats,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct WorkloadExecutionStats {
|
||||
pub operations_attempted: u64,
|
||||
pub operations_successful: u64,
|
||||
pub bytes_processed: u64,
|
||||
pub errors_encountered: u64,
|
||||
pub cache_misses_estimated: u64,
|
||||
pub execution_start: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub last_operation_time: Option<chrono::DateTime<chrono::Utc>>,
|
||||
}
|
||||
|
||||
impl BlockValidationRunner {
|
||||
pub fn new(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
profile: ValidatorProfile,
|
||||
max_blocks: usize,
|
||||
frequency_hz: f64,
|
||||
) -> Self {
|
||||
Self {
|
||||
storage,
|
||||
profile,
|
||||
max_blocks,
|
||||
frequency_hz,
|
||||
latency_tracker: LatencyTracker::new(),
|
||||
execution_stats: WorkloadExecutionStats::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
|
||||
self.execution_stats.execution_start = Some(chrono::Utc::now());
|
||||
|
||||
let interval = match safe_interval_from_hz(self.frequency_hz, "block_validation") {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
return self.create_error_result(duration);
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
self.execute_single_block_validation().await;
|
||||
}
|
||||
|
||||
self.create_final_result(duration)
|
||||
}
|
||||
|
||||
async fn execute_single_block_validation(&mut self) {
|
||||
let block_index = select_block_spec_accurate(
|
||||
self.execution_stats.operations_attempted,
|
||||
self.max_blocks,
|
||||
&self.profile,
|
||||
);
|
||||
let header_id = create_header_id(block_index);
|
||||
|
||||
let operation_result = self
|
||||
.latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = self.storage.lock().await;
|
||||
let result = storage_guard.get_block(header_id).await;
|
||||
drop(storage_guard);
|
||||
result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(Some(data)) => {
|
||||
self.execution_stats.operations_successful += 1;
|
||||
self.execution_stats.bytes_processed += data.len() as u64;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(_) => self.execution_stats.errors_encountered += 1,
|
||||
}
|
||||
|
||||
self.execution_stats.operations_attempted += 1;
|
||||
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
|
||||
}
|
||||
|
||||
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
|
||||
WorkloadStreamResult {
|
||||
workload_type: WorkloadType::BlockValidation,
|
||||
executed: true,
|
||||
operations_total: self.execution_stats.operations_attempted,
|
||||
operations_success: self.execution_stats.operations_successful,
|
||||
bytes_read: self.execution_stats.bytes_processed,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: self.execution_stats.errors_encountered,
|
||||
cache_misses: self.execution_stats.cache_misses_estimated,
|
||||
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
|
||||
WorkloadStreamResult {
|
||||
workload_type: WorkloadType::BlockValidation,
|
||||
executed: false,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 1,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn execution_state(&self) -> &WorkloadExecutionStats {
|
||||
&self.execution_stats
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl WorkloadRunner for BlockValidationRunner {
|
||||
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
|
||||
Self::execute(self, duration).await
|
||||
}
|
||||
|
||||
fn workload_type(&self) -> WorkloadType {
|
||||
WorkloadType::BlockValidation
|
||||
}
|
||||
|
||||
fn is_read_only(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl DaSamplingRunner {
|
||||
pub fn new(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
profile: ValidatorProfile,
|
||||
max_blocks: usize,
|
||||
frequency_hz: f64,
|
||||
) -> Self {
|
||||
Self {
|
||||
storage,
|
||||
profile,
|
||||
max_blocks,
|
||||
frequency_hz,
|
||||
latency_tracker: LatencyTracker::new(),
|
||||
execution_stats: WorkloadExecutionStats::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
|
||||
self.execution_stats.execution_start = Some(chrono::Utc::now());
|
||||
|
||||
let interval = match safe_interval_from_hz(self.frequency_hz, "da_sampling") {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
return self.create_error_result(duration);
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
self.execute_single_da_sample().await;
|
||||
}
|
||||
|
||||
self.create_final_result(duration)
|
||||
}
|
||||
|
||||
async fn execute_single_da_sample(&mut self) {
|
||||
let blob_index = select_da_spec_accurate(
|
||||
self.execution_stats.operations_attempted,
|
||||
self.max_blocks,
|
||||
&self.profile,
|
||||
);
|
||||
let blob_id = create_blob_id(blob_index, 0);
|
||||
let share_idx = [(self.execution_stats.operations_attempted % 20) as u8, 0u8];
|
||||
|
||||
let operation_result = self
|
||||
.latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = self.storage.lock().await;
|
||||
let result = storage_guard.get_light_share(blob_id, share_idx).await;
|
||||
drop(storage_guard);
|
||||
result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(Some(data)) => {
|
||||
self.execution_stats.operations_successful += 1;
|
||||
self.execution_stats.bytes_processed += data.len() as u64;
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(_) => self.execution_stats.errors_encountered += 1,
|
||||
}
|
||||
|
||||
self.execution_stats.operations_attempted += 1;
|
||||
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
|
||||
}
|
||||
|
||||
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
|
||||
WorkloadStreamResult {
|
||||
workload_type: WorkloadType::DaSampling,
|
||||
executed: true,
|
||||
operations_total: self.execution_stats.operations_attempted,
|
||||
operations_success: self.execution_stats.operations_successful,
|
||||
bytes_read: self.execution_stats.bytes_processed,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: self.execution_stats.errors_encountered,
|
||||
cache_misses: self.execution_stats.cache_misses_estimated,
|
||||
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
|
||||
WorkloadStreamResult {
|
||||
workload_type: WorkloadType::DaSampling,
|
||||
executed: false,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 1,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl WorkloadRunner for DaSamplingRunner {
|
||||
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
|
||||
Self::execute(self, duration).await
|
||||
}
|
||||
|
||||
fn workload_type(&self) -> WorkloadType {
|
||||
WorkloadType::DaSampling
|
||||
}
|
||||
|
||||
fn is_read_only(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
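Because WorkloadRunner is an object-safe async trait, the runners above can be driven uniformly through trait objects. A hedged sketch:

// Sketch only: execute a heterogeneous set of runners one after another.
async fn run_all(
    runners: &mut [Box<dyn WorkloadRunner + Send>],
    duration: Duration,
) -> Vec<WorkloadStreamResult> {
    let mut results = Vec::with_capacity(runners.len());
    for runner in runners.iter_mut() {
        results.push(runner.execute(duration).await);
    }
    results
}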
227 storage-benchmarks/src/benchmark/workloads/writes.rs Normal file
@@ -0,0 +1,227 @@
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use nomos_storage::{
|
||||
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
|
||||
backends::rocksdb::RocksBackend,
|
||||
};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::super::{
|
||||
safe_interval_from_hz,
|
||||
utilities::{create_blob_id, create_header_id},
|
||||
WorkloadStreamResult,
|
||||
};
|
||||
use crate::{
|
||||
config::types::WorkloadType,
|
||||
data::{create_block_data, create_commitment, create_da_share},
|
||||
metrics::LatencyTracker,
|
||||
};
|
||||
|
||||
pub async fn run_block_storage_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
starting_block_height: usize,
|
||||
) -> WorkloadStreamResult {
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::BlockStorage,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
let mut current_height = starting_block_height;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let header_id = create_header_id(current_height);
|
||||
let block_data = create_block_data(current_height, 34_371);
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let store_result = storage_guard
|
||||
.store_block(header_id, block_data.clone())
|
||||
.await;
|
||||
|
||||
if store_result.is_ok() {
|
||||
let slot = cryptarchia_engine::Slot::from(current_height as u64);
|
||||
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
|
||||
let _ = storage_guard.store_immutable_block_ids(ids).await;
|
||||
}
|
||||
|
||||
drop(storage_guard);
|
||||
store_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(()) => {
|
||||
result.operations_success += 1;
|
||||
result.bytes_written += block_data.len() as u64;
|
||||
current_height += 1;
|
||||
}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn run_da_storage_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
starting_share_count: usize,
|
||||
) -> WorkloadStreamResult {
|
||||
let mut result = WorkloadStreamResult {
|
||||
workload_type: WorkloadType::DaStorage,
|
||||
executed: true,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration,
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
};
|
||||
|
||||
let mut latency_tracker = LatencyTracker::new();
|
||||
|
||||
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
|
||||
Ok(interval) => interval,
|
||||
Err(e) => {
|
||||
log::warn!("{e}");
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
let end_time = Instant::now() + duration;
|
||||
|
||||
while Instant::now() < end_time {
|
||||
ticker.tick().await;
|
||||
|
||||
let blob_id = create_blob_id(starting_share_count + result.operations_total as usize, 0);
|
||||
let share_idx = [(result.operations_total % 20) as u8, 0u8];
|
||||
let share_data = create_da_share(
|
||||
starting_share_count + result.operations_total as usize,
|
||||
0,
|
||||
1024,
|
||||
);
|
||||
|
||||
let operation_result = latency_tracker
|
||||
.record_async_operation(|| async {
|
||||
let mut storage_guard = storage.lock().await;
|
||||
let store_result = storage_guard
|
||||
.store_light_share(blob_id, share_idx, share_data.clone())
|
||||
.await;
|
||||
|
||||
if store_result.is_ok() {
|
||||
if let Ok(commitment) = create_commitment(
|
||||
starting_share_count + result.operations_total as usize,
|
||||
0,
|
||||
220_000,
|
||||
)
|
||||
.await
|
||||
{
|
||||
let _ = storage_guard
|
||||
.store_shared_commitments(blob_id, commitment)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
drop(storage_guard);
|
||||
store_result
|
||||
})
|
||||
.await;
|
||||
|
||||
match operation_result {
|
||||
Ok(()) => {
|
||||
result.operations_success += 1;
|
||||
result.bytes_written += share_data.len() as u64 + 220_000;
|
||||
}
|
||||
Err(_) => result.errors += 1,
|
||||
}
|
||||
|
||||
result.operations_total += 1;
|
||||
}
|
||||
|
||||
result.duration = duration;
|
||||
result.latency_percentiles = Some(latency_tracker.get_percentiles());
|
||||
result
|
||||
}
|
||||
|
||||
pub async fn run_conditional_block_storage_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
starting_block_height: usize,
|
||||
is_read_only: bool,
|
||||
) -> WorkloadStreamResult {
|
||||
if is_read_only || frequency_hz == 0.0 {
|
||||
return create_empty_workload_result(WorkloadType::BlockStorage);
|
||||
}
|
||||
|
||||
run_block_storage_workload(storage, duration, frequency_hz, starting_block_height).await
|
||||
}
|
||||
|
||||
pub async fn run_conditional_da_storage_workload(
|
||||
storage: Arc<Mutex<RocksBackend>>,
|
||||
duration: Duration,
|
||||
frequency_hz: f64,
|
||||
starting_share_count: usize,
|
||||
is_read_only: bool,
|
||||
) -> WorkloadStreamResult {
|
||||
if is_read_only || frequency_hz == 0.0 {
|
||||
return create_empty_workload_result(WorkloadType::DaStorage);
|
||||
}
|
||||
|
||||
run_da_storage_workload(storage, duration, frequency_hz, starting_share_count).await
|
||||
}
|
||||
|
||||
const fn create_empty_workload_result(workload_type: WorkloadType) -> WorkloadStreamResult {
|
||||
WorkloadStreamResult {
|
||||
workload_type,
|
||||
executed: false,
|
||||
operations_total: 0,
|
||||
operations_success: 0,
|
||||
bytes_read: 0,
|
||||
bytes_written: 0,
|
||||
duration: Duration::from_secs(0),
|
||||
errors: 0,
|
||||
cache_misses: 0,
|
||||
latency_percentiles: None,
|
||||
}
|
||||
}
|
||||
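The conditional wrappers above are what the orchestrator calls, so a read-only store (or a zero write rate) produces an unexecuted result instead of an error. A tiny illustration:

// Illustration only: with is_read_only = true the wrapper returns immediately
// with executed == false, so aggregation simply skips this stream.
async fn skipped_when_read_only(storage: Arc<Mutex<RocksBackend>>) {
    let r = run_conditional_block_storage_workload(
        storage,
        Duration::from_secs(10),
        5.0,  // 5 blocks/s requested
        0,    // starting height
        true, // read-only dataset
    )
    .await;
    assert!(!r.executed);
    assert_eq!(r.operations_total, 0);
}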
311 storage-benchmarks/src/bin/dataset_builder.rs Normal file
@@ -0,0 +1,311 @@
|
||||
use std::{env, time::Instant};
|
||||
|
||||
use log::info;
|
||||
use nomos_storage::{
|
||||
api::chain::StorageChainApi as _,
|
||||
backends::{rocksdb::RocksBackend, StorageBackend as _},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use storage_benchmarks::{
|
||||
benchmark::{analyze_dataset, utilities::create_header_id},
|
||||
data::create_block_data,
|
||||
BenchConfig, DatasetGenConfig,
|
||||
};
|
||||
|
||||
pub struct DatasetGenerator {
|
||||
config: DatasetGenConfig,
|
||||
storage: RocksBackend,
|
||||
progress: GenerationProgress,
|
||||
stats: GenerationStats,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct GenerationProgress {
|
||||
pub blocks_completed: usize,
|
||||
pub da_objects_completed: usize,
|
||||
pub current_batch_start: usize,
|
||||
pub total_target_blocks: usize,
|
||||
pub generation_start_time: Option<Instant>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct GenerationStats {
|
||||
pub blocks_generated_this_run: usize,
|
||||
pub da_objects_generated_this_run: usize,
|
||||
pub total_generation_time: std::time::Duration,
|
||||
pub block_generation_rate: f64,
|
||||
pub da_generation_rate: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DatasetGenerationReport {
|
||||
pub generation_summary: GenerationSummary,
|
||||
pub performance: PerformanceMetrics,
|
||||
pub config: DatasetGenConfig,
|
||||
pub timestamp: String,
|
||||
pub tool_version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GenerationSummary {
|
||||
pub blocks_generated: usize,
|
||||
pub da_objects_generated: usize,
|
||||
pub total_objects: usize,
|
||||
pub duration_seconds: u64,
|
||||
pub duration_minutes: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PerformanceMetrics {
|
||||
pub block_rate_per_sec: f64,
|
||||
pub da_rate_per_sec: f64,
|
||||
pub total_rate_objects_per_sec: f64,
|
||||
pub cpu_cores_used: usize,
|
||||
}
|
||||
|
||||
impl DatasetGenerator {
|
||||
pub async fn new(config_path: &str) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let config = DatasetGenConfig::from_file(config_path)?;
|
||||
let benchmark_config = BenchConfig::production();
|
||||
let storage = RocksBackend::new(benchmark_config.settings)?;
|
||||
|
||||
let mut generator = Self {
|
||||
config,
|
||||
storage,
|
||||
progress: GenerationProgress::default(),
|
||||
stats: GenerationStats::default(),
|
||||
};
|
||||
|
||||
generator.analyze_existing_data().await?;
|
||||
|
||||
Ok(generator)
|
||||
}
|
||||
|
||||
async fn analyze_existing_data(&mut self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let (existing_blocks, existing_da) = analyze_dataset(&mut self.storage).await?;
|
||||
|
||||
self.progress.blocks_completed = existing_blocks;
|
||||
self.progress.da_objects_completed = existing_da;
|
||||
self.progress.total_target_blocks = self.config.total_blocks();
|
||||
|
||||
info!(
|
||||
"Found existing data: {} blocks, {} DA objects",
|
||||
existing_blocks, existing_da
|
||||
);
|
||||
info!("Target: {} total blocks", self.progress.total_target_blocks);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn generate_dataset(
|
||||
&mut self,
|
||||
) -> Result<GenerationStats, Box<dyn std::error::Error>> {
|
||||
info!(
|
||||
"Multi-core generation: {} ({} cores available)",
|
||||
self.config.dataset.name,
|
||||
num_cpus::get()
|
||||
);
|
||||
|
||||
self.progress.generation_start_time = Some(Instant::now());
|
||||
|
||||
if self.progress.blocks_completed < self.progress.total_target_blocks {
|
||||
self.generate_remaining_blocks().await?;
|
||||
} else {
|
||||
info!("All blocks already generated!");
|
||||
}
|
||||
|
||||
self.generate_da_objects()?;
|
||||
|
||||
self.finalize_generation();
|
||||
self.save_generation_report();
|
||||
|
||||
Ok(self.stats.clone())
|
||||
}
|
||||
|
||||
async fn generate_remaining_blocks(&mut self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let remaining_blocks = self.progress.total_target_blocks - self.progress.blocks_completed;
|
||||
|
||||
info!(
|
||||
"Resuming block generation from block {}, generating {} more blocks",
|
||||
self.progress.blocks_completed, remaining_blocks
|
||||
);
|
||||
|
||||
let blocks_generated = self.generate_blocks_in_batches(remaining_blocks).await?;
|
||||
self.stats.blocks_generated_this_run = blocks_generated;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn generate_blocks_in_batches(
|
||||
&mut self,
|
||||
blocks_to_generate: usize,
|
||||
) -> Result<usize, Box<dyn std::error::Error>> {
|
||||
const PARALLEL_BATCH_SIZE: usize = 1000;
|
||||
let mut blocks_generated = 0;
|
||||
|
||||
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
|
||||
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
|
||||
let actual_batch_start = self.progress.blocks_completed + batch_start;
|
||||
|
||||
let batch_data =
|
||||
self.generate_block_batch_parallel(actual_batch_start, batch_end - batch_start)?;
|
||||
self.store_block_batch(&batch_data).await?;
|
||||
|
||||
blocks_generated += batch_end - batch_start;
|
||||
self.log_block_progress(actual_batch_start, blocks_generated);
|
||||
}
|
||||
|
||||
Ok(blocks_generated)
|
||||
}
|
||||
|
||||
fn generate_block_batch_parallel(
|
||||
&self,
|
||||
start_index: usize,
|
||||
count: usize,
|
||||
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
|
||||
use rayon::prelude::*;
|
||||
|
||||
let generation_start = Instant::now();
|
||||
let batch_data: Vec<_> = (0..count)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let block_index = start_index + i;
|
||||
let block_data = create_block_data(block_index, self.config.blocks.size_bytes);
|
||||
(block_index, block_data)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let generation_time = generation_start.elapsed();
|
||||
info!(
|
||||
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
|
||||
count,
|
||||
generation_time.as_secs_f64(),
|
||||
count as f64 / generation_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(batch_data)
|
||||
}
|
||||
|
||||
async fn store_block_batch(
|
||||
&mut self,
|
||||
batch: &[(usize, bytes::Bytes)],
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let storage_start = Instant::now();
|
||||
|
||||
for (block_index, block_data) in batch {
|
||||
let header_id = create_header_id(*block_index);
|
||||
self.storage
|
||||
.store_block(header_id, block_data.clone())
|
||||
.await?;
|
||||
|
||||
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
|
||||
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
|
||||
self.storage.store_immutable_block_ids(ids).await?;
|
||||
}
|
||||
|
||||
let storage_time = storage_start.elapsed();
|
||||
info!(
|
||||
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
|
||||
batch.len(),
|
||||
storage_time.as_secs_f64(),
|
||||
batch.len() as f64 / storage_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn generate_da_objects(&mut self) -> Result<(), Box<dyn std::error::Error>> {
|
||||
self.stats.da_objects_generated_this_run = 0;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn log_block_progress(&self, current_block: usize, blocks_generated: usize) {
|
||||
if self.progress.total_target_blocks > 1000 {
|
||||
let completion_percent =
|
||||
(blocks_generated * 100) as f64 / self.progress.total_target_blocks as f64;
|
||||
info!(
|
||||
"Block progress: {} completed - {:.1}% total",
|
||||
current_block, completion_percent
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize_generation(&mut self) {
|
||||
if let Some(start_time) = self.progress.generation_start_time {
|
||||
self.stats.total_generation_time = start_time.elapsed();
|
||||
|
||||
if self.stats.total_generation_time.as_secs() > 0 {
|
||||
self.stats.block_generation_rate = self.stats.blocks_generated_this_run as f64
|
||||
/ self.stats.total_generation_time.as_secs_f64();
|
||||
self.stats.da_generation_rate = self.stats.da_objects_generated_this_run as f64
|
||||
/ self.stats.total_generation_time.as_secs_f64();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn save_generation_report(&self) {
|
||||
let results_dir = BenchConfig::results_path();
|
||||
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
|
||||
let filename = format!("dataset_generation_{}.json", timestamp);
|
||||
let filepath = results_dir.join(filename);
|
||||
|
||||
let report = DatasetGenerationReport {
|
||||
generation_summary: GenerationSummary {
|
||||
blocks_generated: self.stats.blocks_generated_this_run,
|
||||
da_objects_generated: self.stats.da_objects_generated_this_run,
|
||||
total_objects: self.stats.blocks_generated_this_run
|
||||
+ self.stats.da_objects_generated_this_run,
|
||||
duration_seconds: self.stats.total_generation_time.as_secs(),
|
||||
duration_minutes: self.stats.total_generation_time.as_secs_f64() / 60.0,
|
||||
},
|
||||
performance: PerformanceMetrics {
|
||||
block_rate_per_sec: self.stats.block_generation_rate,
|
||||
da_rate_per_sec: self.stats.da_generation_rate,
|
||||
total_rate_objects_per_sec: self.stats.block_generation_rate
|
||||
+ self.stats.da_generation_rate,
|
||||
cpu_cores_used: num_cpus::get(),
|
||||
},
|
||||
config: self.config.clone(),
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
tool_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
|
||||
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
|
||||
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
|
||||
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let args: Vec<String> = env::args().collect();
|
||||
|
||||
if args.len() < 3 || args[1] != "--config" {
|
||||
print_usage();
|
||||
return Err("Configuration file required".into());
|
||||
}
|
||||
|
||||
let mut generator = DatasetGenerator::new(&args[2]).await?;
|
||||
let final_stats = generator.generate_dataset().await?;
|
||||
|
||||
info!("Generation completed successfully!");
|
||||
info!(
|
||||
"Final stats: {} blocks, {} DA objects in {:.1}min",
|
||||
final_stats.blocks_generated_this_run,
|
||||
final_stats.da_objects_generated_this_run,
|
||||
final_stats.total_generation_time.as_secs_f64() / 60.0
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_usage() {
|
||||
eprintln!("Multi-core Dataset Builder");
|
||||
eprintln!("Generates blocks and DA data in parallel");
|
||||
eprintln!();
|
||||
eprintln!("USAGE:");
|
||||
eprintln!(" cargo run --bin dataset_builder -- --config <file>");
|
||||
}
|
||||
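If DatasetGenerator were exported from the library crate instead of living inside this binary, the same resumable flow could be driven programmatically. A hedged sketch with a hypothetical config path:

// Sketch only: build (or resume building) a dataset from another tokio context.
async fn build_dataset() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical path; any TOML accepted by DatasetGenConfig::from_file works.
    let mut generator = DatasetGenerator::new("dataset_configs/small.toml").await?;
    let stats = generator.generate_dataset().await?;
    log::info!(
        "built {} blocks at {:.0} blocks/s",
        stats.blocks_generated_this_run,
        stats.block_generation_rate
    );
    Ok(())
}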
375 storage-benchmarks/src/bin/dataset_generator.rs Normal file
@@ -0,0 +1,375 @@
|
||||
use std::{env, time::Instant};
|
||||
|
||||
use log::info;
|
||||
use nomos_storage::{
|
||||
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
|
||||
backends::{rocksdb::RocksBackend, StorageBackend as _},
|
||||
};
|
||||
use rand::SeedableRng as _;
|
||||
use rayon::prelude::*;
|
||||
use storage_benchmarks::{
|
||||
benchmark::{
|
||||
analyze_dataset,
|
||||
utilities::{create_blob_id, create_header_id},
|
||||
DatasetGenerationReport, GenerationPerformance, GenerationSummary,
|
||||
},
|
||||
data::{create_block_data, create_da_share},
|
||||
BenchConfig, DatasetGenConfig,
|
||||
};
|
||||
|
||||
const PARALLEL_BATCH_SIZE: usize = 1000;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let args: Vec<String> = env::args().collect();
|
||||
|
||||
if args.len() < 3 || args[1] != "--config" {
|
||||
print_usage();
|
||||
return Err("Configuration file required".into());
|
||||
}
|
||||
|
||||
run_multicore_generation(&args[2]).await
|
||||
}
|
||||
|
||||
async fn run_multicore_generation(config_path: &str) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let config = DatasetGenConfig::from_file(config_path)?;
|
||||
|
||||
info!(
|
||||
"Multi-core generation: {} ({} cores available)",
|
||||
config.dataset.name,
|
||||
num_cpus::get()
|
||||
);
|
||||
|
||||
let generation_start = Instant::now();
|
||||
let benchmark_config = BenchConfig::production();
|
||||
let mut storage = RocksBackend::new(benchmark_config.settings)?;
|
||||
|
||||
let (existing_blocks, existing_da) = analyze_dataset(&mut storage).await?;
|
||||
let total_blocks = config.total_blocks();
|
||||
|
||||
info!(
|
||||
"Found existing data: {} blocks, {} DA objects",
|
||||
existing_blocks, existing_da
|
||||
);
|
||||
info!("Target: {} total blocks", total_blocks);
|
||||
|
||||
let blocks_generated = if existing_blocks < total_blocks {
|
||||
let remaining_blocks = total_blocks - existing_blocks;
|
||||
info!(
|
||||
"Resuming block generation from block {}, generating {} more blocks",
|
||||
existing_blocks, remaining_blocks
|
||||
);
|
||||
generate_blocks_multicore(&mut storage, &config, remaining_blocks, existing_blocks).await?
|
||||
} else {
|
||||
info!("All blocks already generated!");
|
||||
0
|
||||
};
|
||||
|
||||
let da_generated = generate_da_objects_multicore(&mut storage, &config, total_blocks).await?;
|
||||
|
||||
let total_time = generation_start.elapsed();
|
||||
|
||||
log_generation_completion(blocks_generated, da_generated, total_time);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn generate_blocks_multicore(
|
||||
storage: &mut RocksBackend,
|
||||
config: &DatasetGenConfig,
|
||||
blocks_to_generate: usize,
|
||||
start_from_block: usize,
|
||||
) -> Result<usize, Box<dyn std::error::Error>> {
|
||||
let mut blocks_generated = 0;
|
||||
|
||||
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
|
||||
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
|
||||
let batch_size = batch_end - batch_start;
|
||||
|
||||
let actual_batch_start = start_from_block + batch_start;
|
||||
let actual_batch_end = start_from_block + batch_end;
|
||||
|
||||
let block_data_batch =
|
||||
generate_block_batch_parallel(actual_batch_start, actual_batch_end, config)?;
|
||||
|
||||
store_block_batch(storage, &block_data_batch).await?;
|
||||
|
||||
blocks_generated += batch_size;
|
||||
|
||||
log_block_progress(
|
||||
actual_batch_start,
|
||||
actual_batch_end,
|
||||
start_from_block + blocks_to_generate,
|
||||
blocks_generated,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(blocks_generated)
|
||||
}
|
||||
|
||||
async fn generate_da_objects_multicore(
|
||||
storage: &mut RocksBackend,
|
||||
config: &DatasetGenConfig,
|
||||
total_blocks: usize,
|
||||
) -> Result<usize, Box<dyn std::error::Error>> {
|
||||
info!(
|
||||
"Generating DA objects using {} CPU cores...",
|
||||
num_cpus::get()
|
||||
);
|
||||
let mut da_objects_generated = 0;
|
||||
|
||||
for batch_start in (0..total_blocks).step_by(PARALLEL_BATCH_SIZE) {
|
||||
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, total_blocks);
|
||||
|
||||
let da_batch_count =
|
||||
generate_da_batch_for_blocks(storage, config, batch_start, batch_end).await?;
|
||||
|
||||
da_objects_generated += da_batch_count;
|
||||
}
|
||||
|
||||
Ok(da_objects_generated)
|
||||
}
|
||||
|
||||
fn generate_block_batch_parallel(
|
||||
batch_start: usize,
|
||||
batch_end: usize,
|
||||
config: &DatasetGenConfig,
|
||||
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
|
||||
let batch_indices: Vec<usize> = (batch_start..batch_end).collect();
|
||||
|
||||
let generation_start = Instant::now();
|
||||
let block_data_batch: Vec<(usize, bytes::Bytes)> = batch_indices
|
||||
.par_iter()
|
||||
.map(|&block_index| {
|
||||
let block_data = create_block_data(block_index, config.blocks.size_bytes);
|
||||
(block_index, block_data)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let generation_time = generation_start.elapsed();
|
||||
info!(
|
||||
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
|
||||
batch_end - batch_start,
|
||||
generation_time.as_secs_f64(),
|
||||
(batch_end - batch_start) as f64 / generation_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(block_data_batch)
|
||||
}
|
||||
|
||||
async fn store_block_batch(
|
||||
storage: &mut RocksBackend,
|
||||
block_batch: &[(usize, bytes::Bytes)],
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let storage_start = Instant::now();
|
||||
|
||||
for (block_index, block_data) in block_batch {
|
||||
let header_id = create_header_id(*block_index);
|
||||
|
||||
storage.store_block(header_id, block_data.clone()).await?;
|
||||
|
||||
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
|
||||
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
|
||||
storage.store_immutable_block_ids(ids).await?;
|
||||
}
|
||||
|
||||
let storage_time = storage_start.elapsed();
|
||||
info!(
|
||||
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
|
||||
block_batch.len(),
|
||||
storage_time.as_secs_f64(),
|
||||
block_batch.len() as f64 / storage_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn generate_da_batch_for_blocks(
|
||||
storage: &mut RocksBackend,
|
||||
config: &DatasetGenConfig,
|
||||
batch_start: usize,
|
||||
batch_end: usize,
|
||||
) -> Result<usize, Box<dyn std::error::Error>> {
|
||||
let da_specs = collect_da_specs_for_blocks(config, batch_start, batch_end);
|
||||
|
||||
if da_specs.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let da_data_batch = generate_da_data_parallel(&da_specs, config)?;
|
||||
|
||||
store_da_batch(storage, &da_data_batch).await?;
|
||||
|
||||
Ok(da_data_batch.len())
|
||||
}
|
||||
|
||||
fn collect_da_specs_for_blocks(
|
||||
config: &DatasetGenConfig,
|
||||
batch_start: usize,
|
||||
batch_end: usize,
|
||||
) -> Vec<(usize, usize, usize)> {
|
||||
let mut da_specs = Vec::new();
|
||||
|
||||
for block in batch_start..batch_end {
|
||||
for blob in 0..config.network.blobs_per_block {
|
||||
let blob_global_index = block * config.network.blobs_per_block + blob;
|
||||
let subnet = blob_global_index % config.network.total_subnets;
|
||||
|
||||
if subnet < config.validator.assigned_subnets {
|
||||
da_specs.push((block, blob, subnet));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
da_specs
|
||||
}
|
||||
|
||||
fn generate_da_data_parallel(
|
||||
da_specs: &[(usize, usize, usize)],
|
||||
config: &DatasetGenConfig,
|
||||
) -> Result<
|
||||
Vec<(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)>,
|
||||
Box<dyn std::error::Error>,
|
||||
> {
|
||||
let generation_start = Instant::now();
|
||||
|
||||
let da_data_batch: Vec<_> = da_specs
|
||||
.par_iter()
|
||||
.map(|&(block, blob, subnet)| {
|
||||
let blob_id = create_blob_id(block, blob);
|
||||
let share_idx = [subnet as u8, 0u8];
|
||||
let share_data = create_da_share(block, blob, config.da.share_size_bytes);
|
||||
|
||||
let commitment_data = {
|
||||
let mut rng =
|
||||
rand_chacha::ChaCha20Rng::seed_from_u64((block as u64 * 1000) + blob as u64);
|
||||
use rand::Rng as _;
|
||||
let data: Vec<u8> = (0..config.da.commitment_size_bytes)
|
||||
.map(|_| rng.gen())
|
||||
.collect();
|
||||
bytes::Bytes::from(data)
|
||||
};
|
||||
|
||||
(blob_id, share_idx, share_data, commitment_data)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let generation_time = generation_start.elapsed();
|
||||
info!(
|
||||
"Generated {} DA objects in {:.2}s ({:.0} objects/s)",
|
||||
da_data_batch.len(),
|
||||
generation_time.as_secs_f64(),
|
||||
da_data_batch.len() as f64 / generation_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(da_data_batch)
|
||||
}
|
||||
|
||||
async fn store_da_batch(
|
||||
storage: &mut RocksBackend,
|
||||
da_batch: &[(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)],
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let storage_start = Instant::now();
|
||||
|
||||
for (blob_id, share_idx, share_data, commitment_data) in da_batch {
|
||||
storage
|
||||
.store_light_share(*blob_id, *share_idx, share_data.clone())
|
||||
.await?;
|
||||
storage
|
||||
.store_shared_commitments(*blob_id, commitment_data.clone())
|
||||
.await?;
|
||||
}
|
||||
|
||||
let storage_time = storage_start.elapsed();
|
||||
info!(
|
||||
"Stored {} DA objects in {:.2}s ({:.0} objects/s)",
|
||||
da_batch.len(),
|
||||
storage_time.as_secs_f64(),
|
||||
da_batch.len() as f64 / storage_time.as_secs_f64()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn log_block_progress(
|
||||
batch_start: usize,
|
||||
batch_end: usize,
|
||||
total_blocks: usize,
|
||||
blocks_generated: usize,
|
||||
) {
|
||||
if total_blocks > 1000 {
|
||||
info!(
|
||||
"Block progress: {}-{} completed - {:.1}% total",
|
||||
batch_start,
|
||||
batch_end - 1,
|
||||
(blocks_generated * 100) as f64 / total_blocks as f64
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_generation_completion(
|
||||
blocks_generated: usize,
|
||||
da_generated: usize,
|
||||
total_time: std::time::Duration,
|
||||
) {
|
||||
save_generation_report(blocks_generated, da_generated, total_time);
|
||||
|
||||
info!(
|
||||
"Multi-core generation completed: {} blocks, {} DA objects in {:.1}min",
|
||||
blocks_generated,
|
||||
da_generated,
|
||||
total_time.as_secs_f64() / 60.0
|
||||
);
|
||||
|
||||
let total_rate = (blocks_generated + da_generated) as f64 / total_time.as_secs_f64();
|
||||
info!(
|
||||
"Total rate: {:.0} objects/sec using {} CPU cores",
|
||||
total_rate,
|
||||
num_cpus::get()
|
||||
);
|
||||
}
|
||||
|
||||
fn save_generation_report(
|
||||
blocks_generated: usize,
|
||||
da_generated: usize,
|
||||
total_time: std::time::Duration,
|
||||
) {
|
||||
let results_dir = BenchConfig::results_path();
|
||||
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
|
||||
let filename = format!("dataset_generation_{}.json", timestamp);
|
||||
let filepath = results_dir.join(filename);
|
||||
|
||||
let report = DatasetGenerationReport {
|
||||
generation_summary: GenerationSummary {
|
||||
blocks_generated,
|
||||
da_objects_generated: da_generated,
|
||||
total_objects: blocks_generated + da_generated,
|
||||
duration_seconds: total_time.as_secs(),
|
||||
duration_minutes: total_time.as_secs_f64() / 60.0,
|
||||
},
|
||||
performance: GenerationPerformance {
|
||||
total_rate_objects_per_sec: (blocks_generated + da_generated) as f64
|
||||
/ total_time.as_secs_f64(),
|
||||
block_rate_per_sec: blocks_generated as f64 / total_time.as_secs_f64(),
|
||||
da_rate_per_sec: da_generated as f64 / total_time.as_secs_f64(),
|
||||
cpu_cores_used: num_cpus::get(),
|
||||
},
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
tool_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
|
||||
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
|
||||
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
|
||||
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
|
||||
}
|
||||
}
|
||||
|
||||
fn print_usage() {
|
||||
eprintln!("Multi-core Dataset Generator");
|
||||
eprintln!("Uses all CPU cores for parallel data generation");
|
||||
eprintln!();
|
||||
eprintln!("USAGE:");
|
||||
eprintln!(" POL_PROOF_DEV_MODE=true cargo run --example multicore_dataset_generator -- --config <file>");
|
||||
}
|
||||
193
storage-benchmarks/src/bin/storage_bench_runner.rs
Normal file
193
storage-benchmarks/src/bin/storage_bench_runner.rs
Normal file
@ -0,0 +1,193 @@
|
||||
use clap::Parser as _;
|
||||
use log::info;
|
||||
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
|
||||
use storage_benchmarks::{
|
||||
benchmark::{
|
||||
analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary,
|
||||
BenchResultsSummary, StatisticsSummary, StorageBenchReport,
|
||||
},
|
||||
config::{ProductionBenchConfig, ValidatorProfiles},
|
||||
data::initialize_benchmark_seed,
|
||||
metrics::RuntimeValidatorAllocator,
|
||||
BenchConfig,
|
||||
};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let config = ProductionBenchConfig::parse();
|
||||
config.validate()?;
|
||||
|
||||
let _seed_config = initialize_benchmark_seed(&[]);
|
||||
|
||||
run_benchmark(config).await
|
||||
}
|
||||
|
||||
async fn run_benchmark(config: ProductionBenchConfig) -> Result<(), Box<dyn std::error::Error>> {
|
||||
ALLOCATOR.set_limit_gb(config.memory as usize);
|
||||
|
||||
let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
|
||||
let profile = profiles
|
||||
.get_profile(&config.profile.to_string())
|
||||
.ok_or_else(|| format!("Profile '{}' not found", config.profile))?;
|
||||
|
||||
let bench_config = BenchConfig::production();
|
||||
if !bench_config.settings.db_path.exists() {
|
||||
return Err("No dataset found".into());
|
||||
}
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
for i in 1..=config.warmup_runs {
|
||||
info!("Warmup run {}/{}", i, config.warmup_runs);
|
||||
let _ = run_iteration(&bench_config, profile, &config).await;
|
||||
}
|
||||
|
||||
for i in 1..=config.measurement_runs {
|
||||
info!("Measurement run {}/{}", i, config.measurement_runs);
|
||||
let result = run_iteration(&bench_config, profile, &config).await;
|
||||
info!("Run {} result: {:.1} ops/sec", i, result);
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
report_results(&results, &config);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_iteration(
|
||||
bench_config: &BenchConfig,
|
||||
profile: &storage_benchmarks::config::ValidatorProfile,
|
||||
config: &ProductionBenchConfig,
|
||||
) -> f64 {
|
||||
let mut storage_settings = bench_config.settings.clone();
|
||||
storage_settings.read_only = config.read_only;
|
||||
|
||||
match RocksBackend::new(storage_settings) {
|
||||
Ok(mut storage) => {
|
||||
if let Ok((block_count, share_count)) = analyze_dataset(&mut storage).await {
|
||||
if let Ok(result) = run_concurrent_validator_benchmark(
|
||||
storage,
|
||||
std::time::Duration::from_secs(config.duration),
|
||||
profile,
|
||||
(block_count, share_count),
|
||||
config.read_only,
|
||||
)
|
||||
.await
|
||||
{
|
||||
return result.combined_throughput();
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => log::error!("Storage error: {}", e),
|
||||
}
|
||||
|
||||
0.0
|
||||
}
|
||||
|
||||
fn report_results(results: &[f64], config: &ProductionBenchConfig) {
|
||||
save_results_to_file(results, config);
|
||||
print_results_summary(results, config);
|
||||
}
|
||||
|
||||
fn save_results_to_file(results: &[f64], config: &ProductionBenchConfig) {
|
||||
let results_dir = BenchConfig::results_path();
|
||||
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
|
||||
let filename = format!(
|
||||
"storage_bench_{}_{}_{}gb_{}.json",
|
||||
config.profile, timestamp, config.memory, config.duration
|
||||
);
|
||||
let filepath = results_dir.join(filename);
|
||||
|
||||
let mean = if results.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
results.iter().sum::<f64>() / results.len() as f64
|
||||
};
|
||||
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
|
||||
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
|
||||
let variability = if mean > 0.0 {
|
||||
(max - min) / mean * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let detailed_results = StorageBenchReport {
|
||||
benchmark_config: BenchConfigSummary {
|
||||
profile: format!("{:?}", config.profile),
|
||||
memory_gb: config.memory,
|
||||
duration_seconds: config.duration,
|
||||
warmup_runs: config.warmup_runs,
|
||||
measurement_runs: config.measurement_runs,
|
||||
},
|
||||
results: BenchResultsSummary {
|
||||
raw_measurements: results.to_vec(),
|
||||
statistics: StatisticsSummary {
|
||||
mean_ops_sec: mean,
|
||||
min_ops_sec: min,
|
||||
max_ops_sec: max,
|
||||
variability_percent: variability,
|
||||
sample_count: results.len(),
|
||||
},
|
||||
},
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
tool_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
|
||||
let json_content = match serde_json::to_string_pretty(&detailed_results) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
log::error!("Failed to serialize results: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match std::fs::write(&filepath, json_content) {
|
||||
Ok(_) => log::info!("Results saved to: {}", filepath.display()),
|
||||
Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
|
||||
}
|
||||
}
|
||||
|
||||
fn print_results_summary(results: &[f64], config: &ProductionBenchConfig) {
|
||||
if results.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let mean = results.iter().sum::<f64>() / results.len() as f64;
|
||||
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
|
||||
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
|
||||
let variability = if mean > 0.0 {
|
||||
(max - min) / mean * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
info!(
|
||||
"Mean: {:.1} ops/sec, Range: {:.1}-{:.1}, Variability: {:.1}%",
|
||||
mean, min, max, variability
|
||||
);
|
||||
|
||||
let summary = StatisticsSummary {
|
||||
mean_ops_sec: mean,
|
||||
min_ops_sec: min,
|
||||
max_ops_sec: max,
|
||||
variability_percent: variability,
|
||||
sample_count: results.len(),
|
||||
};
|
||||
|
||||
log::info!(
|
||||
"MACHINE_READABLE: {}",
|
||||
serde_json::to_string(&summary).unwrap_or_default()
|
||||
);
|
||||
|
||||
println!("\n| Profile | Memory | Ops/sec | Variability |");
|
||||
println!("|---------|--------|---------|-------------|");
|
||||
println!(
|
||||
"| {} | {}GB | {:.1} | {:.1}% |",
|
||||
config.profile, config.memory, mean, variability
|
||||
);
|
||||
}
|
||||
259
storage-benchmarks/src/bin/verify_dataset_integrity.rs
Normal file
259
storage-benchmarks/src/bin/verify_dataset_integrity.rs
Normal file
@ -0,0 +1,259 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use log::info;
|
||||
use nomos_storage::{
|
||||
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
|
||||
backends::{rocksdb::RocksBackend, StorageBackend as _},
|
||||
};
|
||||
use storage_benchmarks::{
|
||||
benchmark::utilities::{create_blob_id, create_header_id},
|
||||
BenchConfig, CompletenessSummary, DataSizesSummary, DatasetVerificationReport,
|
||||
VerificationPerformance, VerificationSummary, WarningsSummary,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let config = BenchConfig::production();
|
||||
|
||||
if !config.settings.db_path.exists() {
|
||||
println!(
|
||||
"No database found at: {}",
|
||||
config.settings.db_path.display()
|
||||
);
|
||||
return Err("Database not found".into());
|
||||
}
|
||||
|
||||
info!("Opening database: {}", config.settings.db_path.display());
|
||||
|
||||
let mut storage_settings = config.settings.clone();
|
||||
storage_settings.read_only = true;
|
||||
|
||||
let mut storage = RocksBackend::new(storage_settings)?;
|
||||
|
||||
info!("Starting database verification");
|
||||
info!("=== Database Verification ===");
|
||||
|
||||
info!("Checking blocks...");
|
||||
let start_time = Instant::now();
|
||||
let mut blocks_found = 0;
|
||||
let mut total_block_size = 0u64;
|
||||
|
||||
for chunk_start in (0..1_100_000).step_by(10_000) {
|
||||
let mut chunk_found = 0;
|
||||
let chunk_end = chunk_start + 10_000;
|
||||
|
||||
for i in chunk_start..chunk_end {
|
||||
let header_id = create_header_id(i);
|
||||
|
||||
match storage.get_block(header_id).await {
|
||||
Ok(Some(data)) => {
|
||||
blocks_found += 1;
|
||||
total_block_size += data.len() as u64;
|
||||
chunk_found += 1;
|
||||
}
|
||||
Ok(None) => {
|
||||
if chunk_found == 0 {
|
||||
info!("No more blocks found after block {}", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
if chunk_found == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
info!(
|
||||
"Blocks {}-{}: found {} blocks",
|
||||
chunk_start,
|
||||
chunk_start + chunk_found - 1,
|
||||
chunk_found
|
||||
);
|
||||
}
|
||||
|
||||
let blocks_check_time = start_time.elapsed();
|
||||
|
||||
println!("Block Data:");
|
||||
println!(" Blocks found: {}", blocks_found);
|
||||
println!(" Expected blocks: 1,051,200");
|
||||
println!(
|
||||
" Total block size: {:.1} GB",
|
||||
total_block_size as f64 / 1024.0 / 1024.0 / 1024.0
|
||||
);
|
||||
println!(
|
||||
" Average block size: {} bytes",
|
||||
if blocks_found > 0 {
|
||||
total_block_size / blocks_found
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
println!(" Check time: {:.1}s", blocks_check_time.as_secs_f64());
|
||||
println!();
|
||||
|
||||
info!("Checking DA shares...");
|
||||
let start_time = Instant::now();
|
||||
let mut shares_found = 0;
|
||||
let mut total_share_size = 0u64;
|
||||
let mut commitments_found = 0;
|
||||
let mut total_commitment_size = 0u64;
|
||||
|
||||
for blob_idx in 0..1000 {
|
||||
for subnet in 0..50 {
|
||||
let blob_id = create_blob_id(blob_idx, 0);
|
||||
let share_idx = [subnet as u8, 0u8];
|
||||
|
||||
if let Ok(Some(data)) = storage.get_light_share(blob_id, share_idx).await {
|
||||
shares_found += 1;
|
||||
total_share_size += data.len() as u64;
|
||||
}
|
||||
|
||||
if let Ok(Some(data)) = storage.get_shared_commitments(blob_id).await {
|
||||
commitments_found += 1;
|
||||
total_commitment_size += data.len() as u64;
|
||||
}
|
||||
}
|
||||
|
||||
if blob_idx % 100 == 0 {
|
||||
info!(
|
||||
"Checked blob {} - found {} shares, {} commitments so far",
|
||||
blob_idx, shares_found, commitments_found
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let da_check_time = start_time.elapsed();
|
||||
|
||||
println!("DA Data:");
|
||||
println!(
|
||||
" DA shares found: {} (sampled from first 50K possibilities)",
|
||||
shares_found
|
||||
);
|
||||
println!(" Expected DA shares: ~256,650 total");
|
||||
println!(
|
||||
" Total share size: {:.1} MB",
|
||||
total_share_size as f64 / 1024.0 / 1024.0
|
||||
);
|
||||
println!(
|
||||
" Average share size: {} bytes",
|
||||
if shares_found > 0 {
|
||||
total_share_size / shares_found
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
println!();
|
||||
println!(" Commitments found: {}", commitments_found);
|
||||
println!(
|
||||
" Total commitment size: {:.1} GB",
|
||||
total_commitment_size as f64 / 1024.0 / 1024.0 / 1024.0
|
||||
);
|
||||
println!(
|
||||
" Average commitment size: {} bytes",
|
||||
if commitments_found > 0 {
|
||||
total_commitment_size / commitments_found
|
||||
} else {
|
||||
0
|
||||
}
|
||||
);
|
||||
println!(" Check time: {:.1}s", da_check_time.as_secs_f64());
|
||||
println!();
|
||||
|
||||
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
|
||||
|
||||
println!("Summary:");
|
||||
println!(" Database on disk: 4.8 GB");
|
||||
println!(
|
||||
" Verified data size: {:.1} GB",
|
||||
total_verified_size as f64 / 1024.0 / 1024.0 / 1024.0
|
||||
);
|
||||
println!(
|
||||
" Blocks completeness: {:.1}%",
|
||||
blocks_found as f64 / 1_051_200.0 * 100.0
|
||||
);
|
||||
println!(
|
||||
" Estimated DA completeness: {:.1}%",
|
||||
shares_found as f64 / (256_650.0 / 50.0) * 100.0
|
||||
);
|
||||
|
||||
if blocks_found < 1_000_000 {
|
||||
println!("WARNING: Block generation may have been incomplete");
|
||||
}
|
||||
|
||||
if total_verified_size < 50 * 1024 * 1024 * 1024 {
|
||||
println!("WARNING: Data size much smaller than expected - check generation logic");
|
||||
}
|
||||
|
||||
save_verification_report(
|
||||
blocks_found as usize,
|
||||
shares_found as usize,
|
||||
commitments_found as usize,
|
||||
total_block_size,
|
||||
total_share_size,
|
||||
total_commitment_size,
|
||||
blocks_check_time + da_check_time,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn save_verification_report(
|
||||
blocks_found: usize,
|
||||
shares_found: usize,
|
||||
commitments_found: usize,
|
||||
total_block_size: u64,
|
||||
total_share_size: u64,
|
||||
total_commitment_size: u64,
|
||||
verification_time: std::time::Duration,
|
||||
) {
|
||||
let results_dir = BenchConfig::results_path();
|
||||
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
|
||||
let filename = format!("dataset_verification_{}.json", timestamp);
|
||||
let filepath = results_dir.join(filename);
|
||||
|
||||
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
|
||||
|
||||
let report = DatasetVerificationReport {
|
||||
verification_summary: VerificationSummary {
|
||||
blocks_found,
|
||||
da_shares_found: shares_found,
|
||||
da_commitments_found: commitments_found,
|
||||
total_objects_found: blocks_found + shares_found + commitments_found,
|
||||
},
|
||||
data_sizes: DataSizesSummary {
|
||||
total_block_size_bytes: total_block_size,
|
||||
total_share_size_bytes: total_share_size,
|
||||
total_commitment_size_bytes: total_commitment_size,
|
||||
total_verified_size_bytes: total_verified_size,
|
||||
total_verified_size_gb: total_verified_size as f64 / (1024.0 * 1024.0 * 1024.0),
|
||||
},
|
||||
completeness_estimates: CompletenessSummary {
|
||||
block_completeness_percent: blocks_found as f64 / 1_051_200.0 * 100.0,
|
||||
da_completeness_percent: shares_found as f64 / (256_650.0 / 50.0) * 100.0,
|
||||
},
|
||||
performance: VerificationPerformance {
|
||||
verification_time_seconds: verification_time.as_secs_f64(),
|
||||
objects_verified_per_sec: (blocks_found + shares_found + commitments_found) as f64
|
||||
/ verification_time.as_secs_f64(),
|
||||
},
|
||||
warnings: WarningsSummary {
|
||||
block_generation_incomplete: blocks_found < 1_000_000,
|
||||
data_size_smaller_than_expected: total_verified_size < 50 * 1024 * 1024 * 1024,
|
||||
},
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
tool_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
|
||||
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
|
||||
Ok(_) => info!("Verification report saved to: {}", filepath.display()),
|
||||
Err(e) => log::warn!(
|
||||
"Failed to save verification report to {}: {}",
|
||||
filepath.display(),
|
||||
e
|
||||
),
|
||||
}
|
||||
}
|
||||
161
storage-benchmarks/src/bin/verify_rocksdb_properties.rs
Normal file
161
storage-benchmarks/src/bin/verify_rocksdb_properties.rs
Normal file
@ -0,0 +1,161 @@
|
||||
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
|
||||
use storage_benchmarks::BenchConfig;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
|
||||
let config = BenchConfig::production();
|
||||
|
||||
if !config.settings.db_path.exists() {
|
||||
println!(
|
||||
"No database found at: {}",
|
||||
config.settings.db_path.display()
|
||||
);
|
||||
return Err("Database not found - run dataset_generator first".into());
|
||||
}
|
||||
|
||||
let storage = RocksBackend::new(config.settings)?;
|
||||
|
||||
println!("=== RocksDB Properties Verification ===");
|
||||
println!();
|
||||
|
||||
verify_property_availability(&storage);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn verify_property_availability(storage: &RocksBackend) {
|
||||
let level0_prop = rocksdb::properties::num_files_at_level(0);
|
||||
let level1_prop = rocksdb::properties::num_files_at_level(1);
|
||||
let level2_prop = rocksdb::properties::num_files_at_level(2);
|
||||
|
||||
let properties_to_test = vec![
|
||||
("STATS", rocksdb::properties::STATS),
|
||||
(
|
||||
"BLOCK_CACHE_CAPACITY",
|
||||
rocksdb::properties::BLOCK_CACHE_CAPACITY,
|
||||
),
|
||||
(
|
||||
"TOTAL_SST_FILES_SIZE",
|
||||
rocksdb::properties::TOTAL_SST_FILES_SIZE,
|
||||
),
|
||||
(
|
||||
"CUR_SIZE_ALL_MEM_TABLES",
|
||||
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES,
|
||||
),
|
||||
(
|
||||
"LIVE_SST_FILES_SIZE",
|
||||
rocksdb::properties::LIVE_SST_FILES_SIZE,
|
||||
),
|
||||
("ESTIMATE_NUM_KEYS", rocksdb::properties::ESTIMATE_NUM_KEYS),
|
||||
("NUM_FILES_AT_LEVEL0", &level0_prop),
|
||||
("NUM_FILES_AT_LEVEL1", &level1_prop),
|
||||
("NUM_FILES_AT_LEVEL2", &level2_prop),
|
||||
];
|
||||
|
||||
let custom_properties = vec![
|
||||
"rocksdb.index-and-filter-cache.usage",
|
||||
"rocksdb.index-and-filter-cache.capacity",
|
||||
"rocksdb.compaction-pending",
|
||||
"rocksdb.number.compactions",
|
||||
"rocksdb.compact.read.bytes",
|
||||
"rocksdb.compact.write.bytes",
|
||||
"rocksdb.compaction.cpu.time",
|
||||
"rocksdb.mem-table-flush-pending",
|
||||
"rocksdb.space.amplification",
|
||||
"rocksdb.total-sst-files-size",
|
||||
"rocksdb.number.keys.deleted",
|
||||
"rocksdb.size-bytes-at-level0",
|
||||
"rocksdb.size-bytes-at-level1",
|
||||
];
|
||||
|
||||
println!("Standard RocksDB Properties:");
|
||||
for (name, prop) in properties_to_test {
|
||||
test_standard_property(storage, name, &prop.to_string());
|
||||
}
|
||||
|
||||
println!("\nCustom/Extended Properties:");
|
||||
for prop_name in custom_properties {
|
||||
test_custom_property(storage, prop_name);
|
||||
}
|
||||
|
||||
println!("\nSTATS Property Sample:");
|
||||
test_stats_property(storage);
|
||||
}
|
||||
|
||||
fn test_standard_property(storage: &RocksBackend, name: &str, property: &str) {
|
||||
let property_owned = property.to_string();
|
||||
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
|
||||
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(value_bytes)) => {
|
||||
let value_str = String::from_utf8_lossy(&value_bytes);
|
||||
let truncated = if value_str.len() > 100 {
|
||||
format!("{}...", &value_str[..100])
|
||||
} else {
|
||||
value_str.to_string()
|
||||
};
|
||||
println!("OK {}: {}", name, truncated);
|
||||
}
|
||||
Ok(None) => {
|
||||
println!("FAIL {}: None (property exists but no value)", name);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("FAIL {}: Error - {}", name, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn test_custom_property(storage: &RocksBackend, property: &str) {
|
||||
let prop_owned = property.to_string();
|
||||
let transaction = storage.txn(move |db| match db.property_value(&prop_owned) {
|
||||
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(value_bytes)) => {
|
||||
let value_str = String::from_utf8_lossy(&value_bytes);
|
||||
println!("OK {}: {}", property, value_str.trim());
|
||||
}
|
||||
Ok(None) => {
|
||||
println!("FAIL {}: None (property exists but no value)", property);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("FAIL {}: Error - {}", property, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn test_stats_property(storage: &RocksBackend) {
|
||||
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
|
||||
Ok(Some(stats)) => Ok(Some(stats.into_bytes().into())),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(stats_bytes)) => {
|
||||
let stats_str = String::from_utf8_lossy(&stats_bytes);
|
||||
println!("Sample STATS lines:");
|
||||
for (i, line) in stats_str.lines().take(10).enumerate() {
|
||||
println!(" {}: {}", i + 1, line);
|
||||
}
|
||||
if stats_str.lines().count() > 10 {
|
||||
println!(" ... ({} total lines)", stats_str.lines().count());
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
println!("FAIL STATS: None");
|
||||
}
|
||||
Err(e) => {
|
||||
println!("FAIL STATS: Error - {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
152
storage-benchmarks/src/config/cli.rs
Normal file
152
storage-benchmarks/src/config/cli.rs
Normal file
@ -0,0 +1,152 @@
|
||||
use clap::Parser;
|
||||
|
||||
use super::types::{CompressionType, ProfileType};
|
||||
use crate::RocksDbTuningOptions;
|
||||
|
||||
#[derive(Debug, Clone, Parser)]
|
||||
#[command(name = "optimization_bench")]
|
||||
#[command(about = "RocksDB optimization benchmarks")]
|
||||
#[command(long_about = "Systematic RocksDB parameter optimization with statistical rigor")]
|
||||
#[non_exhaustive]
|
||||
pub struct ProductionBenchConfig {
|
||||
#[arg(long)]
|
||||
pub profile: ProfileType,
|
||||
|
||||
#[arg(long, default_value = "8")]
|
||||
pub memory: u32,
|
||||
|
||||
#[arg(long, default_value = "120")]
|
||||
pub duration: u64,
|
||||
|
||||
#[arg(long)]
|
||||
pub cache_size: Option<u32>,
|
||||
|
||||
#[arg(long)]
|
||||
pub write_buffer: Option<u32>,
|
||||
|
||||
#[arg(long)]
|
||||
pub compaction_jobs: Option<u32>,
|
||||
|
||||
#[arg(long)]
|
||||
pub block_size: Option<u32>,
|
||||
|
||||
#[arg(long)]
|
||||
pub compression: Option<CompressionType>,
|
||||
|
||||
#[arg(long)]
|
||||
pub read_only: bool,
|
||||
|
||||
#[arg(long)]
|
||||
pub seed: Option<u64>,
|
||||
|
||||
#[arg(long, default_value = "1")]
|
||||
pub warmup_runs: usize,
|
||||
|
||||
#[arg(long, default_value = "3")]
|
||||
pub measurement_runs: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Parser)]
|
||||
#[command(name = "dataset_generator")]
|
||||
#[command(about = "Multi-core dataset generation")]
|
||||
pub struct DatasetGeneratorConfig {
|
||||
#[arg(long)]
|
||||
pub config: std::path::PathBuf,
|
||||
|
||||
#[arg(long)]
|
||||
pub seed: Option<u64>,
|
||||
|
||||
#[arg(long)]
|
||||
pub size_limit: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ConfigValidationError {
|
||||
#[error("Memory limit must be between 1-512GB, got {0}GB")]
|
||||
InvalidMemoryLimit(u32),
|
||||
|
||||
#[error("Duration must be between 1-86400 seconds, got {0}s")]
|
||||
InvalidDuration(u64),
|
||||
|
||||
#[error("Cache size must be between 1-80% of RAM, got {0}%")]
|
||||
InvalidCacheSize(u32),
|
||||
|
||||
#[error("Write buffer must be between 16-2048MB, got {0}MB")]
|
||||
InvalidWriteBuffer(u32),
|
||||
|
||||
#[error("Compaction jobs must be between 1-32, got {0}")]
|
||||
InvalidCompactionJobs(u32),
|
||||
|
||||
#[error("Block size must be between 1-128KB, got {0}KB")]
|
||||
InvalidBlockSize(u32),
|
||||
|
||||
#[error("Warmup runs must be less than measurement runs, got warmup={0}, measurement={1}")]
|
||||
InvalidRunCounts(usize, usize),
|
||||
|
||||
#[error("Unknown compression type: {0} (valid: none, lz4, zstd)")]
|
||||
InvalidCompression(String),
|
||||
|
||||
#[error("Profile '{0}' not found in validator_profiles.toml")]
|
||||
ProfileNotFound(String),
|
||||
}
|
||||
|
||||
impl ProductionBenchConfig {
|
||||
pub fn validate(&self) -> Result<(), ConfigValidationError> {
|
||||
if !(1..=512).contains(&self.memory) {
|
||||
return Err(ConfigValidationError::InvalidMemoryLimit(self.memory));
|
||||
}
|
||||
|
||||
if !(1..=86400).contains(&self.duration) {
|
||||
return Err(ConfigValidationError::InvalidDuration(self.duration));
|
||||
}
|
||||
|
||||
if let Some(cache) = self.cache_size {
|
||||
if !(1..=80).contains(&cache) {
|
||||
return Err(ConfigValidationError::InvalidCacheSize(cache));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(buffer) = self.write_buffer {
|
||||
if !(16..=2048).contains(&buffer) {
|
||||
return Err(ConfigValidationError::InvalidWriteBuffer(buffer));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(jobs) = self.compaction_jobs {
|
||||
if !(1..=32).contains(&jobs) {
|
||||
return Err(ConfigValidationError::InvalidCompactionJobs(jobs));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(block_size) = self.block_size {
|
||||
if !(1..=128).contains(&block_size) {
|
||||
return Err(ConfigValidationError::InvalidBlockSize(block_size));
|
||||
}
|
||||
}
|
||||
|
||||
if self.warmup_runs >= self.measurement_runs {
|
||||
return Err(ConfigValidationError::InvalidRunCounts(
|
||||
self.warmup_runs,
|
||||
self.measurement_runs,
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(comp) = self.compression {
|
||||
log::debug!("Compression type: {comp}");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn to_rocksdb_tuning(&self) -> RocksDbTuningOptions {
|
||||
RocksDbTuningOptions {
|
||||
cache_size_percent: self.cache_size,
|
||||
write_buffer_mb: self.write_buffer,
|
||||
compaction_jobs: self.compaction_jobs,
|
||||
block_size_kb: self.block_size,
|
||||
compression: self.compression,
|
||||
bloom_filter_bits: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
88
storage-benchmarks/src/config/dataset_generation.rs
Normal file
88
storage-benchmarks/src/config/dataset_generation.rs
Normal file
@ -0,0 +1,88 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatasetGenConfig {
|
||||
pub dataset: DatasetParams,
|
||||
pub network: NetworkParams,
|
||||
pub validator: ValidatorParams,
|
||||
pub blocks: BlockParams,
|
||||
pub da: DataAvailabilityParams,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatasetParams {
|
||||
pub days: usize,
|
||||
pub block_time_seconds: u64,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NetworkParams {
|
||||
pub load_name: String,
|
||||
pub blobs_per_block: usize,
|
||||
pub total_subnets: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ValidatorParams {
|
||||
pub assigned_subnets: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BlockParams {
|
||||
pub size_bytes: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DataAvailabilityParams {
|
||||
pub share_size_bytes: usize,
|
||||
pub commitment_size_bytes: usize,
|
||||
pub shares_per_blob: usize,
|
||||
}
|
||||
|
||||
impl DatasetGenConfig {
|
||||
pub fn from_file<P: AsRef<std::path::Path>>(
|
||||
path: P,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let content = std::fs::read_to_string(path)?;
|
||||
let config: Self =
|
||||
toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {e}"))?;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn total_blocks(&self) -> usize {
|
||||
let blocks_per_day = (24 * 60 * 60) / self.dataset.block_time_seconds as usize;
|
||||
self.dataset.days * blocks_per_day
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn estimated_size(&self) -> String {
|
||||
let total_blocks = self.total_blocks() as u64;
|
||||
let block_size = self.blocks.size_bytes as u64;
|
||||
|
||||
let subnet_assignment_probability =
|
||||
self.validator.assigned_subnets as f64 / self.network.total_subnets as f64;
|
||||
|
||||
let total_blobs = total_blocks * self.network.blobs_per_block as u64;
|
||||
let validator_assigned_blobs = (total_blobs as f64 * subnet_assignment_probability) as u64;
|
||||
|
||||
let shares_per_assigned_blob =
|
||||
self.da.shares_per_blob as u64 / self.network.total_subnets as u64;
|
||||
let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob;
|
||||
|
||||
let block_data_size = total_blocks * block_size;
|
||||
let da_shares_size = total_shares_stored * self.da.share_size_bytes as u64;
|
||||
let da_commitments_size = validator_assigned_blobs * self.da.commitment_size_bytes as u64;
|
||||
let da_data_size = da_shares_size + da_commitments_size;
|
||||
let total_bytes = block_data_size + da_data_size;
|
||||
|
||||
if total_bytes < 1024 * 1024 {
|
||||
format!("{:.1} KB", total_bytes as f64 / 1024.0)
|
||||
} else if total_bytes < 1024 * 1024 * 1024 {
|
||||
format!("{:.1} MB", total_bytes as f64 / 1024.0 / 1024.0)
|
||||
} else {
|
||||
format!("{:.1} GB", total_bytes as f64 / 1024.0 / 1024.0 / 1024.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
9
storage-benchmarks/src/config/mod.rs
Normal file
9
storage-benchmarks/src/config/mod.rs
Normal file
@ -0,0 +1,9 @@
|
||||
pub mod cli;
|
||||
pub mod dataset_generation;
|
||||
pub mod types;
|
||||
pub mod validator_profiles;
|
||||
|
||||
pub use cli::*;
|
||||
pub use dataset_generation::*;
|
||||
pub use types::*;
|
||||
pub use validator_profiles::*;
|
||||
122
storage-benchmarks/src/config/types.rs
Normal file
122
storage-benchmarks/src/config/types.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use clap::ValueEnum;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
|
||||
#[non_exhaustive]
|
||||
pub enum CompressionType {
|
||||
None,
|
||||
|
||||
Lz4,
|
||||
|
||||
Snappy,
|
||||
|
||||
Zstd,
|
||||
}
|
||||
|
||||
impl Default for CompressionType {
|
||||
fn default() -> Self {
|
||||
Self::Snappy
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for CompressionType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::None => write!(f, "none"),
|
||||
Self::Lz4 => write!(f, "lz4"),
|
||||
Self::Snappy => write!(f, "snappy"),
|
||||
Self::Zstd => write!(f, "zstd"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for CompressionType {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"none" => Ok(Self::None),
|
||||
"lz4" => Ok(Self::Lz4),
|
||||
"snappy" => Ok(Self::Snappy),
|
||||
"zstd" => Ok(Self::Zstd),
|
||||
_ => Err(format!("Unknown compression type: {s}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
|
||||
#[non_exhaustive]
|
||||
pub enum ProfileType {
|
||||
Light,
|
||||
|
||||
Mainnet,
|
||||
|
||||
Testnet,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ProfileType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Light => write!(f, "light"),
|
||||
Self::Mainnet => write!(f, "mainnet"),
|
||||
Self::Testnet => write!(f, "testnet"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[non_exhaustive]
|
||||
pub enum WorkloadType {
|
||||
BlockValidation,
|
||||
DaSampling,
|
||||
DaCommitments,
|
||||
IbdServing,
|
||||
BlockStorage,
|
||||
DaStorage,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for WorkloadType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::BlockValidation => write!(f, "block_validation"),
|
||||
Self::DaSampling => write!(f, "da_sampling"),
|
||||
Self::DaCommitments => write!(f, "da_commitments"),
|
||||
Self::IbdServing => write!(f, "ibd_serving"),
|
||||
Self::BlockStorage => write!(f, "block_storage"),
|
||||
Self::DaStorage => write!(f, "da_storage"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[non_exhaustive]
|
||||
pub enum NetworkSize {
|
||||
Small,
|
||||
Medium,
|
||||
Large,
|
||||
Peak,
|
||||
}
|
||||
|
||||
impl NetworkSize {
|
||||
#[must_use]
|
||||
pub const fn validator_count(self) -> usize {
|
||||
match self {
|
||||
Self::Small => 100,
|
||||
Self::Medium => 1000,
|
||||
Self::Large => 2000,
|
||||
Self::Peak => 5000,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn concurrent_services(self) -> usize {
|
||||
match self {
|
||||
Self::Small => 6,
|
||||
Self::Medium => 8,
|
||||
Self::Large => 10,
|
||||
Self::Peak => 15,
|
||||
}
|
||||
}
|
||||
}
|
||||
115
storage-benchmarks/src/config/validator_profiles.rs
Normal file
115
storage-benchmarks/src/config/validator_profiles.rs
Normal file
@ -0,0 +1,115 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ValidatorProfile {
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
|
||||
pub block_read_rate_hz: f64,
|
||||
pub da_share_read_rate_hz: f64,
|
||||
pub range_scan_rate_hz: f64,
|
||||
|
||||
pub block_write_rate_hz: f64,
|
||||
pub da_share_write_rate_hz: f64,
|
||||
pub commitment_write_rate_hz: f64,
|
||||
|
||||
pub recent_access_ratio: f64,
|
||||
pub historical_access_ratio: f64,
|
||||
|
||||
#[serde(default = "default_total_validators")]
|
||||
pub total_validators: usize,
|
||||
#[serde(default = "default_assigned_subnets")]
|
||||
pub assigned_subnets: usize,
|
||||
}
|
||||
|
||||
impl ValidatorProfile {
|
||||
#[must_use]
|
||||
pub fn ibd_concurrent_streams(&self) -> usize {
|
||||
let base_streams = 1;
|
||||
let network_factor = (self.total_validators as f64 / 500.0).max(1.0);
|
||||
let total_streams = (f64::from(base_streams) * network_factor).round() as usize;
|
||||
|
||||
std::cmp::min(total_streams, 8)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn da_concurrent_streams(&self) -> usize {
|
||||
let subnet_factor = (self.assigned_subnets as f64 / 5.0).max(1.0);
|
||||
let total_streams = subnet_factor.round() as usize;
|
||||
|
||||
std::cmp::min(total_streams, 5)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn total_concurrent_services(&self) -> usize {
|
||||
let base_services = 3;
|
||||
let ibd_services = self.ibd_concurrent_streams();
|
||||
let da_services = self.da_concurrent_streams();
|
||||
|
||||
base_services + ibd_services + da_services
|
||||
}
|
||||
}
|
||||
|
||||
const fn default_total_validators() -> usize {
|
||||
1000
|
||||
}
|
||||
const fn default_assigned_subnets() -> usize {
|
||||
10
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NetworkScalingConfig {
|
||||
pub total_validators: usize,
|
||||
|
||||
pub total_subnets: usize,
|
||||
|
||||
pub assigned_subnets: usize,
|
||||
|
||||
pub activity_multiplier: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ConcurrencyConfig {
|
||||
pub base_concurrent_services: usize,
|
||||
|
||||
pub services_per_1k_validators: f64,
|
||||
|
||||
pub max_concurrent_services: usize,
|
||||
|
||||
pub ibd_concurrency_factor: f64,
|
||||
|
||||
pub da_concurrency_factor: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ValidatorProfiles {
|
||||
pub light: ValidatorProfile,
|
||||
pub mainnet: ValidatorProfile,
|
||||
pub testnet: ValidatorProfile,
|
||||
}
|
||||
|
||||
impl ValidatorProfiles {
|
||||
pub fn from_file<P: AsRef<std::path::Path>>(
|
||||
path: P,
|
||||
) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let content = std::fs::read_to_string(path)?;
|
||||
let profiles: Self = toml::from_str(&content)
|
||||
.map_err(|e| format!("Failed to parse validator profiles TOML: {e}"))?;
|
||||
Ok(profiles)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn get_profile(&self, name: &str) -> Option<&ValidatorProfile> {
|
||||
match name {
|
||||
"light" => Some(&self.light),
|
||||
"mainnet" => Some(&self.mainnet),
|
||||
"testnet" => Some(&self.testnet),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn available_profiles(&self) -> Vec<&str> {
|
||||
vec!["light", "mainnet", "testnet"]
|
||||
}
|
||||
}
|
||||
152
storage-benchmarks/src/data/deterministic.rs
Normal file
152
storage-benchmarks/src/data/deterministic.rs
Normal file
@ -0,0 +1,152 @@
|
||||
use rand::{Rng as _, SeedableRng as _};
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BenchmarkSeed {
|
||||
pub master_seed: u64,
|
||||
pub dataset_generation_seed: u64,
|
||||
pub access_pattern_seed: u64,
|
||||
pub latency_measurement_seed: u64,
|
||||
}
|
||||
|
||||
impl BenchmarkSeed {
|
||||
#[must_use]
|
||||
pub fn from_master(master_seed: u64) -> Self {
|
||||
let mut rng = ChaCha20Rng::seed_from_u64(master_seed);
|
||||
|
||||
Self {
|
||||
master_seed,
|
||||
dataset_generation_seed: rng.gen(),
|
||||
access_pattern_seed: rng.gen(),
|
||||
latency_measurement_seed: rng.gen(),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn default_fixed() -> Self {
|
||||
Self::from_master(12345)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn from_args_or_env(args: &[String]) -> Self {
|
||||
for (i, arg) in args.iter().enumerate() {
|
||||
if arg == "--seed" && i + 1 < args.len() {
|
||||
if let Ok(seed) = args[i + 1].parse::<u64>() {
|
||||
return Self::from_master(seed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(seed_str) = std::env::var("BENCHMARK_SEED") {
|
||||
if let Ok(seed) = seed_str.parse::<u64>() {
|
||||
return Self::from_master(seed);
|
||||
}
|
||||
}
|
||||
|
||||
Self::default_fixed()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn dataset_rng(&self) -> ChaCha20Rng {
|
||||
ChaCha20Rng::seed_from_u64(self.dataset_generation_seed)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn access_pattern_rng(&self, operation_id: u64) -> ChaCha20Rng {
|
||||
ChaCha20Rng::seed_from_u64(self.access_pattern_seed.wrapping_add(operation_id))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn latency_measurement_rng(&self) -> ChaCha20Rng {
|
||||
ChaCha20Rng::seed_from_u64(self.latency_measurement_seed)
|
||||
}
|
||||
|
||||
pub fn log_configuration(&self) {
|
||||
log::info!("Benchmark seeds (for reproducibility):");
|
||||
log::info!(" Master seed: {}", self.master_seed);
|
||||
log::info!(" Dataset generation: {}", self.dataset_generation_seed);
|
||||
log::info!(" Access patterns: {}", self.access_pattern_seed);
|
||||
log::info!(" Latency measurement: {}", self.latency_measurement_seed);
|
||||
log::info!(
|
||||
" Reproduce with: --seed {} or BENCHMARK_SEED={}",
|
||||
self.master_seed,
|
||||
self.master_seed
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static GLOBAL_BENCHMARK_SEED: std::sync::OnceLock<BenchmarkSeed> = std::sync::OnceLock::new();
|
||||
|
||||
pub fn initialize_benchmark_seed(args: &[String]) -> &'static BenchmarkSeed {
|
||||
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
|
||||
let seed = BenchmarkSeed::from_args_or_env(args);
|
||||
seed.log_configuration();
|
||||
seed
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_benchmark_seed() -> &'static BenchmarkSeed {
|
||||
GLOBAL_BENCHMARK_SEED.get().unwrap_or_else(|| {
|
||||
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
|
||||
let seed = BenchmarkSeed::default_fixed();
|
||||
log::warn!("Using default seed (benchmark_seed not initialized)");
|
||||
seed.log_configuration();
|
||||
seed
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn create_deterministic_rng(purpose: RngPurpose, id: u64) -> ChaCha20Rng {
|
||||
let seed = get_benchmark_seed();
|
||||
|
||||
match purpose {
|
||||
RngPurpose::DatasetGeneration => {
|
||||
ChaCha20Rng::seed_from_u64(seed.dataset_generation_seed.wrapping_add(id))
|
||||
}
|
||||
RngPurpose::AccessPattern => seed.access_pattern_rng(id),
|
||||
RngPurpose::LatencyMeasurement => {
|
||||
ChaCha20Rng::seed_from_u64(seed.latency_measurement_seed.wrapping_add(id))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum RngPurpose {
|
||||
DatasetGeneration,
|
||||
AccessPattern,
|
||||
LatencyMeasurement,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_deterministic_seed_derivation() {
|
||||
let seed1 = BenchmarkSeed::from_master(12345);
|
||||
let seed2 = BenchmarkSeed::from_master(12345);
|
||||
|
||||
assert_eq!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
|
||||
assert_eq!(seed1.access_pattern_seed, seed2.access_pattern_seed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_different_master_seeds() {
|
||||
let seed1 = BenchmarkSeed::from_master(12345);
|
||||
let seed2 = BenchmarkSeed::from_master(54321);
|
||||
|
||||
assert_ne!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deterministic_rng_creation() {
|
||||
let seed = BenchmarkSeed::from_master(12345);
|
||||
|
||||
let rng1 = seed.access_pattern_rng(100);
|
||||
let rng2 = seed.access_pattern_rng(100);
|
||||
|
||||
assert_eq!(rng1.get_seed(), rng2.get_seed());
|
||||
}
|
||||
}
|
||||
200
storage-benchmarks/src/data/generator.rs
Normal file
200
storage-benchmarks/src/data/generator.rs
Normal file
@ -0,0 +1,200 @@
|
||||
use bytes::Bytes;
|
||||
use nomos_core::{da::BlobId, header::HeaderId};
|
||||
use rand::Rng as _;
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
benchmark::utilities::{create_blob_id, create_header_id},
|
||||
deterministic::BenchmarkSeed,
|
||||
};
|
||||
|
||||
pub struct RealisticDataGenerator {
|
||||
seed_config: BenchmarkSeed,
|
||||
dataset_rng: ChaCha20Rng,
|
||||
block_sequence: u64,
|
||||
da_sequence: u64,
|
||||
generation_stats: DataGenerationStats,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct DataGenerationStats {
|
||||
pub blocks_created: u64,
|
||||
pub da_shares_created: u64,
|
||||
pub commitments_created: u64,
|
||||
pub total_bytes_generated: u64,
|
||||
pub generation_start: Option<chrono::DateTime<chrono::Utc>>,
|
||||
}
|
||||
|
||||
impl RealisticDataGenerator {
|
||||
#[must_use]
|
||||
pub fn new(master_seed: u64) -> Self {
|
||||
let seed_config = BenchmarkSeed::from_master(master_seed);
|
||||
let dataset_rng = seed_config.dataset_rng();
|
||||
|
||||
Self {
|
||||
seed_config,
|
||||
dataset_rng,
|
||||
block_sequence: 0,
|
||||
da_sequence: 0,
|
||||
generation_stats: DataGenerationStats {
|
||||
generation_start: Some(chrono::Utc::now()),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_default_seed() -> Self {
|
||||
Self::new(12345)
|
||||
}
|
||||
|
||||
pub fn generate_block(&mut self, target_size: usize) -> Bytes {
|
||||
let block_data = self.create_realistic_block_data(self.block_sequence, target_size);
|
||||
|
||||
self.block_sequence += 1;
|
||||
self.generation_stats.blocks_created += 1;
|
||||
self.generation_stats.total_bytes_generated += target_size as u64;
|
||||
|
||||
block_data
|
||||
}
|
||||
|
||||
pub fn generate_da_share(&mut self, size: usize) -> Bytes {
|
||||
let share_data = self.create_deterministic_da_share(self.da_sequence, size);
|
||||
|
||||
self.da_sequence += 1;
|
||||
self.generation_stats.da_shares_created += 1;
|
||||
self.generation_stats.total_bytes_generated += size as u64;
|
||||
|
||||
share_data
|
||||
}
|
||||
|
||||
pub fn generate_commitment(&mut self, size: usize) -> Bytes {
|
||||
let commitment_data = self.create_deterministic_commitment(self.da_sequence, size);
|
||||
|
||||
self.generation_stats.commitments_created += 1;
|
||||
self.generation_stats.total_bytes_generated += size as u64;
|
||||
|
||||
commitment_data
|
||||
}
|
||||
|
||||
pub fn generate_block_batch(&mut self, count: usize, block_size: usize) -> Vec<Bytes> {
|
||||
std::iter::repeat_with(|| self.generate_block(block_size))
|
||||
.take(count)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_da_batch(
|
||||
&mut self,
|
||||
count: usize,
|
||||
share_size: usize,
|
||||
commitment_size: usize,
|
||||
) -> Vec<(Bytes, Bytes)> {
|
||||
std::iter::repeat_with(|| {
|
||||
let share = self.generate_da_share(share_size);
|
||||
let commitment = self.generate_commitment(commitment_size);
|
||||
(share, commitment)
|
||||
})
|
||||
.take(count)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn stats(&self) -> &DataGenerationStats {
|
||||
&self.generation_stats
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn sequence_state(&self) -> (u64, u64) {
|
||||
(self.block_sequence, self.da_sequence)
|
||||
}
|
||||
|
||||
pub const fn set_sequence_state(&mut self, block_sequence: u64, da_sequence: u64) {
|
||||
self.block_sequence = block_sequence;
|
||||
self.da_sequence = da_sequence;
|
||||
}
|
||||
|
||||
pub fn reset(&mut self) {
|
||||
self.block_sequence = 0;
|
||||
self.da_sequence = 0;
|
||||
self.generation_stats = DataGenerationStats {
|
||||
generation_start: Some(chrono::Utc::now()),
|
||||
..Default::default()
|
||||
};
|
||||
self.dataset_rng = self.seed_config.dataset_rng();
|
||||
}
|
||||
|
||||
fn create_realistic_block_data(&mut self, block_index: u64, target_size: usize) -> Bytes {
|
||||
let mut block_data = Vec::with_capacity(target_size);
|
||||
|
||||
block_data.extend_from_slice(&block_index.to_be_bytes());
|
||||
|
||||
let parent_hash: [u8; 32] = self.dataset_rng.gen();
|
||||
block_data.extend_from_slice(&parent_hash);
|
||||
|
||||
let merkle_root: [u8; 32] = self.dataset_rng.gen();
|
||||
block_data.extend_from_slice(&merkle_root);
|
||||
|
||||
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index * 30;
|
||||
block_data.extend_from_slice(×tamp.to_be_bytes());
|
||||
|
||||
while block_data.len() < target_size {
|
||||
block_data.push(self.dataset_rng.gen());
|
||||
}
|
||||
|
||||
block_data.resize(target_size, 0);
|
||||
Bytes::from(block_data)
|
||||
}
|
||||
|
||||
fn create_deterministic_da_share(&mut self, _sequence: u64, size: usize) -> Bytes {
|
||||
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
|
||||
.take(size)
|
||||
.collect();
|
||||
Bytes::from(data)
|
||||
}
|
||||
|
||||
fn create_deterministic_commitment(&mut self, _sequence: u64, size: usize) -> Bytes {
|
||||
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
|
||||
.take(size)
|
||||
.collect();
|
||||
Bytes::from(data)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IdGenerator {
|
||||
block_counter: usize,
|
||||
blob_counter: usize,
|
||||
}
|
||||
|
||||
impl IdGenerator {
|
||||
#[must_use]
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
block_counter: 0,
|
||||
blob_counter: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_header_id(&mut self) -> HeaderId {
|
||||
let id = create_header_id(self.block_counter);
|
||||
self.block_counter += 1;
|
||||
id
|
||||
}
|
||||
|
||||
pub fn next_blob_id(&mut self) -> BlobId {
|
||||
let id = create_blob_id(self.blob_counter, 0);
|
||||
self.blob_counter += 1;
|
||||
id
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn counters(&self) -> (usize, usize) {
|
||||
(self.block_counter, self.blob_counter)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for IdGenerator {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
172
storage-benchmarks/src/data/hot_path_types.rs
Normal file
172
storage-benchmarks/src/data/hot_path_types.rs
Normal file
@ -0,0 +1,172 @@
|
||||
use smallvec::SmallVec;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OperationBuffer {
|
||||
pub read_buffer: Box<[u8]>,
|
||||
pub write_buffer: Box<[u8]>,
|
||||
pub key_buffer: SmallVec<[u8; 64]>,
|
||||
}
|
||||
|
||||
impl OperationBuffer {
|
||||
#[must_use]
|
||||
pub fn new(read_size: usize, write_size: usize) -> Self {
|
||||
Self {
|
||||
read_buffer: vec![0u8; read_size].into_boxed_slice(),
|
||||
write_buffer: vec![0u8; write_size].into_boxed_slice(),
|
||||
key_buffer: SmallVec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn read_slice(&self) -> &[u8] {
|
||||
&self.read_buffer
|
||||
}
|
||||
|
||||
pub fn write_slice_mut(&mut self) -> &mut [u8] {
|
||||
&mut self.write_buffer
|
||||
}
|
||||
|
||||
pub fn prepare_key<T: AsRef<[u8]>>(&mut self, key_data: T) -> &[u8] {
|
||||
let key_bytes = key_data.as_ref();
|
||||
|
||||
self.key_buffer.clear();
|
||||
self.key_buffer.extend_from_slice(key_bytes);
|
||||
|
||||
&self.key_buffer
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct TimingMeasurement {
|
||||
pub start_time: std::time::Instant,
|
||||
}
|
||||
|
||||
impl TimingMeasurement {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn start() -> Self {
|
||||
Self {
|
||||
start_time: std::time::Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn end(self) -> std::time::Duration {
|
||||
self.start_time.elapsed()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ThreadLocalMetrics {
|
||||
pub operations_count: u64,
|
||||
pub success_count: u64,
|
||||
pub error_count: u64,
|
||||
pub bytes_processed: u64,
|
||||
pub latency_sum_micros: u64,
|
||||
pub latency_count: u64,
|
||||
}
|
||||
|
||||
impl ThreadLocalMetrics {
|
||||
#[inline]
|
||||
pub const fn record_operation(
|
||||
&mut self,
|
||||
success: bool,
|
||||
bytes: u64,
|
||||
latency: std::time::Duration,
|
||||
) {
|
||||
self.operations_count += 1;
|
||||
|
||||
if success {
|
||||
self.success_count += 1;
|
||||
} else {
|
||||
self.error_count += 1;
|
||||
}
|
||||
|
||||
self.bytes_processed += bytes;
|
||||
self.latency_sum_micros += latency.as_micros() as u64;
|
||||
self.latency_count += 1;
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn average_latency_micros(&self) -> f64 {
|
||||
if self.latency_count > 0 {
|
||||
self.latency_sum_micros as f64 / self.latency_count as f64
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn fold_into(self, global: &mut Self) {
|
||||
global.operations_count += self.operations_count;
|
||||
global.success_count += self.success_count;
|
||||
global.error_count += self.error_count;
|
||||
global.bytes_processed += self.bytes_processed;
|
||||
global.latency_sum_micros += self.latency_sum_micros;
|
||||
global.latency_count += self.latency_count;
|
||||
}
|
||||
}
|
||||
|
||||
pub trait EfficientIteratorExt: Iterator {
|
||||
fn collect_presized(self, size_hint: usize) -> Vec<Self::Item>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let mut vec = Vec::with_capacity(size_hint);
|
||||
vec.extend(self);
|
||||
vec
|
||||
}
|
||||
|
||||
fn collect_small_8(self) -> SmallVec<[Self::Item; 8]>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let mut vec: SmallVec<[Self::Item; 8]> = SmallVec::new();
|
||||
vec.extend(self);
|
||||
vec
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator> EfficientIteratorExt for I {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::benchmark::utilities::create_blob_id;
|
||||
|
||||
#[test]
|
||||
fn test_operation_buffer_efficiency() {
|
||||
let mut buffer = OperationBuffer::new(1024, 2048);
|
||||
|
||||
let key1 = buffer.prepare_key(b"test_key_1");
|
||||
assert_eq!(key1, b"test_key_1");
|
||||
|
||||
let key2 = buffer.prepare_key(b"different_key");
|
||||
assert_eq!(key2, b"different_key");
|
||||
|
||||
assert!(buffer.key_buffer.capacity() >= 12);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_efficient_id_creation() {
|
||||
let header_id = create_header_id_efficient(12345);
|
||||
let blob_id = create_blob_id(100, 5);
|
||||
|
||||
assert_ne!(header_id.as_ref(), &[0u8; 32]);
|
||||
assert_ne!(blob_id.as_ref(), &[0u8; 32]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thread_local_metrics() {
|
||||
let mut metrics = ThreadLocalMetrics::default();
|
||||
|
||||
metrics.record_operation(true, 1024, std::time::Duration::from_micros(500));
|
||||
metrics.record_operation(false, 0, std::time::Duration::from_micros(1000));
|
||||
|
||||
assert_eq!(metrics.operations_count, 2);
|
||||
assert_eq!(metrics.success_count, 1);
|
||||
assert_eq!(metrics.error_count, 1);
|
||||
assert_eq!(metrics.bytes_processed, 1024);
|
||||
assert_eq!(metrics.average_latency_micros(), 750.0);
|
||||
}
|
||||
}
|
||||
11
storage-benchmarks/src/data/mod.rs
Normal file
11
storage-benchmarks/src/data/mod.rs
Normal file
@ -0,0 +1,11 @@
|
||||
pub mod deterministic;
|
||||
pub mod generator;
|
||||
pub mod hot_path_types;
|
||||
pub mod realistic_generation;
|
||||
pub mod spec_access_patterns;
|
||||
pub mod streaming_writer;
|
||||
|
||||
pub use deterministic::*;
|
||||
pub use generator::*;
|
||||
pub use realistic_generation::*;
|
||||
pub use spec_access_patterns::*;
|
||||
211
storage-benchmarks/src/data/realistic_generation.rs
Normal file
211
storage-benchmarks/src/data/realistic_generation.rs
Normal file
@ -0,0 +1,211 @@
|
||||
use bytes::Bytes;
|
||||
use cryptarchia_engine::Slot;
|
||||
use groth16::Fr;
|
||||
use nomos_core::{
|
||||
block::Block,
|
||||
crypto::{Digest as _, Hasher},
|
||||
header::{ContentId, Header, HeaderId},
|
||||
mantle::{
|
||||
ledger::Tx as LedgerTx, ops::leader_claim::VoucherCm, MantleTx, Note, SignedMantleTx,
|
||||
Transaction as _, Utxo,
|
||||
},
|
||||
proofs::{
|
||||
leader_proof::{Groth16LeaderProof, LeaderPrivate, LeaderPublic},
|
||||
zksig::{DummyZkSignature, ZkSignaturePublic},
|
||||
},
|
||||
};
|
||||
use rand::{Rng as _, SeedableRng as _};
|
||||
use rand_chacha::ChaCha20Rng;
|
||||
|
||||
use crate::deterministic::{create_deterministic_rng, RngPurpose};
|
||||
|
||||
pub fn create_block(
|
||||
block_index: usize,
|
||||
parent_id: HeaderId,
|
||||
) -> Result<(HeaderId, Bytes), Box<dyn std::error::Error>> {
|
||||
let transactions = create_signed_mantle_txs(block_index);
|
||||
|
||||
let slot = Slot::from(block_index as u64);
|
||||
let block_root = ContentId::from(calculate_block_root(&transactions));
|
||||
let proof = make_test_proof(block_index);
|
||||
|
||||
let header = Header::new(parent_id, block_root, slot, proof);
|
||||
let header_id = header.id();
|
||||
|
||||
let block: Block<SignedMantleTx> = Block::new(header, transactions);
|
||||
let block_bytes = bincode::serialize(&block)?;
|
||||
|
||||
Ok((header_id, Bytes::from(block_bytes)))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn create_block_data(block_index: usize, target_size: usize) -> Bytes {
|
||||
create_simplified_realistic_block_data(block_index, target_size)
|
||||
}
|
||||
|
||||
fn make_test_proof(block_index: usize) -> Groth16LeaderProof {
|
||||
let public_inputs = LeaderPublic::new(
|
||||
Fr::from(block_index as u64),
|
||||
Fr::from(block_index as u64 + 1),
|
||||
Fr::from(12345u64),
|
||||
block_index as u64,
|
||||
1_000_000,
|
||||
);
|
||||
|
||||
let note = Note::new(1000, Fr::from(block_index as u64).into());
|
||||
let utxo = Utxo {
|
||||
tx_hash: Fr::from(block_index as u64).into(),
|
||||
output_index: 0,
|
||||
note,
|
||||
};
|
||||
|
||||
let leader_key_bytes = [block_index as u8; 32];
|
||||
let leader_key = ed25519_dalek::VerifyingKey::from_bytes(&leader_key_bytes)
|
||||
.unwrap_or_else(|_| ed25519_dalek::VerifyingKey::from_bytes(&[1u8; 32]).unwrap());
|
||||
|
||||
let aged_path = vec![];
|
||||
let latest_path = vec![];
|
||||
|
||||
let private = LeaderPrivate::new(
|
||||
public_inputs,
|
||||
utxo,
|
||||
&aged_path,
|
||||
&latest_path,
|
||||
Fr::from(999u64),
|
||||
0,
|
||||
&leader_key,
|
||||
);
|
||||
|
||||
let voucher_cm = VoucherCm::default();
|
||||
|
||||
Groth16LeaderProof::prove(private, voucher_cm).unwrap_or_else(|_| {
|
||||
panic!("Proof generation failed - ensure POL_PROOF_DEV_MODE=true is set");
|
||||
})
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn create_da_share(block: usize, blob: usize, size: usize) -> Bytes {
|
||||
let data_id = (block as u64 * 1000) + blob as u64;
|
||||
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, data_id);
|
||||
|
||||
let data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
|
||||
|
||||
Bytes::from(data)
|
||||
}
|
||||
|
||||
pub async fn create_commitment(
|
||||
block: usize,
|
||||
blob: usize,
|
||||
size: usize,
|
||||
) -> Result<Bytes, Box<dyn std::error::Error>> {
|
||||
let commitment_id = (block as u64 * 1000) + blob as u64;
|
||||
let mut rng =
|
||||
create_deterministic_rng(RngPurpose::DatasetGeneration, commitment_id + 1_000_000);
|
||||
let commitment_data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
|
||||
|
||||
Ok(Bytes::from(commitment_data))
|
||||
}
|
||||
|
||||
fn create_simplified_realistic_block_data(block_index: usize, target_size: usize) -> Bytes {
|
||||
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, block_index as u64);
|
||||
|
||||
let mut block_data = Vec::with_capacity(target_size);
|
||||
|
||||
block_data.extend_from_slice(&(block_index as u64).to_be_bytes());
|
||||
|
||||
let parent_hash: [u8; 32] = rng.gen();
|
||||
block_data.extend_from_slice(&parent_hash);
|
||||
|
||||
let merkle_root: [u8; 32] = rng.gen();
|
||||
block_data.extend_from_slice(&merkle_root);
|
||||
|
||||
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index as u64 * 30;
|
||||
block_data.extend_from_slice(×tamp.to_be_bytes());
|
||||
|
||||
while block_data.len() < target_size {
|
||||
block_data.push(rng.gen());
|
||||
}
|
||||
|
||||
block_data.resize(target_size, 0);
|
||||
Bytes::from(block_data)
|
||||
}
|
||||
|
||||
fn create_signed_mantle_txs(block_index: usize) -> Vec<SignedMantleTx> {
|
||||
let mut rng = ChaCha20Rng::seed_from_u64(block_index as u64 * 12345);
|
||||
|
||||
let tx_count = std::cmp::min(5 + (block_index % 100), 1024);
|
||||
|
||||
let mut transactions = Vec::with_capacity(tx_count);
|
||||
|
||||
for tx_idx in 0..tx_count {
|
||||
let input_utxos = create_input_utxos(&mut rng, tx_idx);
|
||||
let input_ids: Vec<_> = input_utxos.iter().map(Utxo::id).collect();
|
||||
|
||||
let output_notes = create_output_notes(&mut rng, tx_idx);
|
||||
|
||||
let ledger_tx = LedgerTx::new(input_ids, output_notes);
|
||||
|
||||
let mantle_tx = MantleTx {
|
||||
ops: vec![],
|
||||
ledger_tx,
|
||||
execution_gas_price: rng.gen::<u64>() % 1_000_000,
|
||||
storage_gas_price: rng.gen::<u64>() % 100_000,
|
||||
};
|
||||
|
||||
let pks: Vec<Fr> = input_utxos.iter().map(|utxo| utxo.note.pk.into()).collect();
|
||||
let msg_hash = mantle_tx.hash().into();
|
||||
let ledger_tx_proof = DummyZkSignature::prove(ZkSignaturePublic { pks, msg_hash });
|
||||
|
||||
let ops_proofs = vec![];
|
||||
|
||||
let signed_tx = SignedMantleTx {
|
||||
ops_proofs,
|
||||
ledger_tx_proof,
|
||||
mantle_tx,
|
||||
};
|
||||
|
||||
transactions.push(signed_tx);
|
||||
}
|
||||
|
||||
transactions
|
||||
}
|
||||
|
||||
fn create_input_utxos(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Utxo> {
|
||||
let input_count = 1 + (tx_idx % 3);
|
||||
|
||||
(0..input_count)
|
||||
.map(|input_idx| Utxo {
|
||||
tx_hash: Fr::from(rng.gen::<u64>()).into(),
|
||||
output_index: input_idx,
|
||||
note: Note::new(
|
||||
rng.gen::<u64>() % 1_000_000,
|
||||
Fr::from(rng.gen::<u64>()).into(),
|
||||
),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn create_output_notes(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Note> {
|
||||
let output_count = 1 + (tx_idx % 4);
|
||||
|
||||
std::iter::repeat_with(|| {
|
||||
Note::new(
|
||||
rng.gen::<u64>() % 1_000_000,
|
||||
Fr::from(rng.gen::<u64>()).into(),
|
||||
)
|
||||
})
|
||||
.take(output_count)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn calculate_block_root(transactions: &[SignedMantleTx]) -> [u8; 32] {
|
||||
let mut hasher = Hasher::new();
|
||||
hasher.update(b"BLOCK_ROOT_V1");
|
||||
|
||||
for tx in transactions {
|
||||
let tx_hash = tx.mantle_tx.hash();
|
||||
hasher.update(tx_hash.as_signing_bytes());
|
||||
}
|
||||
|
||||
hasher.finalize().into()
|
||||
}
|
||||
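
A minimal usage sketch for the payload generators above (not part of the diff). It touches only create_block_data and create_da_share, which need no external state; create_block additionally drives the Groth16 leader-proof path and, per the panic message above, expects POL_PROOF_DEV_MODE=true. The crate-root re-exports from src/data/mod.rs and src/lib.rs are assumed.

// Illustrative sketch only; relies on the re-exports shown in src/data/mod.rs and src/lib.rs.
use storage_benchmarks::{create_block_data, create_da_share};

fn generate_sample_payloads() {
    // Deterministically sized 64 KiB block payloads for the first three heights.
    for block_index in 0..3 {
        let block = create_block_data(block_index, 64 * 1024);
        assert_eq!(block.len(), 64 * 1024);
    }

    // DA shares are fully deterministic: the same (block, blob, size) inputs
    // always yield the same bytes.
    let share_a = create_da_share(0, 0, 2048);
    let share_b = create_da_share(0, 0, 2048);
    assert_eq!(share_a, share_b);
}
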
69 storage-benchmarks/src/data/spec_access_patterns.rs Normal file
@@ -0,0 +1,69 @@
|
||||
use rand_distr::{Distribution as _, Zipf};
|
||||
|
||||
use crate::{
|
||||
config::ValidatorProfile,
|
||||
deterministic::{create_deterministic_rng, RngPurpose},
|
||||
};
|
||||
|
||||
#[must_use]
|
||||
pub fn select_block_spec_accurate(
|
||||
operation_count: u64,
|
||||
max_blocks: usize,
|
||||
profile: &ValidatorProfile,
|
||||
) -> usize {
|
||||
if max_blocks == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let access_selector = (operation_count * 31) % 100;
|
||||
|
||||
if access_selector < (profile.recent_access_ratio * 100.0) as u64 {
|
||||
select_recent_block_zipfian(operation_count, max_blocks)
|
||||
} else {
|
||||
select_historical_block_uniform(operation_count, max_blocks)
|
||||
}
|
||||
}
|
||||
|
||||
fn select_recent_block_zipfian(operation_count: u64, max_blocks: usize) -> usize {
|
||||
let recent_window_size = std::cmp::max(max_blocks / 5, 1000);
|
||||
|
||||
let zipf_dist = Zipf::new(recent_window_size as u64, 1.0).unwrap();
|
||||
|
||||
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
|
||||
let zipf_sample = zipf_dist.sample(&mut rng) as usize;
|
||||
|
||||
let recent_start = max_blocks.saturating_sub(recent_window_size);
|
||||
let tip_offset = zipf_sample.saturating_sub(1);
|
||||
|
||||
recent_start + (recent_window_size - 1 - tip_offset)
|
||||
}
|
||||
|
||||
const fn select_historical_block_uniform(operation_count: u64, max_blocks: usize) -> usize {
|
||||
(operation_count as usize * 23) % max_blocks
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn select_da_spec_accurate(
|
||||
operation_count: u64,
|
||||
max_blobs: usize,
|
||||
profile: &ValidatorProfile,
|
||||
) -> usize {
|
||||
if max_blobs == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let recent_threshold = (profile.recent_access_ratio * 100.0) as u64;
|
||||
let access_selector = (operation_count * 41) % 100;
|
||||
|
||||
if access_selector < recent_threshold {
|
||||
let recent_blobs = std::cmp::min(100, max_blobs);
|
||||
let zipf_dist = Zipf::new(recent_blobs as u64, 1.2).unwrap();
|
||||
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
|
||||
let sample = zipf_dist.sample(&mut rng) as usize;
|
||||
|
||||
let recent_start = max_blobs.saturating_sub(recent_blobs);
|
||||
recent_start + (recent_blobs - sample.min(recent_blobs))
|
||||
} else {
|
||||
(operation_count as usize * 29) % max_blobs
|
||||
}
|
||||
}
|
||||
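
To make the selection logic above concrete, here is a self-contained sketch of the same idea (not part of the diff). The 0.8 recent-access ratio is an assumed stand-in for config::ValidatorProfile::recent_access_ratio, and a plain ChaCha20 seed replaces create_deterministic_rng.

// Sketch of the access pattern: most reads hit a recent window with Zipf bias
// toward the chain tip, the rest are spread uniformly over history.
use rand::SeedableRng;
use rand_chacha::ChaCha20Rng;
use rand_distr::{Distribution, Zipf};

fn pick_block(op: u64, max_blocks: usize) -> usize {
    if max_blocks == 0 {
        return 0;
    }
    let recent_access_ratio = 0.8; // assumed; comes from ValidatorProfile in the real code
    if (op * 31) % 100 < (recent_access_ratio * 100.0) as u64 {
        let window = std::cmp::max(max_blocks / 5, 1).min(max_blocks);
        let zipf = Zipf::new(window as u64, 1.0).expect("valid Zipf parameters");
        let mut rng = ChaCha20Rng::seed_from_u64(op);
        let offset = zipf.sample(&mut rng) as usize - 1; // 0 = chain tip
        max_blocks - 1 - offset
    } else {
        (op as usize * 23) % max_blocks
    }
}
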
49 storage-benchmarks/src/data/streaming_writer.rs Normal file
@@ -0,0 +1,49 @@
use std::{
    fs::File,
    io::{BufWriter, Write as _},
    path::Path,
};

const CHUNK_SIZE: usize = 64 * 1024;
const BUFFER_SIZE: usize = 1024 * 1024;

pub struct StreamingDatasetWriter {
    writer: BufWriter<File>,
    chunk_buffer: Box<[u8; CHUNK_SIZE]>,
    bytes_written: u64,
}

impl StreamingDatasetWriter {
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, std::io::Error> {
        let file = File::create(path)?;
        let writer = BufWriter::with_capacity(BUFFER_SIZE, file);

        Ok(Self {
            writer,
            chunk_buffer: vec![0u8; CHUNK_SIZE]
                .into_boxed_slice()
                .try_into()
                .expect("CHUNK_SIZE is const"),
            bytes_written: 0,
        })
    }

    pub fn write_chunk(&mut self, data: &[u8]) -> Result<(), std::io::Error> {
        let mut remaining = data;

        while !remaining.is_empty() {
            let write_size = std::cmp::min(remaining.len(), CHUNK_SIZE);
            self.chunk_buffer[..write_size].copy_from_slice(&remaining[..write_size]);
            self.writer.write_all(&self.chunk_buffer[..write_size])?;
            remaining = &remaining[write_size..];
            self.bytes_written += write_size as u64;
        }

        Ok(())
    }

    pub fn finalize(mut self) -> Result<u64, std::io::Error> {
        self.writer.flush()?;
        Ok(self.bytes_written)
    }
}
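
A short usage sketch for the writer above (not part of the diff); the output path is illustrative.

use storage_benchmarks::data::streaming_writer::StreamingDatasetWriter;

fn write_dataset() -> std::io::Result<()> {
    let mut writer = StreamingDatasetWriter::new("/tmp/bench_dataset.bin")?;

    // Payloads of any size are copied through the fixed 64 KiB chunk buffer,
    // so peak memory stays bounded while generating large datasets.
    let payload = vec![0xAB_u8; 1024 * 1024];
    writer.write_chunk(&payload)?;

    let total_bytes = writer.finalize()?;
    assert_eq!(total_bytes, 1024 * 1024);
    Ok(())
}
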
58 storage-benchmarks/src/lib.rs Normal file
@@ -0,0 +1,58 @@
use std::path::PathBuf;

use nomos_storage::backends::rocksdb::RocksBackendSettings;

pub mod benchmark;
pub mod config;
pub mod data;
pub mod metrics;
pub mod storage;

pub use benchmark::*;
pub use config::{
    CompressionType, DatasetGenConfig, NetworkSize, ProductionBenchConfig, ProfileType,
    ValidatorProfile, ValidatorProfiles, WorkloadType,
};
pub use data::*;
pub use metrics::*;
pub use storage::*;

#[derive(Debug, Clone)]
pub struct BenchStorageConfig {
    pub name: String,
    pub settings: RocksBackendSettings,
}

impl BenchStorageConfig {
    #[must_use]
    pub fn production() -> Self {
        Self {
            name: "production".to_string(),
            settings: RocksBackendSettings {
                db_path: Self::data_path(),
                read_only: false,
                column_family: Some("blocks".to_string()),
            },
        }
    }

    #[must_use]
    pub fn data_path() -> PathBuf {
        let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
        let data_dir = PathBuf::from(home_dir).join(".nomos_storage_benchmarks");
        let _ = std::fs::create_dir_all(&data_dir);
        data_dir.join("rocksdb_data")
    }

    #[must_use]
    pub fn results_path() -> PathBuf {
        let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
        let results_dir = PathBuf::from(home_dir)
            .join(".nomos_storage_benchmarks")
            .join("results");
        let _ = std::fs::create_dir_all(&results_dir);
        results_dir
    }
}

pub type BenchConfig = BenchStorageConfig;
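
A small sketch of how the crate root above is meant to be consumed (not part of the diff); it assumes RocksBackendSettings::db_path is a PathBuf, as the assignment in production() implies.

use storage_benchmarks::BenchStorageConfig;

fn show_layout() {
    let config = BenchStorageConfig::production();
    println!("profile: {}", config.name);
    println!("db path: {}", config.settings.db_path.display());
    println!("results: {}", BenchStorageConfig::results_path().display());
}
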
87 storage-benchmarks/src/metrics/latency_stats.rs Normal file
@@ -0,0 +1,87 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use hdrhistogram::Histogram;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct LatencyPercentiles {
|
||||
pub p50_ms: f64,
|
||||
|
||||
pub p90_ms: f64,
|
||||
|
||||
pub p95_ms: f64,
|
||||
|
||||
pub p99_ms: f64,
|
||||
|
||||
pub max_ms: f64,
|
||||
|
||||
pub mean_ms: f64,
|
||||
|
||||
pub sample_count: u64,
|
||||
}
|
||||
|
||||
pub struct LatencyTracker {
|
||||
histogram: Histogram<u64>,
|
||||
operation_count: u64,
|
||||
}
|
||||
|
||||
impl Default for LatencyTracker {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl LatencyTracker {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
histogram: Histogram::new_with_bounds(1, 3_600_000_000, 3)
|
||||
.expect("Valid histogram bounds"),
|
||||
operation_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn record_async_operation<F, Fut, R>(&mut self, operation: F) -> R
|
||||
where
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: std::future::Future<Output = R>,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let result = operation().await;
|
||||
let latency = start.elapsed();
|
||||
|
||||
let latency_micros = latency.as_micros() as u64;
|
||||
if self.histogram.record(latency_micros).is_ok() {
|
||||
self.operation_count += 1;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn get_percentiles(&self) -> LatencyPercentiles {
|
||||
if self.operation_count == 0 {
|
||||
return LatencyPercentiles {
|
||||
p50_ms: 0.0,
|
||||
p90_ms: 0.0,
|
||||
p95_ms: 0.0,
|
||||
p99_ms: 0.0,
|
||||
max_ms: 0.0,
|
||||
mean_ms: 0.0,
|
||||
sample_count: 0,
|
||||
};
|
||||
}
|
||||
|
||||
let to_ms = |v: u64| v as f64 / 1000.0;
|
||||
|
||||
LatencyPercentiles {
|
||||
p50_ms: to_ms(self.histogram.value_at_quantile(0.50)),
|
||||
p90_ms: to_ms(self.histogram.value_at_quantile(0.90)),
|
||||
p95_ms: to_ms(self.histogram.value_at_quantile(0.95)),
|
||||
p99_ms: to_ms(self.histogram.value_at_quantile(0.99)),
|
||||
max_ms: to_ms(self.histogram.max()),
|
||||
mean_ms: self.histogram.mean() / 1000.0,
|
||||
sample_count: self.operation_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
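
A usage sketch for the tracker above (not part of the diff); the sleep stands in for a real storage operation.

use storage_benchmarks::LatencyTracker; // re-exported through metrics/mod.rs and lib.rs

#[tokio::main]
async fn main() {
    let mut tracker = LatencyTracker::new();

    for _ in 0..1_000 {
        tracker
            .record_async_operation(|| async {
                // Stand-in for a real storage read/write future.
                tokio::time::sleep(std::time::Duration::from_micros(250)).await;
            })
            .await;
    }

    let p = tracker.get_percentiles();
    println!(
        "p50={:.3}ms p99={:.3}ms max={:.3}ms (n={})",
        p.p50_ms, p.p99_ms, p.max_ms, p.sample_count
    );
}
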
9 storage-benchmarks/src/metrics/mod.rs Normal file
@@ -0,0 +1,9 @@
pub mod latency_stats;
pub mod rocksdb_collector;
pub mod rocksdb_stats;
pub mod runtime_memory_allocator;

pub use latency_stats::*;
pub use rocksdb_collector::*;
pub use rocksdb_stats::*;
pub use runtime_memory_allocator::*;
365 storage-benchmarks/src/metrics/rocksdb_collector.rs Normal file
@@ -0,0 +1,365 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use nomos_storage::backends::rocksdb::RocksBackend;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub struct RocksDbStatsCollector {
|
||||
storage_ref: Option<*const RocksBackend>,
|
||||
property_cache: HashMap<String, Option<u64>>,
|
||||
stats_cache: Option<String>,
|
||||
cache_valid: bool,
|
||||
collection_count: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RocksDbStatsSnapshot {
|
||||
pub stats: super::RocksDbStats,
|
||||
pub collection_timestamp: chrono::DateTime<chrono::Utc>,
|
||||
pub collection_id: u64,
|
||||
pub cache_hits: u64,
|
||||
pub cache_misses: u64,
|
||||
}
|
||||
|
||||
impl RocksDbStatsCollector {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
storage_ref: None,
|
||||
property_cache: HashMap::new(),
|
||||
stats_cache: None,
|
||||
cache_valid: false,
|
||||
collection_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attach(&mut self, storage: &RocksBackend) {
|
||||
self.storage_ref = Some(std::ptr::from_ref::<RocksBackend>(storage));
|
||||
self.invalidate_cache();
|
||||
}
|
||||
|
||||
pub fn collect_stats(&mut self) -> Result<RocksDbStatsSnapshot, Box<dyn std::error::Error>> {
|
||||
// SAFETY: storage_ref is set in attach() and guaranteed to be valid for the
|
||||
// lifetime of this collector
|
||||
let storage = unsafe {
|
||||
self.storage_ref
|
||||
.ok_or("No storage attached")?
|
||||
.as_ref()
|
||||
.ok_or("Invalid storage ref")?
|
||||
};
|
||||
|
||||
self.collection_count += 1;
|
||||
|
||||
let stats = self.collect_with_caching(storage)?;
|
||||
|
||||
Ok(RocksDbStatsSnapshot {
|
||||
stats,
|
||||
collection_timestamp: chrono::Utc::now(),
|
||||
collection_id: self.collection_count,
|
||||
cache_hits: self.count_cache_hits(),
|
||||
cache_misses: self.count_cache_misses(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn collect_before_after<F>(
|
||||
&mut self,
|
||||
operation: F,
|
||||
) -> Result<(RocksDbStatsSnapshot, RocksDbStatsSnapshot), Box<dyn std::error::Error>>
|
||||
where
|
||||
F: FnOnce() -> Result<(), Box<dyn std::error::Error>>,
|
||||
{
|
||||
let before = self.collect_stats()?;
|
||||
self.invalidate_cache();
|
||||
|
||||
operation()?;
|
||||
|
||||
let after = self.collect_stats()?;
|
||||
|
||||
Ok((before, after))
|
||||
}
|
||||
|
||||
pub fn invalidate_cache(&mut self) {
|
||||
self.property_cache.clear();
|
||||
self.stats_cache = None;
|
||||
self.cache_valid = false;
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn collection_stats(&self) -> (u64, u64, u64) {
|
||||
(
|
||||
self.collection_count,
|
||||
self.count_cache_hits(),
|
||||
self.count_cache_misses(),
|
||||
)
|
||||
}
|
||||
|
||||
fn collect_with_caching(
|
||||
&mut self,
|
||||
storage: &RocksBackend,
|
||||
) -> Result<super::RocksDbStats, Box<dyn std::error::Error>> {
|
||||
let stats_string = if let Some(ref cached) = self.stats_cache {
|
||||
cached.clone()
|
||||
} else {
|
||||
let stats = self.get_stats_string(storage)?;
|
||||
self.stats_cache = Some(stats.clone());
|
||||
stats
|
||||
};
|
||||
|
||||
let (cache_hit_count, cache_miss_count) = self.parse_cache_hit_miss(&stats_string);
|
||||
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
|
||||
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let level_files: Vec<u64> = (0..7)
|
||||
.map(|level| {
|
||||
self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::num_files_at_level(level).as_ref(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(super::RocksDbStats {
|
||||
cache_hit_rate,
|
||||
cache_hit_count,
|
||||
cache_miss_count,
|
||||
block_cache_usage_bytes: self
|
||||
.get_cached_property_u64(storage, rocksdb::properties::BLOCK_CACHE_USAGE.as_ref()),
|
||||
block_cache_capacity_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
|
||||
),
|
||||
index_cache_usage_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
|
||||
),
|
||||
|
||||
compaction_pending_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
|
||||
),
|
||||
compaction_running_count: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
|
||||
),
|
||||
|
||||
l0_file_count: level_files[0],
|
||||
l1_file_count: level_files[1],
|
||||
l2_file_count: level_files[2],
|
||||
l3_file_count: level_files[3],
|
||||
l4_file_count: level_files[4],
|
||||
l5_file_count: level_files[5],
|
||||
l6_file_count: level_files[6],
|
||||
total_sst_files: level_files.iter().sum(),
|
||||
total_sst_size_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
|
||||
),
|
||||
|
||||
memtable_count: self.parse_memtable_count(&stats_string),
|
||||
num_immutable_memtables: self.parse_immutable_memtables(&stats_string),
|
||||
memtable_flush_pending: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
|
||||
),
|
||||
approximate_memory_usage_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
|
||||
),
|
||||
|
||||
read_amplification: self.parse_read_amplification(&stats_string),
|
||||
write_amplification: self.parse_write_amplification(&stats_string),
|
||||
total_read_bytes: self.parse_total_read_bytes(&stats_string),
|
||||
total_write_bytes: self.parse_total_write_bytes(&stats_string),
|
||||
write_stall_time_ms: self.parse_write_stall_time(&stats_string),
|
||||
|
||||
live_sst_files_size_bytes: self.get_cached_property_u64(
|
||||
storage,
|
||||
rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
|
||||
),
|
||||
num_entries: self
|
||||
.get_cached_property_u64(storage, rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_cached_property_u64(&mut self, storage: &RocksBackend, property: &str) -> u64 {
|
||||
if let Some(cached_value) = self.property_cache.get(property) {
|
||||
return cached_value.unwrap_or(0);
|
||||
}
|
||||
|
||||
let value = self.query_property_u64(storage, property);
|
||||
self.property_cache.insert(property.to_owned(), value);
|
||||
value.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn query_property_u64(&self, storage: &RocksBackend, property: &str) -> Option<u64> {
|
||||
let property_owned = property.to_owned();
|
||||
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
|
||||
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(result_bytes)) => {
|
||||
let value_str = String::from_utf8_lossy(&result_bytes);
|
||||
value_str.trim().parse().ok()
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_stats_string(
|
||||
&self,
|
||||
storage: &RocksBackend,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
|
||||
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
|
||||
_ => Ok(Some(b"".to_vec().into())),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(stats_bytes)) => Ok(String::from_utf8_lossy(&stats_bytes).to_string()),
|
||||
_ => Ok(String::new()),
|
||||
}
|
||||
}
|
||||
|
||||
fn count_cache_hits(&self) -> u64 {
|
||||
self.property_cache.values().filter(|v| v.is_some()).count() as u64
|
||||
}
|
||||
|
||||
fn count_cache_misses(&self) -> u64 {
|
||||
self.property_cache.values().filter(|v| v.is_none()).count() as u64
|
||||
}
|
||||
|
||||
fn parse_cache_hit_miss(&self, stats: &str) -> (u64, u64) {
|
||||
let mut hits = 0u64;
|
||||
let mut misses = 0u64;
|
||||
|
||||
for line in stats.lines() {
|
||||
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
hits = value;
|
||||
}
|
||||
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss")
|
||||
{
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
misses = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(hits, misses)
|
||||
}
|
||||
|
||||
fn parse_memtable_count(&self, stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_immutable_memtables(&self, stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_read_amplification(&self, stats: &str) -> f64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("read amplification") || line.contains("Read(GB)") {
|
||||
if let Some(value) = self.extract_float_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0.0
|
||||
}
|
||||
|
||||
fn parse_write_amplification(&self, stats: &str) -> f64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("write amplification") || line.contains("Write(GB)") {
|
||||
if let Some(value) = self.extract_float_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0.0
|
||||
}
|
||||
|
||||
fn parse_total_read_bytes(&self, stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("total bytes read") || line.contains("Read(GB)") {
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_total_write_bytes(&self, stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("total bytes written") || line.contains("Write(GB)") {
|
||||
if let Some(value) = self.extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_write_stall_time(&self, stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
|
||||
if let Some(percent_pos) = line.find("percent") {
|
||||
let before_percent = &line[..percent_pos];
|
||||
if let Some(comma_pos) = before_percent.rfind(',') {
|
||||
let percent_str = before_percent[comma_pos + 1..].trim();
|
||||
if let Ok(percent) = percent_str.parse::<f64>() {
|
||||
return (percent * 10.0) as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn extract_number_from_line(&self, line: &str) -> Option<u64> {
|
||||
if let Some(colon_pos) = line.find(':') {
|
||||
let value_part = line[colon_pos + 1..].trim();
|
||||
if let Some(number_str) = value_part.split_whitespace().next() {
|
||||
let clean_number = number_str.replace(',', "");
|
||||
return clean_number.parse().ok();
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn extract_float_from_line(&self, line: &str) -> Option<f64> {
|
||||
if let Some(colon_pos) = line.find(':') {
|
||||
let value_part = line[colon_pos + 1..].trim();
|
||||
if let Some(number_str) = value_part.split_whitespace().next() {
|
||||
return number_str.parse().ok();
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RocksDbStatsCollector {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
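
A sketch of the intended collection flow (not part of the diff). It assumes a RocksBackend has already been opened elsewhere in the benchmark harness; only the collector calls are shown.

use nomos_storage::backends::rocksdb::RocksBackend;
use storage_benchmarks::RocksDbStatsCollector;

fn measure(storage: &RocksBackend) -> Result<(), Box<dyn std::error::Error>> {
    let mut collector = RocksDbStatsCollector::new();
    collector.attach(storage);

    let (before, after) = collector.collect_before_after(|| {
        // Run the benchmark workload here.
        Ok(())
    })?;

    let new_l0_files = after.stats.l0_file_count.saturating_sub(before.stats.l0_file_count);
    log::info!("L0 files added during workload: {}", new_l0_files);
    Ok(())
}
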
386 storage-benchmarks/src/metrics/rocksdb_stats.rs Normal file
@@ -0,0 +1,386 @@
|
||||
use nomos_storage::backends::rocksdb::RocksBackend;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RocksDbStats {
|
||||
pub cache_hit_rate: f64,
|
||||
pub cache_hit_count: u64,
|
||||
pub cache_miss_count: u64,
|
||||
pub block_cache_usage_bytes: u64,
|
||||
pub block_cache_capacity_bytes: u64,
|
||||
pub index_cache_usage_bytes: u64,
|
||||
|
||||
pub compaction_pending_bytes: u64,
|
||||
pub compaction_running_count: u64,
|
||||
|
||||
pub l0_file_count: u64,
|
||||
pub l1_file_count: u64,
|
||||
pub l2_file_count: u64,
|
||||
pub l3_file_count: u64,
|
||||
pub l4_file_count: u64,
|
||||
pub l5_file_count: u64,
|
||||
pub l6_file_count: u64,
|
||||
pub total_sst_files: u64,
|
||||
pub total_sst_size_bytes: u64,
|
||||
|
||||
pub memtable_count: u64,
|
||||
pub num_immutable_memtables: u64,
|
||||
pub memtable_flush_pending: u64,
|
||||
pub approximate_memory_usage_bytes: u64,
|
||||
|
||||
pub read_amplification: f64,
|
||||
pub write_amplification: f64,
|
||||
pub total_read_bytes: u64,
|
||||
pub total_write_bytes: u64,
|
||||
pub write_stall_time_ms: u64,
|
||||
|
||||
pub live_sst_files_size_bytes: u64,
|
||||
pub num_entries: u64,
|
||||
}
|
||||
|
||||
impl Default for RocksDbStats {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cache_hit_rate: 0.0,
|
||||
cache_hit_count: 0,
|
||||
cache_miss_count: 0,
|
||||
block_cache_usage_bytes: 0,
|
||||
block_cache_capacity_bytes: 0,
|
||||
index_cache_usage_bytes: 0,
|
||||
|
||||
compaction_pending_bytes: 0,
|
||||
compaction_running_count: 0,
|
||||
|
||||
l0_file_count: 0,
|
||||
l1_file_count: 0,
|
||||
l2_file_count: 0,
|
||||
l3_file_count: 0,
|
||||
l4_file_count: 0,
|
||||
l5_file_count: 0,
|
||||
l6_file_count: 0,
|
||||
total_sst_files: 0,
|
||||
total_sst_size_bytes: 0,
|
||||
|
||||
memtable_count: 0,
|
||||
num_immutable_memtables: 0,
|
||||
memtable_flush_pending: 0,
|
||||
approximate_memory_usage_bytes: 0,
|
||||
|
||||
read_amplification: 0.0,
|
||||
write_amplification: 0.0,
|
||||
total_read_bytes: 0,
|
||||
total_write_bytes: 0,
|
||||
write_stall_time_ms: 0,
|
||||
|
||||
live_sst_files_size_bytes: 0,
|
||||
num_entries: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn collect_rocksdb_stats(storage: &RocksBackend) -> RocksDbStats {
|
||||
let (cache_hit_count, cache_miss_count) =
|
||||
parse_cache_hit_miss_counts(&get_stats_string(storage));
|
||||
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
|
||||
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let l0_files = get_level_file_count(storage, 0);
|
||||
let l1_files = get_level_file_count(storage, 1);
|
||||
let l2_files = get_level_file_count(storage, 2);
|
||||
let l3_files = get_level_file_count(storage, 3);
|
||||
let l4_files = get_level_file_count(storage, 4);
|
||||
let l5_files = get_level_file_count(storage, 5);
|
||||
let l6_files = get_level_file_count(storage, 6);
|
||||
|
||||
RocksDbStats {
|
||||
cache_hit_rate,
|
||||
cache_hit_count,
|
||||
cache_miss_count,
|
||||
block_cache_usage_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::BLOCK_CACHE_USAGE.as_ref(),
|
||||
),
|
||||
block_cache_capacity_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
|
||||
),
|
||||
index_cache_usage_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
|
||||
),
|
||||
|
||||
compaction_pending_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
|
||||
),
|
||||
compaction_running_count: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
|
||||
),
|
||||
|
||||
l0_file_count: l0_files,
|
||||
l1_file_count: l1_files,
|
||||
l2_file_count: l2_files,
|
||||
l3_file_count: l3_files,
|
||||
l4_file_count: l4_files,
|
||||
l5_file_count: l5_files,
|
||||
l6_file_count: l6_files,
|
||||
total_sst_files: l0_files + l1_files + l2_files + l3_files + l4_files + l5_files + l6_files,
|
||||
total_sst_size_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
|
||||
),
|
||||
|
||||
memtable_count: parse_memtable_count(&get_stats_string(storage)),
|
||||
num_immutable_memtables: parse_immutable_memtables(&get_stats_string(storage)),
|
||||
memtable_flush_pending: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
|
||||
),
|
||||
approximate_memory_usage_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
|
||||
),
|
||||
|
||||
read_amplification: parse_read_amplification(&get_stats_string(storage)),
|
||||
write_amplification: parse_write_amplification(&get_stats_string(storage)),
|
||||
total_read_bytes: parse_total_read_bytes(&get_stats_string(storage)),
|
||||
total_write_bytes: parse_total_write_bytes(&get_stats_string(storage)),
|
||||
write_stall_time_ms: parse_write_stall_time(&get_stats_string(storage)),
|
||||
|
||||
live_sst_files_size_bytes: get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
|
||||
),
|
||||
num_entries: get_property_u64(storage, &rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_stats_string(storage: &RocksBackend) -> String {
|
||||
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
|
||||
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
|
||||
_ => Ok(Some(b"".to_vec().into())),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(stats_bytes)) => String::from_utf8_lossy(&stats_bytes).to_string(),
|
||||
_ => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_level_file_count(storage: &RocksBackend, level: i32) -> u64 {
|
||||
get_property_u64(
|
||||
storage,
|
||||
&rocksdb::properties::num_files_at_level(level as usize).as_ref(),
|
||||
)
|
||||
}
|
||||
|
||||
fn get_property_u64(storage: &RocksBackend, property: &str) -> u64 {
|
||||
match get_property_value(storage, property) {
|
||||
Some(value) => {
|
||||
log::debug!("Property '{}': {}", property, value);
|
||||
value
|
||||
}
|
||||
None => {
|
||||
log::debug!("Property '{}': unavailable", property);
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_property_value(storage: &RocksBackend, property: &str) -> Option<u64> {
|
||||
let property_owned = property.to_owned();
|
||||
let property_for_log = property.to_owned();
|
||||
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
|
||||
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
});
|
||||
|
||||
match transaction.execute() {
|
||||
Ok(Some(result_bytes)) => {
|
||||
let value_str = String::from_utf8_lossy(&result_bytes);
|
||||
match value_str.trim().parse::<u64>() {
|
||||
Ok(parsed) => {
|
||||
log::trace!("Property '{}' available: {}", property_for_log, parsed);
|
||||
Some(parsed)
|
||||
}
|
||||
Err(_) => {
|
||||
log::trace!(
|
||||
"Property '{}' parse error from: '{}'",
|
||||
property_for_log,
|
||||
value_str
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {
|
||||
log::trace!("Property '{}' unavailable", property_for_log);
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
log::trace!("Property '{}' failed: {}", property_for_log, e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_cache_hit_miss_counts(stats: &str) -> (u64, u64) {
|
||||
let mut hits = 0u64;
|
||||
let mut misses = 0u64;
|
||||
|
||||
for line in stats.lines() {
|
||||
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
hits = value;
|
||||
}
|
||||
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
misses = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(hits, misses)
|
||||
}
|
||||
|
||||
fn parse_write_stall_time(stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
|
||||
if let Some(percent_pos) = line.find("percent") {
|
||||
let before_percent = &line[..percent_pos];
|
||||
if let Some(comma_pos) = before_percent.rfind(',') {
|
||||
let percent_str = before_percent[comma_pos + 1..].trim();
|
||||
if let Ok(percent) = percent_str.parse::<f64>() {
|
||||
return (percent * 10.0) as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn extract_number_from_line(line: &str) -> Option<u64> {
|
||||
if let Some(colon_pos) = line.find(':') {
|
||||
let value_part = line[colon_pos + 1..].trim();
|
||||
if let Some(number_str) = value_part.split_whitespace().next() {
|
||||
let clean_number = number_str.replace(',', "");
|
||||
return clean_number.parse().ok();
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn parse_read_amplification(stats: &str) -> f64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("read amplification") || line.contains("Read(GB)") {
|
||||
if let Some(value) = extract_float_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0.0
|
||||
}
|
||||
|
||||
fn parse_write_amplification(stats: &str) -> f64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("write amplification") || line.contains("Write(GB)") {
|
||||
if let Some(value) = extract_float_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0.0
|
||||
}
|
||||
|
||||
fn parse_total_read_bytes(stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("total bytes read") || line.contains("Read(GB)") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_total_write_bytes(stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("total bytes written") || line.contains("Write(GB)") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_memtable_count(stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_immutable_memtables(stats: &str) -> u64 {
|
||||
for line in stats.lines() {
|
||||
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
|
||||
if let Some(value) = extract_number_from_line(line) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn extract_float_from_line(line: &str) -> Option<f64> {
|
||||
if let Some(colon_pos) = line.find(':') {
|
||||
let value_part = line[colon_pos + 1..].trim();
|
||||
if let Some(number_str) = value_part.split_whitespace().next() {
|
||||
return number_str.parse().ok();
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub struct StatsCollector {
|
||||
pub before: RocksDbStats,
|
||||
pub after: RocksDbStats,
|
||||
}
|
||||
|
||||
impl StatsCollector {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
before: RocksDbStats::default(),
|
||||
after: RocksDbStats::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn collect_before(&mut self, storage: &RocksBackend) {
|
||||
self.before = collect_rocksdb_stats(storage);
|
||||
log::debug!(
|
||||
"Before: cache {:.1}%, L0 files {}",
|
||||
self.before.cache_hit_rate * 100.0,
|
||||
self.before.l0_file_count
|
||||
);
|
||||
}
|
||||
|
||||
pub fn collect_after(&mut self, storage: &RocksBackend) {
|
||||
self.after = collect_rocksdb_stats(storage);
|
||||
log::debug!(
|
||||
"After: cache {:.1}%, L0 files {}",
|
||||
self.after.cache_hit_rate * 100.0,
|
||||
self.after.l0_file_count
|
||||
);
|
||||
}
|
||||
}
|
||||
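
The simpler, non-caching helper above can be used the same way; a sketch (not part of the diff), again assuming an already-open RocksBackend:

use nomos_storage::backends::rocksdb::RocksBackend;
use storage_benchmarks::StatsCollector;

fn cache_hit_delta(storage: &RocksBackend, workload: impl FnOnce()) -> f64 {
    let mut stats = StatsCollector::new();
    stats.collect_before(storage);
    workload();
    stats.collect_after(storage);
    stats.after.cache_hit_rate - stats.before.cache_hit_rate
}
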
114 storage-benchmarks/src/metrics/runtime_memory_allocator.rs Normal file
@@ -0,0 +1,114 @@
|
||||
use std::{
|
||||
alloc::{GlobalAlloc, Layout, System},
|
||||
sync::atomic::{AtomicUsize, Ordering},
|
||||
};
|
||||
|
||||
pub struct RuntimeValidatorAllocator {
|
||||
inner: System,
|
||||
allocated: AtomicUsize,
|
||||
limit: AtomicUsize,
|
||||
}
|
||||
|
||||
impl RuntimeValidatorAllocator {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
inner: System,
|
||||
allocated: AtomicUsize::new(0),
|
||||
limit: AtomicUsize::new(16 * 1024 * 1024 * 1024),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_limit_gb(&self, limit_gb: usize) {
|
||||
let limit_bytes = limit_gb * 1024 * 1024 * 1024;
|
||||
self.limit.store(limit_bytes, Ordering::SeqCst);
|
||||
log::info!(
|
||||
"Memory limit updated to {}GB ({} bytes)",
|
||||
limit_gb,
|
||||
limit_bytes
|
||||
);
|
||||
}
|
||||
|
||||
pub fn usage_mb(&self) -> f64 {
|
||||
self.allocated.load(Ordering::Relaxed) as f64 / 1024.0 / 1024.0
|
||||
}
|
||||
|
||||
pub fn usage_percent(&self) -> f64 {
|
||||
let current = self.allocated.load(Ordering::Relaxed);
|
||||
let limit = self.limit.load(Ordering::Relaxed);
|
||||
if limit > 0 {
|
||||
current as f64 / limit as f64 * 100.0
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limit_gb(&self) -> usize {
|
||||
self.limit.load(Ordering::Relaxed) / 1024 / 1024 / 1024
|
||||
}
|
||||
|
||||
pub fn actual_limit_gb(&self) -> usize {
|
||||
self.limit_gb()
|
||||
}
|
||||
|
||||
pub fn would_exceed_limit(&self, size: usize) -> bool {
|
||||
let current = self.allocated.load(Ordering::Relaxed);
|
||||
let limit = self.limit.load(Ordering::Relaxed);
|
||||
current + size > limit
|
||||
}
|
||||
|
||||
pub fn allocation_failures(&self) -> u64 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl GlobalAlloc for RuntimeValidatorAllocator {
|
||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
||||
let size = layout.size();
|
||||
let current = self.allocated.fetch_add(size, Ordering::SeqCst);
|
||||
let limit = self.limit.load(Ordering::Relaxed);
|
||||
|
||||
if current + size > limit {
|
||||
self.allocated.fetch_sub(size, Ordering::SeqCst);
|
||||
|
||||
if size >= 1024 * 1024 {
|
||||
log::warn!(
|
||||
"Memory limit exceeded: {}MB allocation blocked ({}GB limit, {:.1}% used)",
|
||||
size / 1024 / 1024,
|
||||
limit / 1024 / 1024 / 1024,
|
||||
current as f64 / limit as f64 * 100.0
|
||||
);
|
||||
}
|
||||
|
||||
return std::ptr::null_mut();
|
||||
}
|
||||
|
||||
unsafe { self.inner.alloc(layout) }
|
||||
}
|
||||
|
||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
||||
self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
|
||||
|
||||
unsafe {
|
||||
self.inner.dealloc(ptr, layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_runtime_limit_setting() {
|
||||
let allocator = RuntimeValidatorAllocator::new();
|
||||
|
||||
assert_eq!(allocator.limit_gb(), 16);
|
||||
|
||||
allocator.set_limit_gb(8);
|
||||
assert_eq!(allocator.limit_gb(), 8);
|
||||
assert_eq!(allocator.actual_limit_gb(), 8);
|
||||
|
||||
allocator.set_limit_gb(32);
|
||||
assert_eq!(allocator.limit_gb(), 32);
|
||||
}
|
||||
}
|
||||
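
A sketch of how the allocator above is wired into a benchmark binary (not part of the diff). Returning null on an over-limit allocation makes the standard alloc-error handler abort, so exceeding the configured budget surfaces as a hard failure rather than silent swapping.

use storage_benchmarks::RuntimeValidatorAllocator;

// Track every heap allocation made by this binary against the configured limit.
#[global_allocator]
static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();

fn main() {
    ALLOCATOR.set_limit_gb(8);

    // ... run the benchmark workload ...

    println!(
        "heap usage: {:.1} MB ({:.1}% of limit)",
        ALLOCATOR.usage_mb(),
        ALLOCATOR.usage_percent()
    );
}
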
3 storage-benchmarks/src/storage/mod.rs Normal file
@@ -0,0 +1,3 @@
pub mod rocksdb_options_tuning;

pub use rocksdb_options_tuning::*;
180 storage-benchmarks/src/storage/rocksdb_options_tuning.rs Normal file
@@ -0,0 +1,180 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::CompressionType;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct RocksDbTuningOptions {
|
||||
pub cache_size_percent: Option<u32>,
|
||||
|
||||
pub write_buffer_mb: Option<u32>,
|
||||
|
||||
pub compaction_jobs: Option<u32>,
|
||||
|
||||
pub block_size_kb: Option<u32>,
|
||||
|
||||
pub compression: Option<CompressionType>,
|
||||
|
||||
pub bloom_filter_bits: Option<u32>,
|
||||
}
|
||||
|
||||
impl RocksDbTuningOptions {
|
||||
pub fn apply_to_options(
|
||||
&self,
|
||||
opts: &mut rocksdb::Options,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
        // Block-based table options are built once so that configuring the block size
        // does not install a second table factory and silently drop the block cache.
        let mut block_opts = rocksdb::BlockBasedOptions::default();
        let mut block_opts_touched = false;

        if let Some(cache_percent) = self.cache_size_percent {
            let system_memory_gb = get_system_memory_gb();
            let cache_size_bytes = ((system_memory_gb as f64 * (f64::from(cache_percent) / 100.0))
                * 1024.0
                * 1024.0
                * 1024.0) as usize;

            let cache = rocksdb::Cache::new_lru_cache(cache_size_bytes);
            block_opts.set_block_cache(&cache);
            block_opts_touched = true;

            log::info!(
                "Applied block cache: {}% of RAM = {}MB",
                cache_percent,
                cache_size_bytes / 1024 / 1024
            );
        }

        if let Some(buffer_mb) = self.write_buffer_mb {
            let buffer_bytes = (buffer_mb as usize) * 1024 * 1024;
            opts.set_write_buffer_size(buffer_bytes);
            log::info!("Applied write buffer: {}MB", buffer_mb);
        }

        if let Some(jobs) = self.compaction_jobs {
            opts.set_max_background_jobs(jobs as i32);
            log::info!("Applied compaction jobs: {}", jobs);
        }

        if let Some(block_size_kb) = self.block_size_kb {
            let block_size_bytes = (block_size_kb as usize) * 1024;
            block_opts.set_block_size(block_size_bytes);
            block_opts_touched = true;
            log::info!("Applied block size: {}KB", block_size_kb);
        }

        if block_opts_touched {
            opts.set_block_based_table_factory(&block_opts);
        }
|
||||
|
||||
if let Some(compression) = self.compression {
|
||||
match compression {
|
||||
CompressionType::None => {
|
||||
opts.set_compression_type(rocksdb::DBCompressionType::None);
|
||||
log::info!("Applied compression: None");
|
||||
}
|
||||
CompressionType::Lz4 => {
|
||||
opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
|
||||
log::info!("Applied compression: LZ4");
|
||||
}
|
||||
CompressionType::Snappy => {
|
||||
opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
|
||||
log::info!("Applied compression: Snappy");
|
||||
}
|
||||
CompressionType::Zstd => {
|
||||
opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
|
||||
log::info!("Applied compression: Zstd");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn from_args(args: &[String]) -> (Self, bool) {
|
||||
let mut config = Self::default();
|
||||
let mut read_only = false;
|
||||
|
||||
let mut i = 0;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--cache-size" if i + 1 < args.len() => {
|
||||
config.cache_size_percent = args[i + 1].parse().ok();
|
||||
i += 2;
|
||||
}
|
||||
"--write-buffer" if i + 1 < args.len() => {
|
||||
config.write_buffer_mb = args[i + 1].parse().ok();
|
||||
i += 2;
|
||||
}
|
||||
"--compaction-jobs" if i + 1 < args.len() => {
|
||||
config.compaction_jobs = args[i + 1].parse().ok();
|
||||
i += 2;
|
||||
}
|
||||
"--block-size" if i + 1 < args.len() => {
|
||||
config.block_size_kb = args[i + 1].parse().ok();
|
||||
i += 2;
|
||||
}
|
||||
"--read-only" => {
|
||||
read_only = true;
|
||||
i += 1;
|
||||
}
|
||||
"--compression" if i + 1 < args.len() => {
|
||||
match args[i + 1].parse::<CompressionType>() {
|
||||
Ok(compression_type) => config.compression = Some(compression_type),
|
||||
Err(e) => log::warn!("Invalid compression type: {}", e),
|
||||
}
|
||||
i += 2;
|
||||
}
|
||||
_ => {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(config, read_only)
|
||||
}
|
||||
|
||||
pub fn description(&self) -> String {
|
||||
let mut parts = Vec::new();
|
||||
|
||||
if let Some(cache) = self.cache_size_percent {
|
||||
parts.push(format!("cache:{}%", cache));
|
||||
}
|
||||
if let Some(buffer) = self.write_buffer_mb {
|
||||
parts.push(format!("buffer:{}MB", buffer));
|
||||
}
|
||||
if let Some(jobs) = self.compaction_jobs {
|
||||
parts.push(format!("jobs:{}", jobs));
|
||||
}
|
||||
if let Some(block_size) = self.block_size_kb {
|
||||
parts.push(format!("block:{}KB", block_size));
|
||||
}
|
||||
|
||||
if parts.is_empty() {
|
||||
"defaults".to_string()
|
||||
} else {
|
||||
parts.join(",")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_system_memory_gb() -> usize {
|
||||
if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
|
||||
for line in meminfo.lines() {
|
||||
if line.starts_with("MemTotal:") {
|
||||
if let Some(kb_str) = line.split_whitespace().nth(1) {
|
||||
if let Ok(kb) = kb_str.parse::<usize>() {
|
||||
return kb / 1024 / 1024;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
16
|
||||
}
|
||||
|
||||
pub fn create_tuned_rocksdb_options(tuning_config: &RocksDbTuningOptions) -> rocksdb::Options {
|
||||
let mut opts = rocksdb::Options::default();
|
||||
opts.create_if_missing(true);
|
||||
opts.create_missing_column_families(true);
|
||||
|
||||
if let Err(e) = tuning_config.apply_to_options(&mut opts) {
|
||||
log::error!("Failed to apply RocksDB tuning: {}", e);
|
||||
}
|
||||
|
||||
opts
|
||||
}
|
||||
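
Finally, a sketch of how the tuning options above feed into opening a database (not part of the diff). The flag names follow from_args; the flag values and the path are illustrative.

use storage_benchmarks::{create_tuned_rocksdb_options, RocksDbTuningOptions};

fn open_tuned_db() -> Result<rocksdb::DB, rocksdb::Error> {
    // e.g. `storage_bench_runner --cache-size 25 --write-buffer 256 --block-size 16`
    let args: Vec<String> = std::env::args().skip(1).collect();
    let (tuning, _read_only) = RocksDbTuningOptions::from_args(&args);
    log::info!("RocksDB tuning: {}", tuning.description());

    let opts = create_tuned_rocksdb_options(&tuning);
    rocksdb::DB::open(&opts, "/tmp/storage_bench_db")
}
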