storage benchmarks infrastructure

andrussal 2025-10-06 11:11:53 +02:00
parent 73441efb69
commit 4de3e1e68e
45 changed files with 6461 additions and 2 deletions

.gitignore
View File

@@ -1,3 +1,25 @@
# Generated by Cargo
# will have compiled files and executables
/target/
*/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
target/
.vscode
# These are backup files generated by rustfmt
**/*.rs.bk
# Files generated by build processes or applications
config.yml
store.*
*.txt
.env
.idea/
.vscode/
# Integration test temp dirs
tests/.tmp*
# Wildcard for any file that contains ignore
*ignore*

View File

@@ -0,0 +1,244 @@
[package]
edition = "2021"
name = "storage-benchmarks"
version = "0.1.0"
[dependencies]
# Storage APIs - using public nomos repository
cryptarchia-engine = { git = "https://github.com/logos-co/nomos-node", package = "cryptarchia-engine" }
nomos-core = { git = "https://github.com/logos-co/nomos-node", package = "nomos-core" }
nomos-storage = { git = "https://github.com/logos-co/nomos-node", package = "nomos-storage", features = ["rocksdb-backend"] }
# Database
rocksdb = { version = "0.24", features = ["bindgen-runtime"] }
# Async runtime
tokio = { features = ["macros", "rt-multi-thread", "time"], version = "1" }
async-trait = "0.1"
# Data structures
bincode = "1.0"
bytes = "1.3"
chrono = { version = "0.4", features = ["serde"] }
env_logger = "0.10"
log = "0.4"
num_cpus = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3"
toml = "0.8"
rand = "0.8"
rand_chacha = "0.3"
ed25519-dalek = "2.0"
groth16 = { git = "https://github.com/logos-co/nomos-node", package = "groth16" }
pol = { git = "https://github.com/logos-co/nomos-node", package = "pol" }
futures = "0.3"
rayon = "1.0"
rand_distr = "0.4"
hdrhistogram = "7.5"
md5 = "0.7"
clap = { version = "4.0", features = ["derive"] }
thiserror = "1.0"
smallvec = "1.0"
# Optional allocator features
mimalloc = { version = "0.1", optional = true }
jemallocator = { version = "0.5", optional = true }
[features]
default = []
allocator-mimalloc = ["mimalloc"]
allocator-jemalloc = ["jemallocator"]
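# Optional: select an alternative allocator at build time, e.g.
#   cargo build --release --features allocator-mimalloc
#   cargo build --release --features allocator-jemalloc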
[dev-dependencies]
divan = { default-features = false, version = "0.1" }
# Production binaries
[[bin]]
name = "dataset_generator"
path = "src/bin/dataset_generator.rs"
[[bin]]
name = "verify_dataset_integrity"
path = "src/bin/verify_dataset_integrity.rs"
[[bin]]
name = "storage_bench_runner"
path = "src/bin/storage_bench_runner.rs"
[[bin]]
name = "verify_rocksdb_properties"
path = "src/bin/verify_rocksdb_properties.rs"
[[bin]]
name = "dataset_builder"
path = "src/bin/dataset_builder.rs"
# Educational examples
[[example]]
name = "storage_capacity_calculator"
path = "examples/storage_capacity_calculator.rs"
[lints.clippy]
# Nursery and allowed nursery warnings (new lints will warn by default)
nursery = { level = "warn", priority = -1 }
# Pedantic and allowed pedantic warnings (new lints will warn by default)
pedantic = { level = "warn", priority = -1 }
similar_names = { level = "allow" }
# Restriction and allowed restriction warnings (new lints will warn by default)
restriction = { level = "warn", priority = -1 }
absolute_paths = { level = "allow" }
alloc_instead_of_core = { level = "allow" }
arbitrary_source_item_ordering = { level = "allow" }
big_endian_bytes = { level = "allow" }
blanket_clippy_restriction_lints = { level = "allow" }
decimal_literal_representation = { level = "allow" }
default_numeric_fallback = { level = "allow" }
deref_by_slicing = { level = "allow" }
else_if_without_else = { level = "allow" }
exhaustive_enums = { level = "allow" }
exhaustive_structs = { level = "allow" }
exit = { level = "allow" }
expect_used = { level = "allow" }
field_scoped_visibility_modifiers = { level = "allow" }
float_arithmetic = { level = "allow" }
get_unwrap = { level = "allow" }
host_endian_bytes = { level = "allow" }
implicit_return = { level = "allow" }
integer_division_remainder_used = { level = "allow" }
iter_over_hash_type = { level = "allow" }
let_underscore_must_use = { level = "allow" }
let_underscore_untyped = { level = "allow" }
little_endian_bytes = { level = "allow" }
map_err_ignore = { level = "allow" }
min_ident_chars = { level = "allow" }
missing_asserts_for_indexing = { level = "allow" }
missing_docs_in_private_items = { level = "allow" }
missing_inline_in_public_items = { level = "allow" }
missing_trait_methods = { level = "allow" }
mixed_read_write_in_expression = { level = "allow" }
mod_module_files = { level = "allow" }
module_name_repetitions = { level = "allow" }
modulo_arithmetic = { level = "allow" }
panic = { level = "allow" }
panic_in_result_fn = { level = "allow" }
partial_pub_fields = { level = "allow" }
print_stderr = { level = "allow" }
print_stdout = { level = "allow" }
pub_use = { level = "allow" }
pub_with_shorthand = { level = "allow" }
question_mark_used = { level = "allow" }
self_named_module_files = { level = "allow" }
semicolon_inside_block = { level = "allow" }
single_call_fn = { level = "allow" }
single_char_lifetime_names = { level = "allow" }
std_instead_of_alloc = { level = "allow" }
std_instead_of_core = { level = "allow" }
struct_field_names = { level = "allow" }
unseparated_literal_suffix = { level = "allow" }
use_debug = { level = "allow" }
wildcard_enum_match_arm = { level = "allow" }
arithmetic_side_effects = { level = "allow" }
as_conversions = { level = "allow" }
as_pointer_underscore = { level = "allow" }
as_underscore = { level = "allow" }
assertions_on_result_states = { level = "allow" }
cast_possible_truncation = { level = "allow" }
cast_possible_wrap = { level = "allow" }
cast_precision_loss = { level = "allow" }
cast_sign_loss = { level = "allow" }
doc_broken_link = { level = "allow" }
string_slice = { level = "allow" }
future_not_send = { level = "allow" }
unused_self = { level = "allow" }
unnecessary_wraps = { level = "allow" }
single_match_else = { level = "allow" }
option_if_let_else = { level = "allow" }
uninlined_format_args = { level = "allow" }
needless_borrow = { level = "allow" }
str_to_string = { level = "allow" }
new_without_default = { level = "allow" }
must_use_candidate = { level = "allow" }
missing_const_for_fn = { level = "allow" }
large_stack_arrays = { level = "allow" }
unnecessary_to_owned = { level = "allow" }
undocumented_unsafe_blocks = { level = "allow" }
ref_as_ptr = { level = "allow" }
unused_async = { level = "allow" }
items_after_statements = { level = "allow" }
ok_expect = { level = "allow" }
map_with_unused_argument_over_ranges = { level = "allow" }
ignored_unit_patterns = { level = "allow" }
too_many_lines = { level = "allow" }
not_unsafe_ptr_arg_deref = { level = "allow" }
type_complexity = { level = "allow" }
single_match = { level = "allow" }
error_impl_error = { level = "allow" }
impl_trait_in_params = { level = "allow" }
indexing_slicing = { level = "allow" }
infinite_loop = { level = "allow" }
integer_division = { level = "allow" }
large_stack_frames = { level = "allow" }
missing_assert_message = { level = "allow" }
missing_errors_doc = { level = "allow" }
missing_panics_doc = { level = "allow" }
pattern_type_mismatch = { level = "allow" }
redundant_test_prefix = { level = "allow" }
ref_patterns = { level = "allow" }
renamed_function_params = { level = "allow" }
same_name_method = { level = "allow" }
shadow_reuse = { level = "allow" }
shadow_same = { level = "allow" }
shadow_unrelated = { level = "allow" }
tests_outside_test_module = { level = "allow" }
todo = { level = "allow" }
unimplemented = { level = "allow" }
unreachable = { level = "allow" }
unwrap_in_result = { level = "allow" }
unwrap_used = { level = "allow" }
[lints.rust]
unused_crate_dependencies = { level = "allow" }
unused_results = { level = "allow" }
impl_trait_redundant_captures = { level = "warn" }
missing_unsafe_on_extern = { level = "warn" }
redundant_imports = { level = "warn" }
redundant_lifetimes = { level = "warn" }
single_use_lifetimes = { level = "warn" }
trivial_numeric_casts = { level = "warn" }
unsafe_attr_outside_unsafe = { level = "warn" }
unsafe_op_in_unsafe_fn = { level = "warn" }
unstable_features = { level = "warn" }
unused_extern_crates = { level = "warn" }
unused_import_braces = { level = "warn" }
unused_lifetimes = { level = "warn" }
unused_macro_rules = { level = "warn" }
unused_qualifications = { level = "warn" }
absolute_paths_not_starting_with_crate = { level = "allow" }
ambiguous_negative_literals = { level = "allow" }
closure_returning_async_block = { level = "allow" }
deref_into_dyn_supertrait = { level = "allow" }
elided_lifetimes_in_paths = { level = "allow" }
ffi_unwind_calls = { level = "allow" }
impl_trait_overcaptures = { level = "allow" }
let_underscore_drop = { level = "allow" }
linker_messages = { level = "allow" }
macro_use_extern_crate = { level = "allow" }
missing_copy_implementations = { level = "allow" }
missing_debug_implementations = { level = "allow" }
missing_docs = { level = "allow" }
tail_expr_drop_order = { level = "allow" }
trivial_casts = { level = "allow" }
unit_bindings = { level = "allow" }
unreachable_pub = { level = "allow" }
unsafe_code = { level = "allow" }
variant_size_differences = { level = "allow" }

View File

@@ -0,0 +1,181 @@
# Nomos Storage Benchmarks
Goal: tune RocksDB for Nomos validator workloads using realistic data and sizes. The approach is to run benchmarks across different parameter settings and compare the results.
## What it does
- Generates datasets that approximate realistic sizes and access patterns.
- Runs mixed read/write validator-style workloads against RocksDB.
- Varies RocksDB parameters (cache, write buffer, compaction, block size, compression).
- Records throughput and basic variability across repeated runs.
## Quick start
1) Generate a dataset
```bash
POL_PROOF_DEV_MODE=true RUST_LOG=info cargo run --bin dataset_generator -- --config dataset_configs/annual_mainnet.toml
```
2) Run a baseline
```bash
RUST_LOG=info cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120
```
3) Try parameters and compare
```bash
# Cache size
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 25
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 55
# Write buffer (use the best cache size observed)
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 256
# Compaction jobs
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128 --compaction-jobs 8
```
## How to evaluate
- One warmup and at least three measured runs per setting.
- Fixed seed when exact reproducibility is required.
- Compare mean ops/sec and variability across runs (see the example after this list).
- Change one setting at a time.
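For example, a single comparison point can be pinned down with the warmup, measurement, and seed flags from the CLI section below:
```bash
# One warmup run and three measured runs of a single setting, with a fixed seed
cargo run --bin storage_bench_runner -- \
  --profile mainnet --memory 8 --duration 120 \
  --cache-size 40 \
  --warmup-runs 1 --measurement-runs 3 --seed 12345
```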
## Parameter ranges under evaluation
- Block cache: 25-55% of RAM
- Write buffer: 64-256 MB
- Compaction jobs: 4-12
- Block size: 16-64 KB
- Compression: none, lz4, snappy, zstd (swept in the example below)
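One way to sweep a single parameter is a small shell loop over the flag values, holding the rest at the chosen baseline; for example, cycling through the compression codecs:
```bash
# Sweep the compression codec while keeping the other settings fixed
for codec in none lz4 snappy zstd; do
  cargo run --bin storage_bench_runner -- \
    --profile mainnet --memory 8 --duration 120 \
    --cache-size 40 --write-buffer 128 \
    --compression "$codec"
done
```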
## Profiles and datasets
Validator profiles:
- light (~100 validators)
- mainnet (~2000 validators)
- testnet (~1000 validators)
Datasets:
- quick_test.toml: ~27 MB (fast checks)
- testnet_sim.toml: ~1 GB
- annual_mainnet.toml: ~40 GB (see the block-data arithmetic below)
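As a rough check of the annual dataset's block portion, the block count follows directly from the values in `dataset_configs/annual_mainnet.toml` (30 s block time, 365 days, 34,371-byte blocks); DA shares and commitments come on top of this figure:
```bash
echo $((24 * 3600 / 30)) blocks/day                    # 2880
echo $((365 * 24 * 3600 / 30)) blocks/year             # 1051200
echo $((365 * 24 * 3600 / 30 * 34371 / 1024**3)) GiB   # ~33 GiB of raw block data
```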
## CLI
```bash
cargo run --bin storage_bench_runner -- [OPTIONS]
--profile light | mainnet | testnet
--memory RAM limit in GB (default: 8)
--duration Benchmark duration in seconds (default: 120)
--cache-size Block cache size as % of RAM (20-60)
--write-buffer Write buffer size in MB (64-512)
--compaction-jobs Background compaction jobs (4-16)
--block-size Table block size in KB (8-64)
--compression none | lz4 | snappy | zstd
--seed RNG seed
--warmup-runs Warmup iterations (default: 1)
--measurement-runs Measurement iterations (default: 3)
--read-only Read-only mode
```
Reproducible run:
```bash
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --seed 12345
```
## Test plan
Purpose: verify that benchmarks run, produce results, and that parameter changes have measurable effects.
### Scope
- Dataset generation at different sizes.
- Benchmark runs across profiles.
- Parameter sweeps for cache, write buffer, compaction, block size, compression.
- Result capture (JSON) and basic summary output.
### Environments
- Memory limits: 4 GB, 8 GB, 16 GB.
- Datasets: small (quick), medium, large.
- Duration: short for exploration (60-120s), longer to confirm (180-300s).
### Test cases
1. Dataset generation
- Small dataset completes.
- Large dataset resumes if partially present.
- Outputs stored in expected path.
2. Baseline benchmark
- Runs with selected profile and memory limit.
- Produces JSON results and console summary.
3. Cache size
- 25%, 40%, 55%.
- Compare mean ops/sec and variability.
- Record chosen value.
4. Write buffer
- Keep chosen cache.
- 128 MB, 256 MB (and 64/512 MB if needed).
- Record impact, pick value.
5. Compaction jobs
- 4, 8, 12 (or within system limits).
- Check for stalls or CPU saturation.
6. Block size
- 16 KB, 32 KB, 64 KB.
- Evaluate read performance and variability.
7. Compression
- none, lz4, snappy, zstd.
- Compare throughput; consider disk footprint if relevant.
8. Reproducibility
- Repeat a chosen run with a fixed seed.
- Confirm similar results across iterations.
9. Memory sensitivity
- Re-run chosen settings at lower and higher memory limits.
- Check for regressions.
### Acceptance criteria
- All runs complete without errors.
- Results are saved (JSON present).
- Chosen settings show a measurable improvement over baseline.
- Variability remains acceptable for this use case.
### Reporting
- Log command lines and seeds used.
- Note dataset, profile, memory, and duration for each run.
- Store JSON result files together for comparison.
## Outputs
- Datasets: ~/.nomos_storage_benchmarks/rocksdb_data
- Results (JSON): ~/.nomos_storage_benchmarks/results/
- Console summary shows mean ops/sec and variability; the saved JSON can be summarized directly (example below).
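Assuming `jq` is available, the saved reports can be tabulated across runs (the field names follow the report structs in this crate):
```bash
jq -r '[.config_summary.profile, .results.statistics.mean_ops_sec, .results.statistics.variability_percent] | @tsv' \
  ~/.nomos_storage_benchmarks/results/bench_*.json
```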
## Requirements
- Rust 1.75+
- 8+ GB RAM (more for larger datasets)
- ~50+ GB disk for the largest dataset
## Notes
- Baseline first, then change one parameter at a time.
- Keep runs short while exploring; confirm with longer runs when needed.
## Why no general-purpose benchmarking library
- Workloads require long-running mixed operations (reads, range scans, writes) against a prebuilt dataset; typical micro-benchmark frameworks focus on short, isolated functions.
- We need control over dataset size/layout, memory limits, and external RocksDB options; this is easier with a purpose-built runner.
- Results include per-run JSON with config and summary metrics; integrating this into a generic harness would add overhead without benefit here.

View File

@@ -0,0 +1,19 @@
[dataset]
block_time_seconds = 30
days = 365
name = "annual_mainnet_conservative"
[network]
blobs_per_block = 50
load_name = "annual_mainnet"
total_subnets = 2048
[validator]
assigned_subnets = 10
[blocks]
size_bytes = 34371
[da]
commitment_size_bytes = 220000
share_size_bytes = 1024

View File

@@ -0,0 +1,19 @@
[dataset]
days = 1
block_time_seconds = 30
name = "quick_test"
[network]
load_name = "light_testnet"
blobs_per_block = 2
total_subnets = 2048
[validator]
assigned_subnets = 1
[blocks]
size_bytes = 10000
[da]
share_size_bytes = 512
commitment_size_bytes = 50000

View File

@@ -0,0 +1,19 @@
[dataset]
days = 7
block_time_seconds = 30
name = "testnet_simulation"
[network]
load_name = "medium_testnet"
blobs_per_block = 15
total_subnets = 2048
[validator]
assigned_subnets = 5
[blocks]
size_bytes = 34371
[da]
share_size_bytes = 1024
commitment_size_bytes = 220000

View File

@@ -0,0 +1,55 @@
# Spec-accurate validator operation profiles
[light]
name = "light"
description = "Light validator with minimal resources and spec-accurate patterns"
# Read frequencies
block_read_rate_hz = 2.0 # Block validation reads
da_share_read_rate_hz = 0.67 # DA sampling reads (20 samples per 30s block)
range_scan_rate_hz = 0.01 # Occasional sync serving
# Write frequencies
block_write_rate_hz = 0.033 # New block finalization (30s blocks)
da_share_write_rate_hz = 0.5 # New DA share storage
commitment_write_rate_hz = 0.5 # New commitments
# Access patterns (spec-accurate temporal distribution)
recent_access_ratio = 0.80 # 80% reads from recent data (Zipfian)
historical_access_ratio = 0.20 # 20% reads from historical data (uniform)
# Network scaling
total_validators = 100 # Small testnet
assigned_subnets = 20 # High subnet assignment for small network
[mainnet]
name = "mainnet"
description = "Mainnet validator with high activity and spec-accurate patterns"
# Read frequencies (higher validation load)
block_read_rate_hz = 10.0 # High block validation rate
da_share_read_rate_hz = 5.0 # Higher DA sampling frequency
range_scan_rate_hz = 0.1 # More frequent sync serving
# Write frequencies (mainnet load)
block_write_rate_hz = 0.033 # Same block time
da_share_write_rate_hz = 5.0 # High DA write activity
commitment_write_rate_hz = 5.0 # Matching commitment writes
# Access patterns (more recent focus)
recent_access_ratio = 0.90 # 90% recent access (heavy tip bias)
historical_access_ratio = 0.10 # 10% historical access
# Network scaling
total_validators = 2000 # Mainnet scale
assigned_subnets = 10 # Medium subnet assignment
[testnet]
name = "testnet"
description = "Testnet network with heavy sync activity and range scanning"
# Read frequencies (sync serving dominates)
block_read_rate_hz = 50.0 # Heavy block serving for sync
da_share_read_rate_hz = 10.0 # Moderate DA validation
range_scan_rate_hz = 10.0 # Continuous range scans for sync
# Write frequencies (reduced during sync)
block_write_rate_hz = 0.01 # Minimal new blocks
da_share_write_rate_hz = 0.1 # Reduced DA writes
commitment_write_rate_hz = 0.1 # Reduced commitments
# Access patterns (historical focus for sync)
recent_access_ratio = 0.20 # Mostly historical data
historical_access_ratio = 0.80 # Heavy historical access
# Network scaling
total_validators = 1000 # Medium network during sync
assigned_subnets = 10 # Standard subnet assignment
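# Note: the benchmark converts each rate into a polling interval
# (interval_ms = 1000 / rate_hz), so e.g. block_write_rate_hz = 0.033
# corresponds to roughly one block write every 30 s.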

View File

@@ -0,0 +1,440 @@
//! Storage capacity estimator
//!
//! Computes block and DA storage requirements for various time periods and
//! network scenarios. Produces summaries, time breakdowns, and simple hardware
//! recommendations.
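//!
//! Run with `cargo run --example storage_capacity_calculator`; results are
//! printed to stdout as JSON and saved under the benchmark results directory.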
use std::{collections::HashMap, fs};
use serde::{Deserialize, Serialize};
use storage_benchmarks::BenchConfig;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimePeriod {
/// Number of days represented by the period
pub days: u64,
/// Human-readable label (e.g., "1 year")
pub description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkConfig {
/// Block time in seconds
pub block_time_seconds: u64,
/// Average block size in bytes
pub avg_block_size_bytes: u64,
/// Total DA subnets
pub total_subnets: u64,
/// DA share size in bytes
pub da_share_size_bytes: u64,
/// DA commitment size in bytes
pub da_commitment_size_bytes: u64,
/// Shares per blob
pub shares_per_blob: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkScenario {
/// Scenario name
pub name: String,
/// Blobs per block
pub blobs_per_block: u64,
/// Total validators used to estimate DA responsibility
pub total_validators: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalculationConfig {
/// Time window for the calculation
pub time_period: TimePeriod,
/// Network parameters used across scenarios
pub network: NetworkConfig,
/// Scenarios to evaluate
pub scenarios: Vec<NetworkScenario>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockDataResults {
/// Blocks produced per day
pub blocks_per_day: u64,
/// Total blocks in the period
pub blocks_for_period: u64,
/// Average block size in KiB
pub avg_block_size_kb: u64,
/// Total block data size in GiB for the period
pub total_block_data_gb: f64,
/// Period label
pub time_period_description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioResults {
/// Scenario label
pub scenario_name: String,
/// Blobs per block for this scenario
pub blobs_per_block: u64,
/// Total validators
pub total_validators: u64,
/// Typical subnets assigned per validator
pub typical_subnets_per_validator: u64,
/// Percent of subnets likely assigned to a validator
pub subnet_assignment_percent: f64,
/// Count of DA shares stored by the validator over the period
pub shares_stored_count: u64,
/// Count of blobs assigned over the period
pub blobs_assigned_count: u64,
/// DA shares size in GiB
pub da_shares_gb: f64,
/// DA commitments size in GiB
pub da_commitments_gb: f64,
/// Total DA data size in GiB
pub total_da_gb: f64,
/// Total validator storage in GiB (blocks + DA)
pub total_validator_storage_gb: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeBreakdown {
/// Sequential period index
pub period_number: u64,
/// Label (Month/Week/Day N)
pub period_description: String,
/// Cumulative storage at this step in GiB
pub cumulative_gb: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRecommendation {
/// Scenario label
pub scenario: String,
/// Required storage in GiB for the period
pub storage_gb_for_period: u64,
/// Recommended device size
pub recommended_storage: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageCalculationResults {
/// Input config used to compute results
pub calculation_config: CalculationConfig,
/// Aggregate block data for the period
pub block_data: BlockDataResults,
/// Per-scenario storage summaries
pub scenarios: Vec<ScenarioResults>,
/// Time-based accumulation for visualization
pub time_breakdown: Vec<TimeBreakdown>,
/// Simple hardware sizing suggestions
pub hardware_recommendations: Vec<HardwareRecommendation>,
/// Notes for stress testing considerations
pub stress_testing_notes: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CapacityCalculationReport {
pub calculation_results: std::collections::HashMap<String, StorageCalculationResults>,
pub summary: CalculationSummary,
pub metadata: ReportMetadata,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CalculationSummary {
pub scenarios_calculated: usize,
pub total_time_periods: usize,
pub calculation_timestamp: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ReportMetadata {
pub tool: String,
pub version: String,
pub description: String,
}
impl Default for NetworkConfig {
fn default() -> Self {
Self {
block_time_seconds: 30,
avg_block_size_bytes: 34_371,
total_subnets: 2048,
da_share_size_bytes: 1_024,
da_commitment_size_bytes: 220_000,
shares_per_blob: 512,
}
}
}
impl Default for CalculationConfig {
fn default() -> Self {
Self {
time_period: TimePeriod {
days: 365,
description: "1 year".to_string(),
},
network: NetworkConfig::default(),
scenarios: vec![
NetworkScenario {
name: "Conservative".to_string(),
blobs_per_block: 50,
total_validators: 2000,
},
NetworkScenario {
name: "Active".to_string(),
blobs_per_block: 100,
total_validators: 2000,
},
NetworkScenario {
name: "High Activity".to_string(),
blobs_per_block: 200,
total_validators: 3000,
},
NetworkScenario {
name: "Peak".to_string(),
blobs_per_block: 500,
total_validators: 5000,
},
],
}
}
}
/// Compute storage with blob/share separation for DA
///
/// - Includes blocks, DA shares, and commitments
/// - Returns summaries, breakdowns, and recommendations
fn calculate_storage_requirements(config: &CalculationConfig) -> StorageCalculationResults {
let blocks_per_day = (24 * 60 * 60) / config.network.block_time_seconds;
let total_blocks_for_period = config.time_period.days * blocks_per_day;
let block_data_for_period_gb = (total_blocks_for_period as f64
* config.network.avg_block_size_bytes as f64)
/ (1024.0 * 1024.0 * 1024.0);
let block_data = BlockDataResults {
blocks_per_day,
blocks_for_period: total_blocks_for_period,
avg_block_size_kb: config.network.avg_block_size_bytes / 1024,
total_block_data_gb: block_data_for_period_gb,
time_period_description: config.time_period.description.clone(),
};
let mut scenarios = Vec::new();
let mut scenario_storage_map = HashMap::new();
for scenario in &config.scenarios {
let typical_subnets_per_validator =
config.network.total_subnets / (scenario.total_validators / 10).max(1);
let subnet_assignment_probability =
typical_subnets_per_validator as f64 / config.network.total_subnets as f64;
let total_blobs_for_period = total_blocks_for_period * scenario.blobs_per_block;
let validator_assigned_blobs =
(total_blobs_for_period as f64 * subnet_assignment_probability) as u64;
let shares_per_assigned_blob =
config.network.shares_per_blob / config.network.total_subnets;
let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob.max(1);
let da_shares_size_gb = (total_shares_stored * config.network.da_share_size_bytes) as f64
/ (1024.0 * 1024.0 * 1024.0);
let da_commitments_size_gb = (validator_assigned_blobs
* config.network.da_commitment_size_bytes) as f64
/ (1024.0 * 1024.0 * 1024.0);
let total_da_size_gb = da_shares_size_gb + da_commitments_size_gb;
let total_storage_for_period = block_data_for_period_gb + total_da_size_gb;
scenario_storage_map.insert(scenario.name.clone(), total_da_size_gb);
scenarios.push(ScenarioResults {
scenario_name: scenario.name.clone(),
blobs_per_block: scenario.blobs_per_block,
total_validators: scenario.total_validators,
typical_subnets_per_validator,
subnet_assignment_percent: subnet_assignment_probability * 100.0,
shares_stored_count: total_shares_stored,
blobs_assigned_count: validator_assigned_blobs,
da_shares_gb: da_shares_size_gb,
da_commitments_gb: da_commitments_size_gb,
total_da_gb: total_da_size_gb,
total_validator_storage_gb: total_storage_for_period,
});
}
let breakdown_periods = if config.time_period.days >= 365 {
12
} else if config.time_period.days >= 30 {
config.time_period.days / 7
} else {
config.time_period.days
};
// Use the first configured scenario; HashMap iteration order is arbitrary.
let first_scenario_da_gb = config
.scenarios
.first()
.and_then(|scenario| scenario_storage_map.get(&scenario.name))
.copied()
.unwrap_or(0.0);
let total_gb_per_period = block_data_for_period_gb + first_scenario_da_gb;
let increment_gb = total_gb_per_period / breakdown_periods as f64;
let mut time_breakdown = Vec::new();
for period in 1..=breakdown_periods {
let cumulative_gb = increment_gb * period as f64;
let period_desc = if config.time_period.days >= 365 {
format!("Month {}", period)
} else if config.time_period.days >= 30 {
format!("Week {}", period)
} else {
format!("Day {}", period)
};
time_breakdown.push(TimeBreakdown {
period_number: period,
period_description: period_desc,
cumulative_gb,
});
}
let mut hardware_recommendations = Vec::new();
for scenario in &scenarios {
let storage_gb = scenario.total_validator_storage_gb as u64;
let recommended = if storage_gb < 50 {
"100GB+ storage"
} else if storage_gb < 100 {
"200GB+ storage"
} else if storage_gb < 200 {
"500GB+ storage"
} else if storage_gb < 500 {
"1TB+ storage"
} else {
"2TB+ storage"
};
hardware_recommendations.push(HardwareRecommendation {
scenario: scenario.scenario_name.clone(),
storage_gb_for_period: storage_gb,
recommended_storage: recommended.to_string(),
});
}
let stress_testing_notes = vec![
"Memory pressure increases with database size".to_string(),
"Cache efficiency decreases as dataset grows beyond memory".to_string(),
"Compaction overhead increases with write frequency".to_string(),
"Range scan performance degrades with database size".to_string(),
"Storage benchmarks should test multi-GB datasets for realism".to_string(),
format!(
"Test with datasets representing {}-{} days of operation",
config.time_period.days / 4,
config.time_period.days / 2
),
];
StorageCalculationResults {
calculation_config: config.clone(),
block_data,
scenarios,
time_breakdown,
hardware_recommendations,
stress_testing_notes,
}
}
fn main() {
let default_config = CalculationConfig::default();
let monthly_config = CalculationConfig {
time_period: TimePeriod {
days: 30,
description: "30 days".to_string(),
},
network: NetworkConfig::default(),
scenarios: vec![
NetworkScenario {
name: "Testnet Conservative".to_string(),
blobs_per_block: 25,
total_validators: 100,
},
NetworkScenario {
name: "Testnet Active".to_string(),
blobs_per_block: 50,
total_validators: 100,
},
],
};
let weekly_config = CalculationConfig {
time_period: TimePeriod {
days: 7,
description: "1 week".to_string(),
},
network: NetworkConfig {
block_time_seconds: 15,
shares_per_blob: 256,
..NetworkConfig::default()
},
scenarios: vec![NetworkScenario {
name: "Development".to_string(),
blobs_per_block: 10,
total_validators: 10,
}],
};
let configs = vec![
("annual", default_config),
("monthly", monthly_config),
("weekly", weekly_config),
];
let mut all_results = HashMap::new();
for (name, config) in configs {
let results = calculate_storage_requirements(&config);
all_results.insert(name, results);
}
save_capacity_results(&all_results);
match serde_json::to_string_pretty(&all_results) {
Ok(json) => println!("{}", json),
Err(e) => eprintln!("Error serializing results: {}", e),
}
}
fn save_capacity_results(all_results: &HashMap<&str, StorageCalculationResults>) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("storage_capacity_calculation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let calculation_results: std::collections::HashMap<String, StorageCalculationResults> =
all_results
.iter()
.map(|(k, v)| (k.to_string(), v.clone()))
.collect();
let report = CapacityCalculationReport {
calculation_results,
summary: CalculationSummary {
scenarios_calculated: all_results.len(),
total_time_periods: all_results
.values()
.map(|r| r.scenarios.len())
.sum::<usize>(),
calculation_timestamp: chrono::Utc::now().to_rfc3339(),
},
metadata: ReportMetadata {
tool: "storage_capacity_calculator".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "Storage capacity estimates for Nomos validator scenarios".to_string(),
},
};
match fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => eprintln!(
"Capacity calculation results saved to: {}",
filepath.display()
),
Err(e) => eprintln!(
"Failed to save capacity results to {}: {}",
filepath.display(),
e
),
}
}

View File

@@ -0,0 +1,12 @@
[toolchain]
# Keep this version in sync also in the following places:
# * Dockerfile
# * flake.nix
# * testnet/Dockerfile
# Also, bump the nightly toolchain pinned in the following places to the latest nightly of the new version:
# * .github/workflows/code-check.yml (fmt job)
# * .pre-commit-config.yml (fmt hook)
# Then, if there is any new allow-by-default rustc lint introduced/stabilized, add it to the respective entry in our `config.toml`.
channel = "1.90.0"
# Although clippy should be included in the default profile, in some cases it is not installed, so we force it with an explicit declaration.
components = ["clippy"]

View File

@@ -0,0 +1,5 @@
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
reorder_imports = true
reorder_modules = true
wrap_comments = true

View File

@@ -0,0 +1,71 @@
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use super::{create_blob_id, create_header_id};
pub async fn analyze_dataset(
storage: &mut RocksBackend,
) -> Result<(usize, usize), Box<dyn std::error::Error>> {
log::info!("Analyzing dataset size with adaptive probing...");
let mut upper_bound = 10000;
while upper_bound < 10_000_000 {
let header_id = create_header_id(upper_bound);
let block_result = storage.get_block(header_id).await;
match block_result {
Ok(Some(_)) => {}
Ok(None) | Err(_) => {
break;
}
}
upper_bound *= 2;
}
// Binary search from 1 so that datasets smaller than the initial 10_000-block
// probe bound are still counted correctly.
let mut low = 1;
let mut high = upper_bound;
let mut block_count = 0;
while low <= high {
let mid = usize::midpoint(low, high);
let header_id = create_header_id(mid);
match storage.get_block(header_id).await {
Ok(Some(_)) => {
block_count = mid;
low = mid + 1;
}
_ => {
high = mid - 1;
}
}
}
let mut share_count = 0;
let da_sample_size = std::cmp::min(1000, block_count / 100);
for blob_idx in 0..da_sample_size {
for subnet in 0..50 {
let blob_id = create_blob_id(blob_idx, 0);
let share_idx = [subnet as u8, 0u8];
if let Ok(Some(_)) = storage.get_light_share(blob_id, share_idx).await {
share_count += 1;
}
}
}
let estimated_da_total = if da_sample_size > 0 {
share_count * (block_count / da_sample_size)
} else {
share_count
};
log::info!("DA estimation: sampled {share_count} objects from {da_sample_size} blocks, extrapolated to {estimated_da_total} total (assumes uniform distribution)");
log::info!(
"Dataset analysis complete: {block_count} blocks, ~{estimated_da_total} DA objects (sampled)"
);
Ok((block_count, estimated_da_total))
}

View File

@@ -0,0 +1,11 @@
pub mod analysis;
pub mod runner;
pub mod types;
pub mod utilities;
pub mod workloads;
pub use analysis::*;
pub use runner::*;
pub use types::*;
pub use utilities::*;
pub use workloads::*;

View File

@@ -0,0 +1,250 @@
use std::time::Duration;
use log::info;
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use serde::{Deserialize, Serialize};
use super::{
analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary, BenchmarkReport,
BenchmarkResultsSummary, ConcurrentBenchmarkResult, ReportMetadata, StatisticsSummary,
};
use crate::{
config::{ProductionBenchConfig, ValidatorProfile, ValidatorProfiles},
BenchConfig,
};
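/// Drives one benchmark session end to end: dataset analysis, warmup runs,
/// measurement runs, statistics, and a JSON report on disk.
///
/// Construct with [`BenchmarkRunner::new`] and call
/// [`BenchmarkRunner::execute_benchmark`] to run all phases.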
pub struct BenchmarkRunner {
config: ProductionBenchConfig,
profile: ValidatorProfile,
storage_config: BenchConfig,
execution_state: ExecutionState,
results: BenchmarkResults,
}
#[derive(Debug, Clone, Default)]
struct ExecutionState {
warmup_completed: usize,
measurements_completed: usize,
dataset_size: Option<(usize, usize)>,
}
#[derive(Debug, Clone, Default)]
pub struct BenchmarkResults {
pub raw_measurements: Vec<f64>,
pub warmup_results: Vec<f64>,
pub detailed_results: Vec<ConcurrentBenchmarkResult>,
pub mean_ops_sec: f64,
pub variability_percent: f64,
pub best_result: Option<ConcurrentBenchmarkResult>,
pub stats_summary: Option<RocksDbStatsSummary>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RocksDbStatsSummary {
pub cache_hit_rate_improvement: f64,
pub l0_file_growth: i64,
pub compaction_activity: u64,
pub memory_usage_change: i64,
}
impl BenchmarkRunner {
pub fn new(config: ProductionBenchConfig) -> Result<Self, Box<dyn std::error::Error>> {
config.validate()?;
let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
let profile = profiles
.get_profile(&config.profile.to_string())
.ok_or_else(|| format!("Profile '{}' not found", config.profile))?
.clone();
let storage_config = BenchConfig::production();
if !storage_config.settings.db_path.exists() {
return Err("No dataset found - run dataset_generator first".into());
}
Ok(Self {
config,
profile,
storage_config,
execution_state: ExecutionState::default(),
results: BenchmarkResults::default(),
})
}
pub async fn execute_benchmark(
&mut self,
) -> Result<BenchmarkResults, Box<dyn std::error::Error>> {
self.setup_memory_limits();
self.analyze_dataset().await?;
info!("Starting warmup phase: {} runs", self.config.warmup_runs);
for i in 1..=self.config.warmup_runs {
info!("Warmup run {}/{}", i, self.config.warmup_runs);
let result = self.run_single_iteration().await?;
self.results.warmup_results.push(result);
self.execution_state.warmup_completed = i;
}
info!(
"Starting measurement phase: {} runs",
self.config.measurement_runs
);
for i in 1..=self.config.measurement_runs {
info!("Measurement run {}/{}", i, self.config.measurement_runs);
let result = self.run_single_iteration().await?;
info!("Run {i} result: {result:.1} ops/sec");
self.results.raw_measurements.push(result);
self.execution_state.measurements_completed = i;
}
self.calculate_final_statistics();
self.save_results();
Ok(self.results.clone())
}
fn setup_memory_limits(&self) {
info!("Setting memory limit to {}GB", self.config.memory);
}
async fn analyze_dataset(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let mut storage_settings = self.storage_config.settings.clone();
storage_settings.read_only = self.config.read_only;
let mut storage = RocksBackend::new(storage_settings)?;
let dataset_size = analyze_dataset(&mut storage).await?;
self.execution_state.dataset_size = Some(dataset_size);
info!(
"Dataset analysis: {} blocks, {} shares",
dataset_size.0, dataset_size.1
);
Ok(())
}
async fn run_single_iteration(&mut self) -> Result<f64, Box<dyn std::error::Error>> {
let mut storage_settings = self.storage_config.settings.clone();
storage_settings.read_only = self.config.read_only;
let storage = RocksBackend::new(storage_settings)?;
let dataset_size = self.execution_state.dataset_size.unwrap_or((0, 0));
match run_concurrent_validator_benchmark(
storage,
Duration::from_secs(self.config.duration),
&self.profile,
dataset_size,
self.config.read_only,
)
.await
{
Ok(detailed_result) => {
let throughput = detailed_result.combined_throughput();
self.results.detailed_results.push(detailed_result);
Ok(throughput)
}
Err(e) => {
log::error!("Benchmark iteration failed: {e}");
Ok(0.0)
}
}
}
fn calculate_final_statistics(&mut self) {
if self.results.raw_measurements.is_empty() {
return;
}
let mean = self.results.raw_measurements.iter().sum::<f64>()
/ self.results.raw_measurements.len() as f64;
let min = self
.results
.raw_measurements
.iter()
.fold(f64::INFINITY, |a, &b| a.min(b));
let max = self
.results
.raw_measurements
.iter()
.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
self.results.mean_ops_sec = mean;
self.results.variability_percent = variability;
if let Some(best_idx) = self
.results
.raw_measurements
.iter()
.enumerate()
.max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
.map(|(idx, _)| idx)
{
self.results.best_result = self.results.detailed_results.get(best_idx).cloned();
}
}
fn save_results(&self) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!(
"bench_{}_{}_{}gb_{}.json",
self.config.profile, timestamp, self.config.memory, self.config.duration
);
let filepath = results_dir.join(filename);
let report = BenchmarkReport {
config_summary: BenchConfigSummary {
profile: format!("{:?}", self.config.profile),
memory_gb: self.config.memory,
duration_seconds: self.config.duration,
warmup_runs: self.config.warmup_runs,
measurement_runs: self.config.measurement_runs,
},
results: BenchmarkResultsSummary {
raw_measurements: self.results.raw_measurements.clone(),
warmup_results: self.results.warmup_results.clone(),
statistics: StatisticsSummary {
mean_ops_sec: self.results.mean_ops_sec,
// Min/max over the recorded measurements (0.0 when no runs completed).
min_ops_sec: self.results.raw_measurements.iter().copied().reduce(f64::min).unwrap_or(0.0),
max_ops_sec: self.results.raw_measurements.iter().copied().reduce(f64::max).unwrap_or(0.0),
variability_percent: self.results.variability_percent,
sample_count: self.results.raw_measurements.len(),
},
},
metadata: ReportMetadata {
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_owned(),
runner_type: "batch".to_owned(),
},
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(()) => log::info!(
"Stateful benchmark results saved to: {}",
filepath.display()
),
Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
}
}
#[must_use]
pub const fn execution_progress(&self) -> (usize, usize, usize, usize) {
(
self.execution_state.warmup_completed,
self.config.warmup_runs,
self.execution_state.measurements_completed,
self.config.measurement_runs,
)
}
#[must_use]
pub const fn current_results(&self) -> &BenchmarkResults {
&self.results
}
}

View File

@@ -0,0 +1,266 @@
use std::time::Duration;
use serde::{Deserialize, Serialize};
use crate::{
config::types::WorkloadType,
metrics::{LatencyPercentiles, RocksDbStats},
};
#[derive(Debug, Clone)]
pub struct WorkloadStreamResult {
pub workload_type: WorkloadType,
pub executed: bool,
pub operations_total: u64,
pub operations_success: u64,
pub bytes_read: u64,
pub bytes_written: u64,
pub duration: Duration,
pub errors: u64,
pub cache_misses: u64,
pub latency_percentiles: Option<LatencyPercentiles>,
}
#[derive(Debug, Clone)]
pub struct ConcurrentBenchmarkResult {
pub block_validation: WorkloadStreamResult,
pub da_sampling: WorkloadStreamResult,
pub da_commitments: WorkloadStreamResult,
pub ibd_serving: WorkloadStreamResult,
pub block_storage: WorkloadStreamResult,
pub da_storage: WorkloadStreamResult,
pub total_duration: Duration,
pub peak_memory_mb: f64,
pub resource_contention_factor: f64,
pub concurrent_operations_peak: u64,
pub rocksdb_stats_before: RocksDbStats,
pub rocksdb_stats_after: RocksDbStats,
}
impl ConcurrentBenchmarkResult {
#[must_use]
pub const fn total_operations(&self) -> u64 {
let mut total = 0;
if self.block_validation.executed {
total += self.block_validation.operations_total;
}
if self.da_sampling.executed {
total += self.da_sampling.operations_total;
}
if self.da_commitments.executed {
total += self.da_commitments.operations_total;
}
if self.ibd_serving.executed {
total += self.ibd_serving.operations_total;
}
if self.block_storage.executed {
total += self.block_storage.operations_total;
}
if self.da_storage.executed {
total += self.da_storage.operations_total;
}
total
}
#[must_use]
pub const fn total_success(&self) -> u64 {
let mut total = 0;
if self.block_validation.executed {
total += self.block_validation.operations_success;
}
if self.da_sampling.executed {
total += self.da_sampling.operations_success;
}
if self.da_commitments.executed {
total += self.da_commitments.operations_success;
}
if self.ibd_serving.executed {
total += self.ibd_serving.operations_success;
}
if self.block_storage.executed {
total += self.block_storage.operations_success;
}
if self.da_storage.executed {
total += self.da_storage.operations_success;
}
total
}
#[must_use]
pub fn combined_throughput(&self) -> f64 {
self.total_success() as f64 / self.total_duration.as_secs_f64()
}
#[must_use]
pub fn success_rate(&self) -> f64 {
if self.total_operations() > 0 {
self.total_success() as f64 / self.total_operations() as f64
} else {
0.0
}
}
#[must_use]
pub fn total_data_throughput_mbps(&self) -> f64 {
let mut total_bytes = 0;
if self.block_validation.executed {
total_bytes += self.block_validation.bytes_read;
}
if self.da_sampling.executed {
total_bytes += self.da_sampling.bytes_read;
}
if self.da_commitments.executed {
total_bytes += self.da_commitments.bytes_read;
}
if self.ibd_serving.executed {
total_bytes += self.ibd_serving.bytes_read;
}
if self.block_storage.executed {
total_bytes += self.block_storage.bytes_written;
}
if self.da_storage.executed {
total_bytes += self.da_storage.bytes_written;
}
total_bytes as f64 / 1024.0 / 1024.0 / self.total_duration.as_secs_f64()
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StorageBenchReport {
pub benchmark_config: BenchConfigSummary,
pub results: BenchResultsSummary,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchConfigSummary {
pub profile: String,
pub memory_gb: u32,
pub duration_seconds: u64,
pub warmup_runs: usize,
pub measurement_runs: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchResultsSummary {
pub raw_measurements: Vec<f64>,
pub statistics: StatisticsSummary,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StatisticsSummary {
pub mean_ops_sec: f64,
pub min_ops_sec: f64,
pub max_ops_sec: f64,
pub variability_percent: f64,
pub sample_count: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetGenerationReport {
pub generation_summary: GenerationSummary,
pub performance: GenerationPerformance,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationSummary {
pub blocks_generated: usize,
pub da_objects_generated: usize,
pub total_objects: usize,
pub duration_seconds: u64,
pub duration_minutes: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationPerformance {
pub total_rate_objects_per_sec: f64,
pub block_rate_per_sec: f64,
pub da_rate_per_sec: f64,
pub cpu_cores_used: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetVerificationReport {
pub verification_summary: VerificationSummary,
pub data_sizes: DataSizesSummary,
pub completeness_estimates: CompletenessSummary,
pub performance: VerificationPerformance,
pub warnings: WarningsSummary,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct VerificationSummary {
pub blocks_found: usize,
pub da_shares_found: usize,
pub da_commitments_found: usize,
pub total_objects_found: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DataSizesSummary {
pub total_block_size_bytes: u64,
pub total_share_size_bytes: u64,
pub total_commitment_size_bytes: u64,
pub total_verified_size_bytes: u64,
pub total_verified_size_gb: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CompletenessSummary {
pub block_completeness_percent: f64,
pub da_completeness_percent: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct VerificationPerformance {
pub verification_time_seconds: f64,
pub objects_verified_per_sec: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct WarningsSummary {
pub block_generation_incomplete: bool,
pub data_size_smaller_than_expected: bool,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkReport {
pub config_summary: BenchConfigSummary,
pub results: BenchmarkResultsSummary,
pub metadata: ReportMetadata,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkResultsSummary {
pub raw_measurements: Vec<f64>,
pub warmup_results: Vec<f64>,
pub statistics: StatisticsSummary,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ReportMetadata {
pub timestamp: String,
pub tool_version: String,
pub runner_type: String,
}

View File

@@ -0,0 +1,37 @@
use std::time::Duration;
use crate::config::ValidatorProfile;
#[must_use]
pub fn create_header_id(index: usize) -> nomos_core::header::HeaderId {
let mut id = [0u8; 32];
id[0..4].copy_from_slice(&(index as u32).to_be_bytes());
nomos_core::header::HeaderId::from(id)
}
#[must_use]
pub fn create_blob_id(block: usize, blob_in_block: usize) -> nomos_core::da::BlobId {
let mut id = [0u8; 32];
id[0..4].copy_from_slice(&(block as u32).to_be_bytes());
id[4..8].copy_from_slice(&(blob_in_block as u32).to_be_bytes());
nomos_core::da::BlobId::from(id)
}
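/// Converts a target operation frequency (Hz) into a polling interval,
/// rejecting non-positive frequencies.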
pub fn safe_interval_from_hz(frequency_hz: f64, workload_type: &str) -> Result<Duration, String> {
if frequency_hz <= 0.0 {
return Err(format!(
"Invalid frequency {frequency_hz} Hz for {workload_type}"
));
}
// Clamp to at least 1 ms; `tokio::time::interval` panics on a zero period.
let interval_ms = ((1000.0 / frequency_hz) as u64).max(1);
Ok(Duration::from_millis(interval_ms))
}
#[must_use]
pub fn estimate_sequential_performance(profile: &ValidatorProfile) -> f64 {
profile.range_scan_rate_hz.mul_add(
10.0,
profile.block_read_rate_hz + profile.da_share_read_rate_hz + profile.da_share_write_rate_hz,
)
}

View File

@@ -0,0 +1,9 @@
pub mod orchestrator;
pub mod reads;
pub mod runners;
pub mod writes;
pub use orchestrator::*;
pub use reads::*;
pub use runners::*;
pub use writes::*;

View File

@@ -0,0 +1,126 @@
use std::{sync::Arc, time::Instant};
use log::info;
use nomos_storage::backends::rocksdb::RocksBackend;
use tokio::sync::Mutex;
use super::{
super::{estimate_sequential_performance, ConcurrentBenchmarkResult},
reads::{
run_block_validation_workload, run_da_commitments_workload, run_da_sampling_workload,
run_ibd_serving_workload,
},
writes::{run_conditional_block_storage_workload, run_conditional_da_storage_workload},
};
use crate::{config::ValidatorProfile, metrics::StatsCollector};
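/// Runs the six validator workload streams (block validation, DA sampling,
/// DA commitments, IBD serving, block storage, DA storage) concurrently against
/// a shared RocksDB backend via `tokio::join!`, then aggregates per-stream
/// results and RocksDB statistics into a single `ConcurrentBenchmarkResult`.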
pub async fn run_concurrent_validator_benchmark(
storage: RocksBackend,
duration: std::time::Duration,
profile: &ValidatorProfile,
dataset_size: (usize, usize),
is_read_only: bool,
) -> Result<ConcurrentBenchmarkResult, Box<dyn std::error::Error>> {
if is_read_only && (profile.block_write_rate_hz > 0.0 || profile.da_share_write_rate_hz > 0.0) {
log::warn!("Storage is read-only but profile has write operations. Write workloads will be skipped.");
}
let storage = Arc::new(Mutex::new(storage));
let mut stats_collector = StatsCollector::new();
stats_collector.collect_before(&*storage.lock().await);
let start_time = Instant::now();
info!(
"Starting concurrent validator simulation for {:.1}s",
duration.as_secs_f64()
);
info!(
"Network-aware concurrency: {} validators \u{2192} {} IBD streams, {} DA streams",
profile.total_validators,
profile.ibd_concurrent_streams(),
profile.da_concurrent_streams()
);
let (
block_validation_result,
da_sampling_result,
da_commitments_result,
ibd_serving_result,
block_storage_result,
da_storage_result,
) = tokio::join!(
run_block_validation_workload(
Arc::clone(&storage),
duration,
profile.block_read_rate_hz,
dataset_size.0,
profile
),
run_da_sampling_workload(
Arc::clone(&storage),
duration,
profile.da_share_read_rate_hz,
dataset_size.0,
profile
),
run_da_commitments_workload(
Arc::clone(&storage),
duration,
profile.da_share_read_rate_hz * 0.3,
dataset_size.0,
profile
),
run_ibd_serving_workload(
Arc::clone(&storage),
duration,
profile.range_scan_rate_hz,
dataset_size.0
),
run_conditional_block_storage_workload(
Arc::clone(&storage),
duration,
profile.block_write_rate_hz,
dataset_size.0,
is_read_only
),
run_conditional_da_storage_workload(
Arc::clone(&storage),
duration,
profile.da_share_write_rate_hz,
dataset_size.1,
is_read_only
)
);
let total_duration = start_time.elapsed();
stats_collector.collect_after(&*storage.lock().await);
let sequential_estimated_throughput = estimate_sequential_performance(profile);
let actual_concurrent_throughput = (block_validation_result.operations_success
+ da_sampling_result.operations_success
+ da_commitments_result.operations_success
+ ibd_serving_result.operations_success
+ block_storage_result.operations_success
+ da_storage_result.operations_success) as f64
/ total_duration.as_secs_f64();
let contention_factor = actual_concurrent_throughput / sequential_estimated_throughput;
Ok(ConcurrentBenchmarkResult {
block_validation: block_validation_result,
da_sampling: da_sampling_result,
da_commitments: da_commitments_result,
ibd_serving: ibd_serving_result,
block_storage: block_storage_result,
da_storage: da_storage_result,
total_duration,
peak_memory_mb: 0.0,
resource_contention_factor: contention_factor,
concurrent_operations_peak: 6,
rocksdb_stats_before: stats_collector.before.clone(),
rocksdb_stats_after: stats_collector.after.clone(),
})
}

View File

@@ -0,0 +1,302 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use tokio::sync::Mutex;
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
use crate::{
config::{types::WorkloadType, ValidatorProfile},
data::{select_block_spec_accurate, select_da_spec_accurate},
metrics::LatencyTracker,
};
pub async fn run_block_validation_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let block_index = select_block_spec_accurate(result.operations_total, max_blocks, profile);
let header_id = create_header_id(block_index);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_block(header_id).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_da_sampling_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
let blob_id = create_blob_id(blob_index, 0);
let share_idx = [(result.operations_total % 20) as u8, 0u8];
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_light_share(blob_id, share_idx).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_ibd_serving_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
) -> WorkloadStreamResult {
const IBD_CHUNK_SIZE: usize = 1000;
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::IbdServing,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let max_safe_blocks = max_blocks.saturating_sub(IBD_CHUNK_SIZE).max(1);
let start_block = (result.operations_total as usize * IBD_CHUNK_SIZE) % max_safe_blocks;
let start_slot = cryptarchia_engine::Slot::from(start_block as u64);
let end_slot = cryptarchia_engine::Slot::from((start_block + IBD_CHUNK_SIZE) as u64);
let Some(limit) = std::num::NonZeroUsize::new(IBD_CHUNK_SIZE) else {
log::error!("Invalid IBD chunk size: {IBD_CHUNK_SIZE}");
result.errors += 1;
continue;
};
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let scan_result = storage_guard
.scan_immutable_block_ids(start_slot..=end_slot, limit)
.await;
if let Ok(header_ids) = &scan_result {
for header_id in header_ids.iter().take(IBD_CHUNK_SIZE) {
let _ = storage_guard.get_block(*header_id).await;
}
}
drop(storage_guard);
scan_result
})
.await;
match operation_result {
Ok(header_ids) => {
result.operations_success += 1;
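// 34_371 bytes: the average block size assumed by the dataset configs (size_bytes)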
let estimated_bytes = header_ids.len() as u64 * 34371;
result.bytes_read += estimated_bytes;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_da_commitments_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaCommitments,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
let blob_id = create_blob_id(blob_index, 0);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_shared_commitments(blob_id).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}

View File

@ -0,0 +1,293 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use async_trait::async_trait;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use serde::{Deserialize, Serialize};
use tokio::sync::Mutex;
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
use crate::{
config::{types::WorkloadType, ValidatorProfile},
data::{select_block_spec_accurate, select_da_spec_accurate},
metrics::LatencyTracker,
};
#[async_trait]
pub trait WorkloadRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult;
fn workload_type(&self) -> WorkloadType;
fn is_read_only(&self) -> bool;
}
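// Intended use (a sketch; `storage`, `profile` and `max_blocks` are assumed to be
// supplied by the benchmark driver):
//
//     let mut runner = BlockValidationRunner::new(storage, profile, max_blocks, 10.0);
//     let result = WorkloadRunner::execute(&mut runner, Duration::from_secs(60)).await;
//     assert!(runner.is_read_only());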
pub struct BlockValidationRunner {
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
latency_tracker: LatencyTracker,
execution_stats: WorkloadExecutionStats,
}
pub struct DaSamplingRunner {
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
latency_tracker: LatencyTracker,
execution_stats: WorkloadExecutionStats,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WorkloadExecutionStats {
pub operations_attempted: u64,
pub operations_successful: u64,
pub bytes_processed: u64,
pub errors_encountered: u64,
pub cache_misses_estimated: u64,
pub execution_start: Option<chrono::DateTime<chrono::Utc>>,
pub last_operation_time: Option<chrono::DateTime<chrono::Utc>>,
}
impl BlockValidationRunner {
pub fn new(
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
) -> Self {
Self {
storage,
profile,
max_blocks,
frequency_hz,
latency_tracker: LatencyTracker::new(),
execution_stats: WorkloadExecutionStats::default(),
}
}
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
self.execution_stats.execution_start = Some(chrono::Utc::now());
let interval = match safe_interval_from_hz(self.frequency_hz, "block_validation") {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
return self.create_error_result(duration);
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
self.execute_single_block_validation().await;
}
self.create_final_result(duration)
}
async fn execute_single_block_validation(&mut self) {
let block_index = select_block_spec_accurate(
self.execution_stats.operations_attempted,
self.max_blocks,
&self.profile,
);
let header_id = create_header_id(block_index);
let operation_result = self
.latency_tracker
.record_async_operation(|| async {
let mut storage_guard = self.storage.lock().await;
let result = storage_guard.get_block(header_id).await;
drop(storage_guard);
result
})
.await;
match operation_result {
Ok(Some(data)) => {
self.execution_stats.operations_successful += 1;
self.execution_stats.bytes_processed += data.len() as u64;
}
Ok(None) => {}
Err(_) => self.execution_stats.errors_encountered += 1,
}
self.execution_stats.operations_attempted += 1;
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
}
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: true,
operations_total: self.execution_stats.operations_attempted,
operations_success: self.execution_stats.operations_successful,
bytes_read: self.execution_stats.bytes_processed,
bytes_written: 0,
duration,
errors: self.execution_stats.errors_encountered,
cache_misses: self.execution_stats.cache_misses_estimated,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 1,
cache_misses: 0,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
#[must_use]
pub const fn execution_state(&self) -> &WorkloadExecutionStats {
&self.execution_stats
}
}
#[async_trait]
impl WorkloadRunner for BlockValidationRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
Self::execute(self, duration).await
}
fn workload_type(&self) -> WorkloadType {
WorkloadType::BlockValidation
}
fn is_read_only(&self) -> bool {
true
}
}
impl DaSamplingRunner {
pub fn new(
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
) -> Self {
Self {
storage,
profile,
max_blocks,
frequency_hz,
latency_tracker: LatencyTracker::new(),
execution_stats: WorkloadExecutionStats::default(),
}
}
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
self.execution_stats.execution_start = Some(chrono::Utc::now());
let interval = match safe_interval_from_hz(self.frequency_hz, "da_sampling") {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
return self.create_error_result(duration);
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
self.execute_single_da_sample().await;
}
self.create_final_result(duration)
}
async fn execute_single_da_sample(&mut self) {
let blob_index = select_da_spec_accurate(
self.execution_stats.operations_attempted,
self.max_blocks,
&self.profile,
);
let blob_id = create_blob_id(blob_index, 0);
let share_idx = [(self.execution_stats.operations_attempted % 20) as u8, 0u8];
let operation_result = self
.latency_tracker
.record_async_operation(|| async {
let mut storage_guard = self.storage.lock().await;
let result = storage_guard.get_light_share(blob_id, share_idx).await;
drop(storage_guard);
result
})
.await;
match operation_result {
Ok(Some(data)) => {
self.execution_stats.operations_successful += 1;
self.execution_stats.bytes_processed += data.len() as u64;
}
Ok(None) => {}
Err(_) => self.execution_stats.errors_encountered += 1,
}
self.execution_stats.operations_attempted += 1;
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
}
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: true,
operations_total: self.execution_stats.operations_attempted,
operations_success: self.execution_stats.operations_successful,
bytes_read: self.execution_stats.bytes_processed,
bytes_written: 0,
duration,
errors: self.execution_stats.errors_encountered,
cache_misses: self.execution_stats.cache_misses_estimated,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 1,
cache_misses: 0,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
}
#[async_trait]
impl WorkloadRunner for DaSamplingRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
Self::execute(self, duration).await
}
fn workload_type(&self) -> WorkloadType {
WorkloadType::DaSampling
}
fn is_read_only(&self) -> bool {
true
}
}

View File

@ -0,0 +1,227 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use tokio::sync::Mutex;
use super::super::{
safe_interval_from_hz,
utilities::{create_blob_id, create_header_id},
WorkloadStreamResult,
};
use crate::{
config::types::WorkloadType,
data::{create_block_data, create_commitment, create_da_share},
metrics::LatencyTracker,
};
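/// Write-path workload for chain data: each tick stores a ~34 KB block at the next
/// height and, on success, registers its slot in the immutable block-ID index before
/// advancing the height.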
pub async fn run_block_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_block_height: usize,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::BlockStorage,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
let mut current_height = starting_block_height;
while Instant::now() < end_time {
ticker.tick().await;
let header_id = create_header_id(current_height);
let block_data = create_block_data(current_height, 34_371);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let store_result = storage_guard
.store_block(header_id, block_data.clone())
.await;
if store_result.is_ok() {
let slot = cryptarchia_engine::Slot::from(current_height as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
let _ = storage_guard.store_immutable_block_ids(ids).await;
}
drop(storage_guard);
store_result
})
.await;
match operation_result {
Ok(()) => {
result.operations_success += 1;
result.bytes_written += block_data.len() as u64;
current_height += 1;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
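/// Write-path workload for DA data: each tick stores a 1 KiB light share and, when
/// commitment generation succeeds, a shared commitment for the same blob;
/// `bytes_written` counts the share plus a nominal 220 KB per operation.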
pub async fn run_da_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_share_count: usize,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaStorage,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_id = create_blob_id(starting_share_count + result.operations_total as usize, 0);
let share_idx = [(result.operations_total % 20) as u8, 0u8];
let share_data = create_da_share(
starting_share_count + result.operations_total as usize,
0,
1024,
);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let store_result = storage_guard
.store_light_share(blob_id, share_idx, share_data.clone())
.await;
if store_result.is_ok() {
if let Ok(commitment) = create_commitment(
starting_share_count + result.operations_total as usize,
0,
220_000,
)
.await
{
let _ = storage_guard
.store_shared_commitments(blob_id, commitment)
.await;
}
}
drop(storage_guard);
store_result
})
.await;
match operation_result {
Ok(()) => {
result.operations_success += 1;
result.bytes_written += share_data.len() as u64 + 220_000;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
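// The conditional variants below skip the write path entirely when the benchmark is
// running read-only or the configured frequency is zero, returning an unexecuted result
// instead of touching storage.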
pub async fn run_conditional_block_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_block_height: usize,
is_read_only: bool,
) -> WorkloadStreamResult {
if is_read_only || frequency_hz == 0.0 {
return create_empty_workload_result(WorkloadType::BlockStorage);
}
run_block_storage_workload(storage, duration, frequency_hz, starting_block_height).await
}
pub async fn run_conditional_da_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_share_count: usize,
is_read_only: bool,
) -> WorkloadStreamResult {
if is_read_only || frequency_hz == 0.0 {
return create_empty_workload_result(WorkloadType::DaStorage);
}
run_da_storage_workload(storage, duration, frequency_hz, starting_share_count).await
}
const fn create_empty_workload_result(workload_type: WorkloadType) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration: Duration::from_secs(0),
errors: 0,
cache_misses: 0,
latency_percentiles: None,
}
}

View File

@ -0,0 +1,311 @@
use std::{env, time::Instant};
use log::info;
use nomos_storage::{
api::chain::StorageChainApi as _,
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use serde::{Deserialize, Serialize};
use storage_benchmarks::{
benchmark::{analyze_dataset, utilities::create_header_id},
data::create_block_data,
BenchConfig, DatasetGenConfig,
};
pub struct DatasetGenerator {
config: DatasetGenConfig,
storage: RocksBackend,
progress: GenerationProgress,
stats: GenerationStats,
}
#[derive(Debug, Clone, Default)]
pub struct GenerationProgress {
pub blocks_completed: usize,
pub da_objects_completed: usize,
pub current_batch_start: usize,
pub total_target_blocks: usize,
pub generation_start_time: Option<Instant>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GenerationStats {
pub blocks_generated_this_run: usize,
pub da_objects_generated_this_run: usize,
pub total_generation_time: std::time::Duration,
pub block_generation_rate: f64,
pub da_generation_rate: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetGenerationReport {
pub generation_summary: GenerationSummary,
pub performance: PerformanceMetrics,
pub config: DatasetGenConfig,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationSummary {
pub blocks_generated: usize,
pub da_objects_generated: usize,
pub total_objects: usize,
pub duration_seconds: u64,
pub duration_minutes: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct PerformanceMetrics {
pub block_rate_per_sec: f64,
pub da_rate_per_sec: f64,
pub total_rate_objects_per_sec: f64,
pub cpu_cores_used: usize,
}
impl DatasetGenerator {
pub async fn new(config_path: &str) -> Result<Self, Box<dyn std::error::Error>> {
let config = DatasetGenConfig::from_file(config_path)?;
let benchmark_config = BenchConfig::production();
let storage = RocksBackend::new(benchmark_config.settings)?;
let mut generator = Self {
config,
storage,
progress: GenerationProgress::default(),
stats: GenerationStats::default(),
};
generator.analyze_existing_data().await?;
Ok(generator)
}
async fn analyze_existing_data(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let (existing_blocks, existing_da) = analyze_dataset(&mut self.storage).await?;
self.progress.blocks_completed = existing_blocks;
self.progress.da_objects_completed = existing_da;
self.progress.total_target_blocks = self.config.total_blocks();
info!(
"Found existing data: {} blocks, {} DA objects",
existing_blocks, existing_da
);
info!("Target: {} total blocks", self.progress.total_target_blocks);
Ok(())
}
pub async fn generate_dataset(
&mut self,
) -> Result<GenerationStats, Box<dyn std::error::Error>> {
info!(
"Multi-core generation: {} ({} cores available)",
self.config.dataset.name,
num_cpus::get()
);
self.progress.generation_start_time = Some(Instant::now());
if self.progress.blocks_completed < self.progress.total_target_blocks {
self.generate_remaining_blocks().await?;
} else {
info!("All blocks already generated!");
}
self.generate_da_objects()?;
self.finalize_generation();
self.save_generation_report();
Ok(self.stats.clone())
}
async fn generate_remaining_blocks(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let remaining_blocks = self.progress.total_target_blocks - self.progress.blocks_completed;
info!(
"Resuming block generation from block {}, generating {} more blocks",
self.progress.blocks_completed, remaining_blocks
);
let blocks_generated = self.generate_blocks_in_batches(remaining_blocks).await?;
self.stats.blocks_generated_this_run = blocks_generated;
Ok(())
}
async fn generate_blocks_in_batches(
&mut self,
blocks_to_generate: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
const PARALLEL_BATCH_SIZE: usize = 1000;
let mut blocks_generated = 0;
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
let actual_batch_start = self.progress.blocks_completed + batch_start;
let batch_data =
self.generate_block_batch_parallel(actual_batch_start, batch_end - batch_start)?;
self.store_block_batch(&batch_data).await?;
blocks_generated += batch_end - batch_start;
self.log_block_progress(actual_batch_start, blocks_generated);
}
Ok(blocks_generated)
}
fn generate_block_batch_parallel(
&self,
start_index: usize,
count: usize,
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
use rayon::prelude::*;
let generation_start = Instant::now();
let batch_data: Vec<_> = (0..count)
.into_par_iter()
.map(|i| {
let block_index = start_index + i;
let block_data = create_block_data(block_index, self.config.blocks.size_bytes);
(block_index, block_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
count,
generation_time.as_secs_f64(),
count as f64 / generation_time.as_secs_f64()
);
Ok(batch_data)
}
async fn store_block_batch(
&mut self,
batch: &[(usize, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (block_index, block_data) in batch {
let header_id = create_header_id(*block_index);
self.storage
.store_block(header_id, block_data.clone())
.await?;
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
self.storage.store_immutable_block_ids(ids).await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
batch.len(),
storage_time.as_secs_f64(),
batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
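// Placeholder: this builder currently generates no DA objects; the counter stays at
// zero and only block data is produced.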
fn generate_da_objects(&mut self) -> Result<(), Box<dyn std::error::Error>> {
self.stats.da_objects_generated_this_run = 0;
Ok(())
}
fn log_block_progress(&self, current_block: usize, blocks_generated: usize) {
if self.progress.total_target_blocks > 1000 {
let completion_percent =
(blocks_generated * 100) as f64 / self.progress.total_target_blocks as f64;
info!(
"Block progress: {} completed - {:.1}% total",
current_block, completion_percent
);
}
}
fn finalize_generation(&mut self) {
if let Some(start_time) = self.progress.generation_start_time {
self.stats.total_generation_time = start_time.elapsed();
if self.stats.total_generation_time.as_secs() > 0 {
self.stats.block_generation_rate = self.stats.blocks_generated_this_run as f64
/ self.stats.total_generation_time.as_secs_f64();
self.stats.da_generation_rate = self.stats.da_objects_generated_this_run as f64
/ self.stats.total_generation_time.as_secs_f64();
}
}
}
fn save_generation_report(&self) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_generation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let report = DatasetGenerationReport {
generation_summary: GenerationSummary {
blocks_generated: self.stats.blocks_generated_this_run,
da_objects_generated: self.stats.da_objects_generated_this_run,
total_objects: self.stats.blocks_generated_this_run
+ self.stats.da_objects_generated_this_run,
duration_seconds: self.stats.total_generation_time.as_secs(),
duration_minutes: self.stats.total_generation_time.as_secs_f64() / 60.0,
},
performance: PerformanceMetrics {
block_rate_per_sec: self.stats.block_generation_rate,
da_rate_per_sec: self.stats.da_generation_rate,
total_rate_objects_per_sec: self.stats.block_generation_rate
+ self.stats.da_generation_rate,
cpu_cores_used: num_cpus::get(),
},
config: self.config.clone(),
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
}
}
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let args: Vec<String> = env::args().collect();
if args.len() < 3 || args[1] != "--config" {
print_usage();
return Err("Configuration file required".into());
}
let mut generator = DatasetGenerator::new(&args[2]).await?;
let final_stats = generator.generate_dataset().await?;
info!("Generation completed successfully!");
info!(
"Final stats: {} blocks, {} DA objects in {:.1}min",
final_stats.blocks_generated_this_run,
final_stats.da_objects_generated_this_run,
final_stats.total_generation_time.as_secs_f64() / 60.0
);
Ok(())
}
fn print_usage() {
eprintln!("Multi-core Dataset Builder");
eprintln!("Generates blocks and DA data in parallel");
eprintln!();
eprintln!("USAGE:");
eprintln!(" cargo run --bin dataset_builder -- --config <file>");
}

View File

@ -0,0 +1,375 @@
use std::{env, time::Instant};
use log::info;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use rand::SeedableRng as _;
use rayon::prelude::*;
use storage_benchmarks::{
benchmark::{
analyze_dataset,
utilities::{create_blob_id, create_header_id},
DatasetGenerationReport, GenerationPerformance, GenerationSummary,
},
data::{create_block_data, create_da_share},
BenchConfig, DatasetGenConfig,
};
const PARALLEL_BATCH_SIZE: usize = 1000;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let args: Vec<String> = env::args().collect();
if args.len() < 3 || args[1] != "--config" {
print_usage();
return Err("Configuration file required".into());
}
run_multicore_generation(&args[2]).await
}
async fn run_multicore_generation(config_path: &str) -> Result<(), Box<dyn std::error::Error>> {
let config = DatasetGenConfig::from_file(config_path)?;
info!(
"Multi-core generation: {} ({} cores available)",
config.dataset.name,
num_cpus::get()
);
let generation_start = Instant::now();
let benchmark_config = BenchConfig::production();
let mut storage = RocksBackend::new(benchmark_config.settings)?;
let (existing_blocks, existing_da) = analyze_dataset(&mut storage).await?;
let total_blocks = config.total_blocks();
info!(
"Found existing data: {} blocks, {} DA objects",
existing_blocks, existing_da
);
info!("Target: {} total blocks", total_blocks);
let blocks_generated = if existing_blocks < total_blocks {
let remaining_blocks = total_blocks - existing_blocks;
info!(
"Resuming block generation from block {}, generating {} more blocks",
existing_blocks, remaining_blocks
);
generate_blocks_multicore(&mut storage, &config, remaining_blocks, existing_blocks).await?
} else {
info!("All blocks already generated!");
0
};
let da_generated = generate_da_objects_multicore(&mut storage, &config, total_blocks).await?;
let total_time = generation_start.elapsed();
log_generation_completion(blocks_generated, da_generated, total_time);
Ok(())
}
async fn generate_blocks_multicore(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
blocks_to_generate: usize,
start_from_block: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
let mut blocks_generated = 0;
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
let batch_size = batch_end - batch_start;
let actual_batch_start = start_from_block + batch_start;
let actual_batch_end = start_from_block + batch_end;
let block_data_batch =
generate_block_batch_parallel(actual_batch_start, actual_batch_end, config)?;
store_block_batch(storage, &block_data_batch).await?;
blocks_generated += batch_size;
log_block_progress(
actual_batch_start,
actual_batch_end,
start_from_block + blocks_to_generate,
blocks_generated,
);
}
Ok(blocks_generated)
}
async fn generate_da_objects_multicore(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
total_blocks: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
info!(
"Generating DA objects using {} CPU cores...",
num_cpus::get()
);
let mut da_objects_generated = 0;
for batch_start in (0..total_blocks).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, total_blocks);
let da_batch_count =
generate_da_batch_for_blocks(storage, config, batch_start, batch_end).await?;
da_objects_generated += da_batch_count;
}
Ok(da_objects_generated)
}
fn generate_block_batch_parallel(
batch_start: usize,
batch_end: usize,
config: &DatasetGenConfig,
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
let batch_indices: Vec<usize> = (batch_start..batch_end).collect();
let generation_start = Instant::now();
let block_data_batch: Vec<(usize, bytes::Bytes)> = batch_indices
.par_iter()
.map(|&block_index| {
let block_data = create_block_data(block_index, config.blocks.size_bytes);
(block_index, block_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
batch_end - batch_start,
generation_time.as_secs_f64(),
(batch_end - batch_start) as f64 / generation_time.as_secs_f64()
);
Ok(block_data_batch)
}
async fn store_block_batch(
storage: &mut RocksBackend,
block_batch: &[(usize, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (block_index, block_data) in block_batch {
let header_id = create_header_id(*block_index);
storage.store_block(header_id, block_data.clone()).await?;
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
storage.store_immutable_block_ids(ids).await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
block_batch.len(),
storage_time.as_secs_f64(),
block_batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
async fn generate_da_batch_for_blocks(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
batch_start: usize,
batch_end: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
let da_specs = collect_da_specs_for_blocks(config, batch_start, batch_end);
if da_specs.is_empty() {
return Ok(0);
}
let da_data_batch = generate_da_data_parallel(&da_specs, config)?;
store_da_batch(storage, &da_data_batch).await?;
Ok(da_data_batch.len())
}
fn collect_da_specs_for_blocks(
config: &DatasetGenConfig,
batch_start: usize,
batch_end: usize,
) -> Vec<(usize, usize, usize)> {
let mut da_specs = Vec::new();
for block in batch_start..batch_end {
for blob in 0..config.network.blobs_per_block {
let blob_global_index = block * config.network.blobs_per_block + blob;
let subnet = blob_global_index % config.network.total_subnets;
if subnet < config.validator.assigned_subnets {
da_specs.push((block, blob, subnet));
}
}
}
da_specs
}
fn generate_da_data_parallel(
da_specs: &[(usize, usize, usize)],
config: &DatasetGenConfig,
) -> Result<
Vec<(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)>,
Box<dyn std::error::Error>,
> {
let generation_start = Instant::now();
let da_data_batch: Vec<_> = da_specs
.par_iter()
.map(|&(block, blob, subnet)| {
let blob_id = create_blob_id(block, blob);
let share_idx = [subnet as u8, 0u8];
let share_data = create_da_share(block, blob, config.da.share_size_bytes);
let commitment_data = {
let mut rng =
rand_chacha::ChaCha20Rng::seed_from_u64((block as u64 * 1000) + blob as u64);
use rand::Rng as _;
let data: Vec<u8> = (0..config.da.commitment_size_bytes)
.map(|_| rng.gen())
.collect();
bytes::Bytes::from(data)
};
(blob_id, share_idx, share_data, commitment_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} DA objects in {:.2}s ({:.0} objects/s)",
da_data_batch.len(),
generation_time.as_secs_f64(),
da_data_batch.len() as f64 / generation_time.as_secs_f64()
);
Ok(da_data_batch)
}
async fn store_da_batch(
storage: &mut RocksBackend,
da_batch: &[(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (blob_id, share_idx, share_data, commitment_data) in da_batch {
storage
.store_light_share(*blob_id, *share_idx, share_data.clone())
.await?;
storage
.store_shared_commitments(*blob_id, commitment_data.clone())
.await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} DA objects in {:.2}s ({:.0} objects/s)",
da_batch.len(),
storage_time.as_secs_f64(),
da_batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
fn log_block_progress(
batch_start: usize,
batch_end: usize,
total_blocks: usize,
blocks_generated: usize,
) {
if total_blocks > 1000 {
info!(
"Block progress: {}-{} completed - {:.1}% total",
batch_start,
batch_end - 1,
(blocks_generated * 100) as f64 / total_blocks as f64
);
}
}
fn log_generation_completion(
blocks_generated: usize,
da_generated: usize,
total_time: std::time::Duration,
) {
save_generation_report(blocks_generated, da_generated, total_time);
info!(
"Multi-core generation completed: {} blocks, {} DA objects in {:.1}min",
blocks_generated,
da_generated,
total_time.as_secs_f64() / 60.0
);
let total_rate = (blocks_generated + da_generated) as f64 / total_time.as_secs_f64();
info!(
"Total rate: {:.0} objects/sec using {} CPU cores",
total_rate,
num_cpus::get()
);
}
fn save_generation_report(
blocks_generated: usize,
da_generated: usize,
total_time: std::time::Duration,
) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_generation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let report = DatasetGenerationReport {
generation_summary: GenerationSummary {
blocks_generated,
da_objects_generated: da_generated,
total_objects: blocks_generated + da_generated,
duration_seconds: total_time.as_secs(),
duration_minutes: total_time.as_secs_f64() / 60.0,
},
performance: GenerationPerformance {
total_rate_objects_per_sec: (blocks_generated + da_generated) as f64
/ total_time.as_secs_f64(),
block_rate_per_sec: blocks_generated as f64 / total_time.as_secs_f64(),
da_rate_per_sec: da_generated as f64 / total_time.as_secs_f64(),
cpu_cores_used: num_cpus::get(),
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
}
}
fn print_usage() {
eprintln!("Multi-core Dataset Generator");
eprintln!("Uses all CPU cores for parallel data generation");
eprintln!();
eprintln!("USAGE:");
eprintln!(" POL_PROOF_DEV_MODE=true cargo run --example multicore_dataset_generator -- --config <file>");
}

View File

@ -0,0 +1,193 @@
use clap::Parser as _;
use log::info;
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use storage_benchmarks::{
benchmark::{
analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary,
BenchResultsSummary, StatisticsSummary, StorageBenchReport,
},
config::{ProductionBenchConfig, ValidatorProfiles},
data::initialize_benchmark_seed,
metrics::RuntimeValidatorAllocator,
BenchConfig,
};
#[global_allocator]
static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = ProductionBenchConfig::parse();
config.validate()?;
let _seed_config = initialize_benchmark_seed(&[]);
run_benchmark(config).await
}
async fn run_benchmark(config: ProductionBenchConfig) -> Result<(), Box<dyn std::error::Error>> {
ALLOCATOR.set_limit_gb(config.memory as usize);
let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
let profile = profiles
.get_profile(&config.profile.to_string())
.ok_or_else(|| format!("Profile '{}' not found", config.profile))?;
let bench_config = BenchConfig::production();
if !bench_config.settings.db_path.exists() {
return Err("No dataset found".into());
}
let mut results = Vec::new();
for i in 1..=config.warmup_runs {
info!("Warmup run {}/{}", i, config.warmup_runs);
let _ = run_iteration(&bench_config, profile, &config).await;
}
for i in 1..=config.measurement_runs {
info!("Measurement run {}/{}", i, config.measurement_runs);
let result = run_iteration(&bench_config, profile, &config).await;
info!("Run {} result: {:.1} ops/sec", i, result);
results.push(result);
}
report_results(&results, &config);
Ok(())
}
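// One measurement pass: open the backend with this run's read-only setting, size the
// workloads from the existing dataset, and return the combined throughput in ops/sec.
// Backend open errors are logged; any other failure silently yields 0.0.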
async fn run_iteration(
bench_config: &BenchConfig,
profile: &storage_benchmarks::config::ValidatorProfile,
config: &ProductionBenchConfig,
) -> f64 {
let mut storage_settings = bench_config.settings.clone();
storage_settings.read_only = config.read_only;
match RocksBackend::new(storage_settings) {
Ok(mut storage) => {
if let Ok((block_count, share_count)) = analyze_dataset(&mut storage).await {
if let Ok(result) = run_concurrent_validator_benchmark(
storage,
std::time::Duration::from_secs(config.duration),
profile,
(block_count, share_count),
config.read_only,
)
.await
{
return result.combined_throughput();
}
}
}
Err(e) => log::error!("Storage error: {}", e),
}
0.0
}
fn report_results(results: &[f64], config: &ProductionBenchConfig) {
save_results_to_file(results, config);
print_results_summary(results, config);
}
fn save_results_to_file(results: &[f64], config: &ProductionBenchConfig) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!(
"storage_bench_{}_{}_{}gb_{}.json",
config.profile, timestamp, config.memory, config.duration
);
let filepath = results_dir.join(filename);
let mean = if results.is_empty() {
0.0
} else {
results.iter().sum::<f64>() / results.len() as f64
};
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
let detailed_results = StorageBenchReport {
benchmark_config: BenchConfigSummary {
profile: format!("{:?}", config.profile),
memory_gb: config.memory,
duration_seconds: config.duration,
warmup_runs: config.warmup_runs,
measurement_runs: config.measurement_runs,
},
results: BenchResultsSummary {
raw_measurements: results.to_vec(),
statistics: StatisticsSummary {
mean_ops_sec: mean,
min_ops_sec: min,
max_ops_sec: max,
variability_percent: variability,
sample_count: results.len(),
},
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
let json_content = match serde_json::to_string_pretty(&detailed_results) {
Ok(content) => content,
Err(e) => {
log::error!("Failed to serialize results: {}", e);
return;
}
};
match std::fs::write(&filepath, json_content) {
Ok(_) => log::info!("Results saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
}
}
fn print_results_summary(results: &[f64], config: &ProductionBenchConfig) {
if results.is_empty() {
return;
}
let mean = results.iter().sum::<f64>() / results.len() as f64;
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
info!(
"Mean: {:.1} ops/sec, Range: {:.1}-{:.1}, Variability: {:.1}%",
mean, min, max, variability
);
let summary = StatisticsSummary {
mean_ops_sec: mean,
min_ops_sec: min,
max_ops_sec: max,
variability_percent: variability,
sample_count: results.len(),
};
log::info!(
"MACHINE_READABLE: {}",
serde_json::to_string(&summary).unwrap_or_default()
);
println!("\n| Profile | Memory | Ops/sec | Variability |");
println!("|---------|--------|---------|-------------|");
println!(
"| {} | {}GB | {:.1} | {:.1}% |",
config.profile, config.memory, mean, variability
);
}

View File

@ -0,0 +1,259 @@
use std::time::Instant;
use log::info;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use storage_benchmarks::{
benchmark::utilities::{create_blob_id, create_header_id},
BenchConfig, CompletenessSummary, DataSizesSummary, DatasetVerificationReport,
VerificationPerformance, VerificationSummary, WarningsSummary,
};
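// Verification is keyed to a reference dataset: it probes up to 1,100,000 block indices
// (expecting 1,051,200 blocks, which matches 730 days at a 60 s block time) and samples
// DA shares and commitments from the first 1,000 blobs across 50 subnet indices.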
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = BenchConfig::production();
if !config.settings.db_path.exists() {
println!(
"No database found at: {}",
config.settings.db_path.display()
);
return Err("Database not found".into());
}
info!("Opening database: {}", config.settings.db_path.display());
let mut storage_settings = config.settings.clone();
storage_settings.read_only = true;
let mut storage = RocksBackend::new(storage_settings)?;
info!("Starting database verification");
info!("=== Database Verification ===");
info!("Checking blocks...");
let start_time = Instant::now();
let mut blocks_found = 0;
let mut total_block_size = 0u64;
for chunk_start in (0..1_100_000).step_by(10_000) {
let mut chunk_found = 0;
let chunk_end = chunk_start + 10_000;
for i in chunk_start..chunk_end {
let header_id = create_header_id(i);
match storage.get_block(header_id).await {
Ok(Some(data)) => {
blocks_found += 1;
total_block_size += data.len() as u64;
chunk_found += 1;
}
Ok(None) => {
if chunk_found == 0 {
info!("No more blocks found after block {}", i);
break;
}
}
Err(_) => {}
}
}
if chunk_found == 0 {
break;
}
info!(
"Blocks {}-{}: found {} blocks",
chunk_start,
chunk_start + chunk_found - 1,
chunk_found
);
}
let blocks_check_time = start_time.elapsed();
println!("Block Data:");
println!(" Blocks found: {}", blocks_found);
println!(" Expected blocks: 1,051,200");
println!(
" Total block size: {:.1} GB",
total_block_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Average block size: {} bytes",
if blocks_found > 0 {
total_block_size / blocks_found
} else {
0
}
);
println!(" Check time: {:.1}s", blocks_check_time.as_secs_f64());
println!();
info!("Checking DA shares...");
let start_time = Instant::now();
let mut shares_found = 0;
let mut total_share_size = 0u64;
let mut commitments_found = 0;
let mut total_commitment_size = 0u64;
for blob_idx in 0..1000 {
for subnet in 0..50 {
let blob_id = create_blob_id(blob_idx, 0);
let share_idx = [subnet as u8, 0u8];
if let Ok(Some(data)) = storage.get_light_share(blob_id, share_idx).await {
shares_found += 1;
total_share_size += data.len() as u64;
}
if let Ok(Some(data)) = storage.get_shared_commitments(blob_id).await {
commitments_found += 1;
total_commitment_size += data.len() as u64;
}
}
if blob_idx % 100 == 0 {
info!(
"Checked blob {} - found {} shares, {} commitments so far",
blob_idx, shares_found, commitments_found
);
}
}
let da_check_time = start_time.elapsed();
println!("DA Data:");
println!(
" DA shares found: {} (sampled from first 50K possibilities)",
shares_found
);
println!(" Expected DA shares: ~256,650 total");
println!(
" Total share size: {:.1} MB",
total_share_size as f64 / 1024.0 / 1024.0
);
println!(
" Average share size: {} bytes",
if shares_found > 0 {
total_share_size / shares_found
} else {
0
}
);
println!();
println!(" Commitments found: {}", commitments_found);
println!(
" Total commitment size: {:.1} GB",
total_commitment_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Average commitment size: {} bytes",
if commitments_found > 0 {
total_commitment_size / commitments_found
} else {
0
}
);
println!(" Check time: {:.1}s", da_check_time.as_secs_f64());
println!();
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
println!("Summary:");
println!(" Database on disk: 4.8 GB");
println!(
" Verified data size: {:.1} GB",
total_verified_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Blocks completeness: {:.1}%",
blocks_found as f64 / 1_051_200.0 * 100.0
);
println!(
" Estimated DA completeness: {:.1}%",
shares_found as f64 / (256_650.0 / 50.0) * 100.0
);
if blocks_found < 1_000_000 {
println!("WARNING: Block generation may have been incomplete");
}
if total_verified_size < 50 * 1024 * 1024 * 1024 {
println!("WARNING: Data size much smaller than expected - check generation logic");
}
save_verification_report(
blocks_found as usize,
shares_found as usize,
commitments_found as usize,
total_block_size,
total_share_size,
total_commitment_size,
blocks_check_time + da_check_time,
);
Ok(())
}
fn save_verification_report(
blocks_found: usize,
shares_found: usize,
commitments_found: usize,
total_block_size: u64,
total_share_size: u64,
total_commitment_size: u64,
verification_time: std::time::Duration,
) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_verification_{}.json", timestamp);
let filepath = results_dir.join(filename);
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
let report = DatasetVerificationReport {
verification_summary: VerificationSummary {
blocks_found,
da_shares_found: shares_found,
da_commitments_found: commitments_found,
total_objects_found: blocks_found + shares_found + commitments_found,
},
data_sizes: DataSizesSummary {
total_block_size_bytes: total_block_size,
total_share_size_bytes: total_share_size,
total_commitment_size_bytes: total_commitment_size,
total_verified_size_bytes: total_verified_size,
total_verified_size_gb: total_verified_size as f64 / (1024.0 * 1024.0 * 1024.0),
},
completeness_estimates: CompletenessSummary {
block_completeness_percent: blocks_found as f64 / 1_051_200.0 * 100.0,
da_completeness_percent: shares_found as f64 / (256_650.0 / 50.0) * 100.0,
},
performance: VerificationPerformance {
verification_time_seconds: verification_time.as_secs_f64(),
objects_verified_per_sec: (blocks_found + shares_found + commitments_found) as f64
/ verification_time.as_secs_f64(),
},
warnings: WarningsSummary {
block_generation_incomplete: blocks_found < 1_000_000,
data_size_smaller_than_expected: total_verified_size < 50 * 1024 * 1024 * 1024,
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Verification report saved to: {}", filepath.display()),
Err(e) => log::warn!(
"Failed to save verification report to {}: {}",
filepath.display(),
e
),
}
}

View File

@ -0,0 +1,161 @@
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use storage_benchmarks::BenchConfig;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = BenchConfig::production();
if !config.settings.db_path.exists() {
println!(
"No database found at: {}",
config.settings.db_path.display()
);
return Err("Database not found - run dataset_generator first".into());
}
let storage = RocksBackend::new(config.settings)?;
println!("=== RocksDB Properties Verification ===");
println!();
verify_property_availability(&storage);
Ok(())
}
fn verify_property_availability(storage: &RocksBackend) {
let level0_prop = rocksdb::properties::num_files_at_level(0);
let level1_prop = rocksdb::properties::num_files_at_level(1);
let level2_prop = rocksdb::properties::num_files_at_level(2);
let properties_to_test = vec![
("STATS", rocksdb::properties::STATS),
(
"BLOCK_CACHE_CAPACITY",
rocksdb::properties::BLOCK_CACHE_CAPACITY,
),
(
"TOTAL_SST_FILES_SIZE",
rocksdb::properties::TOTAL_SST_FILES_SIZE,
),
(
"CUR_SIZE_ALL_MEM_TABLES",
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES,
),
(
"LIVE_SST_FILES_SIZE",
rocksdb::properties::LIVE_SST_FILES_SIZE,
),
("ESTIMATE_NUM_KEYS", rocksdb::properties::ESTIMATE_NUM_KEYS),
("NUM_FILES_AT_LEVEL0", &level0_prop),
("NUM_FILES_AT_LEVEL1", &level1_prop),
("NUM_FILES_AT_LEVEL2", &level2_prop),
];
let custom_properties = vec![
"rocksdb.index-and-filter-cache.usage",
"rocksdb.index-and-filter-cache.capacity",
"rocksdb.compaction-pending",
"rocksdb.number.compactions",
"rocksdb.compact.read.bytes",
"rocksdb.compact.write.bytes",
"rocksdb.compaction.cpu.time",
"rocksdb.mem-table-flush-pending",
"rocksdb.space.amplification",
"rocksdb.total-sst-files-size",
"rocksdb.number.keys.deleted",
"rocksdb.size-bytes-at-level0",
"rocksdb.size-bytes-at-level1",
];
println!("Standard RocksDB Properties:");
for (name, prop) in properties_to_test {
test_standard_property(storage, name, &prop.to_string());
}
println!("\nCustom/Extended Properties:");
for prop_name in custom_properties {
test_custom_property(storage, prop_name);
}
println!("\nSTATS Property Sample:");
test_stats_property(storage);
}
fn test_standard_property(storage: &RocksBackend, name: &str, property: &str) {
let property_owned = property.to_string();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(value_bytes)) => {
let value_str = String::from_utf8_lossy(&value_bytes);
let truncated = if value_str.len() > 100 {
format!("{}...", &value_str[..100])
} else {
value_str.to_string()
};
println!("OK {}: {}", name, truncated);
}
Ok(None) => {
println!("FAIL {}: None (property exists but no value)", name);
}
Err(e) => {
println!("FAIL {}: Error - {}", name, e);
}
}
}
fn test_custom_property(storage: &RocksBackend, property: &str) {
let prop_owned = property.to_string();
let transaction = storage.txn(move |db| match db.property_value(&prop_owned) {
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(value_bytes)) => {
let value_str = String::from_utf8_lossy(&value_bytes);
println!("OK {}: {}", property, value_str.trim());
}
Ok(None) => {
println!("FAIL {}: None (property exists but no value)", property);
}
Err(e) => {
println!("FAIL {}: Error - {}", property, e);
}
}
}
fn test_stats_property(storage: &RocksBackend) {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats)) => Ok(Some(stats.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => {
let stats_str = String::from_utf8_lossy(&stats_bytes);
println!("Sample STATS lines:");
for (i, line) in stats_str.lines().take(10).enumerate() {
println!(" {}: {}", i + 1, line);
}
if stats_str.lines().count() > 10 {
println!(" ... ({} total lines)", stats_str.lines().count());
}
}
Ok(None) => {
println!("FAIL STATS: None");
}
Err(e) => {
println!("FAIL STATS: Error - {}", e);
}
}
}

View File

@ -0,0 +1,152 @@
use clap::Parser;
use super::types::{CompressionType, ProfileType};
use crate::RocksDbTuningOptions;
#[derive(Debug, Clone, Parser)]
#[command(name = "optimization_bench")]
#[command(about = "RocksDB optimization benchmarks")]
#[command(long_about = "Systematic RocksDB parameter optimization with statistical rigor")]
#[non_exhaustive]
pub struct ProductionBenchConfig {
#[arg(long)]
pub profile: ProfileType,
#[arg(long, default_value = "8")]
pub memory: u32,
#[arg(long, default_value = "120")]
pub duration: u64,
#[arg(long)]
pub cache_size: Option<u32>,
#[arg(long)]
pub write_buffer: Option<u32>,
#[arg(long)]
pub compaction_jobs: Option<u32>,
#[arg(long)]
pub block_size: Option<u32>,
#[arg(long)]
pub compression: Option<CompressionType>,
#[arg(long)]
pub read_only: bool,
#[arg(long)]
pub seed: Option<u64>,
#[arg(long, default_value = "1")]
pub warmup_runs: usize,
#[arg(long, default_value = "3")]
pub measurement_runs: usize,
}
#[derive(Debug, Clone, Parser)]
#[command(name = "dataset_generator")]
#[command(about = "Multi-core dataset generation")]
pub struct DatasetGeneratorConfig {
#[arg(long)]
pub config: std::path::PathBuf,
#[arg(long)]
pub seed: Option<u64>,
#[arg(long)]
pub size_limit: Option<f64>,
}
#[derive(Debug, thiserror::Error)]
pub enum ConfigValidationError {
#[error("Memory limit must be between 1-512GB, got {0}GB")]
InvalidMemoryLimit(u32),
#[error("Duration must be between 1-86400 seconds, got {0}s")]
InvalidDuration(u64),
#[error("Cache size must be between 1-80% of RAM, got {0}%")]
InvalidCacheSize(u32),
#[error("Write buffer must be between 16-2048MB, got {0}MB")]
InvalidWriteBuffer(u32),
#[error("Compaction jobs must be between 1-32, got {0}")]
InvalidCompactionJobs(u32),
#[error("Block size must be between 1-128KB, got {0}KB")]
InvalidBlockSize(u32),
#[error("Warmup runs must be less than measurement runs, got warmup={0}, measurement={1}")]
InvalidRunCounts(usize, usize),
#[error("Unknown compression type: {0} (valid: none, lz4, zstd)")]
InvalidCompression(String),
#[error("Profile '{0}' not found in validator_profiles.toml")]
ProfileNotFound(String),
}
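// Example invocation (a sketch; the binary name is assumed and the kebab-case flags are
// derived by clap from the field names above):
//
//     cargo run --bin storage_bench_runner -- \
//         --profile mainnet --memory 16 --duration 300 --measurement-runs 5 --read-only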
impl ProductionBenchConfig {
pub fn validate(&self) -> Result<(), ConfigValidationError> {
if !(1..=512).contains(&self.memory) {
return Err(ConfigValidationError::InvalidMemoryLimit(self.memory));
}
if !(1..=86400).contains(&self.duration) {
return Err(ConfigValidationError::InvalidDuration(self.duration));
}
if let Some(cache) = self.cache_size {
if !(1..=80).contains(&cache) {
return Err(ConfigValidationError::InvalidCacheSize(cache));
}
}
if let Some(buffer) = self.write_buffer {
if !(16..=2048).contains(&buffer) {
return Err(ConfigValidationError::InvalidWriteBuffer(buffer));
}
}
if let Some(jobs) = self.compaction_jobs {
if !(1..=32).contains(&jobs) {
return Err(ConfigValidationError::InvalidCompactionJobs(jobs));
}
}
if let Some(block_size) = self.block_size {
if !(1..=128).contains(&block_size) {
return Err(ConfigValidationError::InvalidBlockSize(block_size));
}
}
if self.warmup_runs >= self.measurement_runs {
return Err(ConfigValidationError::InvalidRunCounts(
self.warmup_runs,
self.measurement_runs,
));
}
if let Some(comp) = self.compression {
log::debug!("Compression type: {comp}");
}
Ok(())
}
#[must_use]
pub const fn to_rocksdb_tuning(&self) -> RocksDbTuningOptions {
RocksDbTuningOptions {
cache_size_percent: self.cache_size,
write_buffer_mb: self.write_buffer,
compaction_jobs: self.compaction_jobs,
block_size_kb: self.block_size,
compression: self.compression,
bloom_filter_bits: None,
}
}
}

View File

@ -0,0 +1,88 @@
use serde::{Deserialize, Serialize};
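// Sketch of the TOML shape this config deserializes from (values are illustrative, not a
// committed dataset config); with days = 730 and block_time_seconds = 60,
// total_blocks() works out to 730 * 1440 = 1,051,200:
//
//     [dataset]
//     days = 730
//     block_time_seconds = 60
//     name = "two-year-mainnet"
//
//     [network]
//     load_name = "steady"
//     blobs_per_block = 5
//     total_subnets = 100
//
//     [validator]
//     assigned_subnets = 10
//
//     [blocks]
//     size_bytes = 34371
//
//     [da]
//     share_size_bytes = 1024
//     commitment_size_bytes = 220000
//     shares_per_blob = 100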
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetGenConfig {
pub dataset: DatasetParams,
pub network: NetworkParams,
pub validator: ValidatorParams,
pub blocks: BlockParams,
pub da: DataAvailabilityParams,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetParams {
pub days: usize,
pub block_time_seconds: u64,
pub name: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkParams {
pub load_name: String,
pub blobs_per_block: usize,
pub total_subnets: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorParams {
pub assigned_subnets: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockParams {
pub size_bytes: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataAvailabilityParams {
pub share_size_bytes: usize,
pub commitment_size_bytes: usize,
pub shares_per_blob: usize,
}
impl DatasetGenConfig {
pub fn from_file<P: AsRef<std::path::Path>>(
path: P,
) -> Result<Self, Box<dyn std::error::Error>> {
let content = std::fs::read_to_string(path)?;
let config: Self =
toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {e}"))?;
Ok(config)
}
#[must_use]
pub const fn total_blocks(&self) -> usize {
let blocks_per_day = (24 * 60 * 60) / self.dataset.block_time_seconds as usize;
self.dataset.days * blocks_per_day
}
#[must_use]
pub fn estimated_size(&self) -> String {
let total_blocks = self.total_blocks() as u64;
let block_size = self.blocks.size_bytes as u64;
let subnet_assignment_probability =
self.validator.assigned_subnets as f64 / self.network.total_subnets as f64;
let total_blobs = total_blocks * self.network.blobs_per_block as u64;
let validator_assigned_blobs = (total_blobs as f64 * subnet_assignment_probability) as u64;
let shares_per_assigned_blob =
self.da.shares_per_blob as u64 / self.network.total_subnets as u64;
let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob;
let block_data_size = total_blocks * block_size;
let da_shares_size = total_shares_stored * self.da.share_size_bytes as u64;
let da_commitments_size = validator_assigned_blobs * self.da.commitment_size_bytes as u64;
let da_data_size = da_shares_size + da_commitments_size;
let total_bytes = block_data_size + da_data_size;
if total_bytes < 1024 * 1024 {
format!("{:.1} KB", total_bytes as f64 / 1024.0)
} else if total_bytes < 1024 * 1024 * 1024 {
format!("{:.1} MB", total_bytes as f64 / 1024.0 / 1024.0)
} else {
format!("{:.1} GB", total_bytes as f64 / 1024.0 / 1024.0 / 1024.0)
}
}
}

View File

@ -0,0 +1,9 @@
pub mod cli;
pub mod dataset_generation;
pub mod types;
pub mod validator_profiles;
pub use cli::*;
pub use dataset_generation::*;
pub use types::*;
pub use validator_profiles::*;

View File

@ -0,0 +1,122 @@
use std::str::FromStr;
use clap::ValueEnum;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
#[non_exhaustive]
pub enum CompressionType {
None,
Lz4,
Snappy,
Zstd,
}
impl Default for CompressionType {
fn default() -> Self {
Self::Snappy
}
}
impl std::fmt::Display for CompressionType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::None => write!(f, "none"),
Self::Lz4 => write!(f, "lz4"),
Self::Snappy => write!(f, "snappy"),
Self::Zstd => write!(f, "zstd"),
}
}
}
impl FromStr for CompressionType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"none" => Ok(Self::None),
"lz4" => Ok(Self::Lz4),
"snappy" => Ok(Self::Snappy),
"zstd" => Ok(Self::Zstd),
_ => Err(format!("Unknown compression type: {s}")),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
#[non_exhaustive]
pub enum ProfileType {
Light,
Mainnet,
Testnet,
}
impl std::fmt::Display for ProfileType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Light => write!(f, "light"),
Self::Mainnet => write!(f, "mainnet"),
Self::Testnet => write!(f, "testnet"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum WorkloadType {
BlockValidation,
DaSampling,
DaCommitments,
IbdServing,
BlockStorage,
DaStorage,
}
impl std::fmt::Display for WorkloadType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::BlockValidation => write!(f, "block_validation"),
Self::DaSampling => write!(f, "da_sampling"),
Self::DaCommitments => write!(f, "da_commitments"),
Self::IbdServing => write!(f, "ibd_serving"),
Self::BlockStorage => write!(f, "block_storage"),
Self::DaStorage => write!(f, "da_storage"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum NetworkSize {
Small,
Medium,
Large,
Peak,
}
impl NetworkSize {
#[must_use]
pub const fn validator_count(self) -> usize {
match self {
Self::Small => 100,
Self::Medium => 1000,
Self::Large => 2000,
Self::Peak => 5000,
}
}
#[must_use]
pub const fn concurrent_services(self) -> usize {
match self {
Self::Small => 6,
Self::Medium => 8,
Self::Large => 10,
Self::Peak => 15,
}
}
}
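#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: `CompressionType` round-trips between its lowercase display
    // names and `FromStr`, which is how CLI flags are expected to reach it.
    #[test]
    fn compression_type_round_trips() {
        for ty in [
            CompressionType::None,
            CompressionType::Lz4,
            CompressionType::Snappy,
            CompressionType::Zstd,
        ] {
            assert_eq!(ty.to_string().parse::<CompressionType>(), Ok(ty));
        }
    }
}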

View File

@ -0,0 +1,115 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorProfile {
pub name: String,
pub description: String,
pub block_read_rate_hz: f64,
pub da_share_read_rate_hz: f64,
pub range_scan_rate_hz: f64,
pub block_write_rate_hz: f64,
pub da_share_write_rate_hz: f64,
pub commitment_write_rate_hz: f64,
pub recent_access_ratio: f64,
pub historical_access_ratio: f64,
#[serde(default = "default_total_validators")]
pub total_validators: usize,
#[serde(default = "default_assigned_subnets")]
pub assigned_subnets: usize,
}
impl ValidatorProfile {
#[must_use]
pub fn ibd_concurrent_streams(&self) -> usize {
let base_streams = 1;
let network_factor = (self.total_validators as f64 / 500.0).max(1.0);
let total_streams = (f64::from(base_streams) * network_factor).round() as usize;
std::cmp::min(total_streams, 8)
}
#[must_use]
pub fn da_concurrent_streams(&self) -> usize {
let subnet_factor = (self.assigned_subnets as f64 / 5.0).max(1.0);
let total_streams = subnet_factor.round() as usize;
std::cmp::min(total_streams, 5)
}
#[must_use]
pub fn total_concurrent_services(&self) -> usize {
let base_services = 3;
let ibd_services = self.ibd_concurrent_streams();
let da_services = self.da_concurrent_streams();
base_services + ibd_services + da_services
}
}
const fn default_total_validators() -> usize {
1000
}
const fn default_assigned_subnets() -> usize {
10
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkScalingConfig {
pub total_validators: usize,
pub total_subnets: usize,
pub assigned_subnets: usize,
pub activity_multiplier: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConcurrencyConfig {
pub base_concurrent_services: usize,
pub services_per_1k_validators: f64,
pub max_concurrent_services: usize,
pub ibd_concurrency_factor: f64,
pub da_concurrency_factor: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorProfiles {
pub light: ValidatorProfile,
pub mainnet: ValidatorProfile,
pub testnet: ValidatorProfile,
}
impl ValidatorProfiles {
pub fn from_file<P: AsRef<std::path::Path>>(
path: P,
) -> Result<Self, Box<dyn std::error::Error>> {
let content = std::fs::read_to_string(path)?;
let profiles: Self = toml::from_str(&content)
.map_err(|e| format!("Failed to parse validator profiles TOML: {e}"))?;
Ok(profiles)
}
#[must_use]
pub fn get_profile(&self, name: &str) -> Option<&ValidatorProfile> {
match name {
"light" => Some(&self.light),
"mainnet" => Some(&self.mainnet),
"testnet" => Some(&self.testnet),
_ => None,
}
}
#[must_use]
pub fn available_profiles(&self) -> Vec<&str> {
vec!["light", "mainnet", "testnet"]
}
}
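// Illustrative shape of the TOML consumed by `ValidatorProfiles::from_file`
// (values are placeholders, not recommended settings); the `[light]` and
// `[testnet]` tables follow the same field list:
//
//     [mainnet]
//     name = "mainnet"
//     description = "Mainnet validator workload"
//     block_read_rate_hz = 2.0
//     da_share_read_rate_hz = 5.0
//     range_scan_rate_hz = 0.1
//     block_write_rate_hz = 0.03
//     da_share_write_rate_hz = 1.0
//     commitment_write_rate_hz = 0.5
//     recent_access_ratio = 0.8
//     historical_access_ratio = 0.2
//     total_validators = 1000   # optional, defaults to 1000
//     assigned_subnets = 10     # optional, defaults to 10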

View File

@ -0,0 +1,152 @@
use rand::{Rng as _, SeedableRng as _};
use rand_chacha::ChaCha20Rng;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkSeed {
pub master_seed: u64,
pub dataset_generation_seed: u64,
pub access_pattern_seed: u64,
pub latency_measurement_seed: u64,
}
impl BenchmarkSeed {
#[must_use]
pub fn from_master(master_seed: u64) -> Self {
let mut rng = ChaCha20Rng::seed_from_u64(master_seed);
Self {
master_seed,
dataset_generation_seed: rng.gen(),
access_pattern_seed: rng.gen(),
latency_measurement_seed: rng.gen(),
}
}
#[must_use]
pub fn default_fixed() -> Self {
Self::from_master(12345)
}
#[must_use]
pub fn from_args_or_env(args: &[String]) -> Self {
for (i, arg) in args.iter().enumerate() {
if arg == "--seed" && i + 1 < args.len() {
if let Ok(seed) = args[i + 1].parse::<u64>() {
return Self::from_master(seed);
}
}
}
if let Ok(seed_str) = std::env::var("BENCHMARK_SEED") {
if let Ok(seed) = seed_str.parse::<u64>() {
return Self::from_master(seed);
}
}
Self::default_fixed()
}
#[must_use]
pub fn dataset_rng(&self) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.dataset_generation_seed)
}
#[must_use]
pub fn access_pattern_rng(&self, operation_id: u64) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.access_pattern_seed.wrapping_add(operation_id))
}
#[must_use]
pub fn latency_measurement_rng(&self) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.latency_measurement_seed)
}
pub fn log_configuration(&self) {
log::info!("Benchmark seeds (for reproducibility):");
log::info!(" Master seed: {}", self.master_seed);
log::info!(" Dataset generation: {}", self.dataset_generation_seed);
log::info!(" Access patterns: {}", self.access_pattern_seed);
log::info!(" Latency measurement: {}", self.latency_measurement_seed);
log::info!(
" Reproduce with: --seed {} or BENCHMARK_SEED={}",
self.master_seed,
self.master_seed
);
}
}
static GLOBAL_BENCHMARK_SEED: std::sync::OnceLock<BenchmarkSeed> = std::sync::OnceLock::new();
pub fn initialize_benchmark_seed(args: &[String]) -> &'static BenchmarkSeed {
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
let seed = BenchmarkSeed::from_args_or_env(args);
seed.log_configuration();
seed
})
}
pub fn get_benchmark_seed() -> &'static BenchmarkSeed {
GLOBAL_BENCHMARK_SEED.get().unwrap_or_else(|| {
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
let seed = BenchmarkSeed::default_fixed();
log::warn!("Using default seed (benchmark_seed not initialized)");
seed.log_configuration();
seed
})
})
}
#[must_use]
pub fn create_deterministic_rng(purpose: RngPurpose, id: u64) -> ChaCha20Rng {
let seed = get_benchmark_seed();
match purpose {
RngPurpose::DatasetGeneration => {
ChaCha20Rng::seed_from_u64(seed.dataset_generation_seed.wrapping_add(id))
}
RngPurpose::AccessPattern => seed.access_pattern_rng(id),
RngPurpose::LatencyMeasurement => {
ChaCha20Rng::seed_from_u64(seed.latency_measurement_seed.wrapping_add(id))
}
}
}
#[derive(Debug, Clone, Copy)]
pub enum RngPurpose {
DatasetGeneration,
AccessPattern,
LatencyMeasurement,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_deterministic_seed_derivation() {
let seed1 = BenchmarkSeed::from_master(12345);
let seed2 = BenchmarkSeed::from_master(12345);
assert_eq!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
assert_eq!(seed1.access_pattern_seed, seed2.access_pattern_seed);
}
#[test]
fn test_different_master_seeds() {
let seed1 = BenchmarkSeed::from_master(12345);
let seed2 = BenchmarkSeed::from_master(54321);
assert_ne!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
}
#[test]
fn test_deterministic_rng_creation() {
let seed = BenchmarkSeed::from_master(12345);
let rng1 = seed.access_pattern_rng(100);
let rng2 = seed.access_pattern_rng(100);
assert_eq!(rng1.get_seed(), rng2.get_seed());
}
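    // Usage sketch (seed value is illustrative): deriving RNGs from the same
    // master seed reproduces the same byte stream.
    #[test]
    fn usage_sketch_reproducible_streams() {
        use rand::Rng as _;
        let seeds = BenchmarkSeed::from_master(42);
        let first: u8 = seeds.dataset_rng().gen();
        let replayed: u8 = seeds.dataset_rng().gen();
        assert_eq!(first, replayed);
    }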
}

View File

@ -0,0 +1,200 @@
use bytes::Bytes;
use nomos_core::{da::BlobId, header::HeaderId};
use rand::Rng as _;
use rand_chacha::ChaCha20Rng;
use serde::{Deserialize, Serialize};
use crate::{
benchmark::utilities::{create_blob_id, create_header_id},
deterministic::BenchmarkSeed,
};
pub struct RealisticDataGenerator {
seed_config: BenchmarkSeed,
dataset_rng: ChaCha20Rng,
block_sequence: u64,
da_sequence: u64,
generation_stats: DataGenerationStats,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DataGenerationStats {
pub blocks_created: u64,
pub da_shares_created: u64,
pub commitments_created: u64,
pub total_bytes_generated: u64,
pub generation_start: Option<chrono::DateTime<chrono::Utc>>,
}
impl RealisticDataGenerator {
#[must_use]
pub fn new(master_seed: u64) -> Self {
let seed_config = BenchmarkSeed::from_master(master_seed);
let dataset_rng = seed_config.dataset_rng();
Self {
seed_config,
dataset_rng,
block_sequence: 0,
da_sequence: 0,
generation_stats: DataGenerationStats {
generation_start: Some(chrono::Utc::now()),
..Default::default()
},
}
}
#[must_use]
pub fn with_default_seed() -> Self {
Self::new(12345)
}
pub fn generate_block(&mut self, target_size: usize) -> Bytes {
let block_data = self.create_realistic_block_data(self.block_sequence, target_size);
self.block_sequence += 1;
self.generation_stats.blocks_created += 1;
self.generation_stats.total_bytes_generated += target_size as u64;
block_data
}
pub fn generate_da_share(&mut self, size: usize) -> Bytes {
let share_data = self.create_deterministic_da_share(self.da_sequence, size);
self.da_sequence += 1;
self.generation_stats.da_shares_created += 1;
self.generation_stats.total_bytes_generated += size as u64;
share_data
}
pub fn generate_commitment(&mut self, size: usize) -> Bytes {
let commitment_data = self.create_deterministic_commitment(self.da_sequence, size);
self.generation_stats.commitments_created += 1;
self.generation_stats.total_bytes_generated += size as u64;
commitment_data
}
pub fn generate_block_batch(&mut self, count: usize, block_size: usize) -> Vec<Bytes> {
std::iter::repeat_with(|| self.generate_block(block_size))
.take(count)
.collect()
}
pub fn generate_da_batch(
&mut self,
count: usize,
share_size: usize,
commitment_size: usize,
) -> Vec<(Bytes, Bytes)> {
std::iter::repeat_with(|| {
let share = self.generate_da_share(share_size);
let commitment = self.generate_commitment(commitment_size);
(share, commitment)
})
.take(count)
.collect()
}
#[must_use]
pub const fn stats(&self) -> &DataGenerationStats {
&self.generation_stats
}
#[must_use]
pub const fn sequence_state(&self) -> (u64, u64) {
(self.block_sequence, self.da_sequence)
}
pub const fn set_sequence_state(&mut self, block_sequence: u64, da_sequence: u64) {
self.block_sequence = block_sequence;
self.da_sequence = da_sequence;
}
pub fn reset(&mut self) {
self.block_sequence = 0;
self.da_sequence = 0;
self.generation_stats = DataGenerationStats {
generation_start: Some(chrono::Utc::now()),
..Default::default()
};
self.dataset_rng = self.seed_config.dataset_rng();
}
fn create_realistic_block_data(&mut self, block_index: u64, target_size: usize) -> Bytes {
let mut block_data = Vec::with_capacity(target_size);
block_data.extend_from_slice(&block_index.to_be_bytes());
let parent_hash: [u8; 32] = self.dataset_rng.gen();
block_data.extend_from_slice(&parent_hash);
let merkle_root: [u8; 32] = self.dataset_rng.gen();
block_data.extend_from_slice(&merkle_root);
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index * 30;
block_data.extend_from_slice(&timestamp.to_be_bytes());
while block_data.len() < target_size {
block_data.push(self.dataset_rng.gen());
}
block_data.resize(target_size, 0);
Bytes::from(block_data)
}
fn create_deterministic_da_share(&mut self, _sequence: u64, size: usize) -> Bytes {
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
.take(size)
.collect();
Bytes::from(data)
}
fn create_deterministic_commitment(&mut self, _sequence: u64, size: usize) -> Bytes {
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
.take(size)
.collect();
Bytes::from(data)
}
}
pub struct IdGenerator {
block_counter: usize,
blob_counter: usize,
}
impl IdGenerator {
#[must_use]
pub const fn new() -> Self {
Self {
block_counter: 0,
blob_counter: 0,
}
}
pub fn next_header_id(&mut self) -> HeaderId {
let id = create_header_id(self.block_counter);
self.block_counter += 1;
id
}
pub fn next_blob_id(&mut self) -> BlobId {
let id = create_blob_id(self.blob_counter, 0);
self.blob_counter += 1;
id
}
#[must_use]
pub const fn counters(&self) -> (usize, usize) {
(self.block_counter, self.blob_counter)
}
}
impl Default for IdGenerator {
fn default() -> Self {
Self::new()
}
}
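#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch (sizes are illustrative): generate a small batch of blocks and
    // DA share/commitment pairs, then read back the sequence state and totals.
    #[test]
    fn generates_batches_and_tracks_stats() {
        let mut generator = RealisticDataGenerator::new(7);
        let blocks = generator.generate_block_batch(3, 1024);
        let da = generator.generate_da_batch(2, 512, 96);
        assert!(blocks.iter().all(|block| block.len() == 1024));
        assert!(da
            .iter()
            .all(|(share, commitment)| share.len() == 512 && commitment.len() == 96));
        assert_eq!(generator.sequence_state(), (3, 2));
        assert_eq!(generator.stats().blocks_created, 3);
        assert_eq!(generator.stats().da_shares_created, 2);
    }
}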

View File

@ -0,0 +1,172 @@
use smallvec::SmallVec;
#[derive(Debug, Clone)]
pub struct OperationBuffer {
pub read_buffer: Box<[u8]>,
pub write_buffer: Box<[u8]>,
pub key_buffer: SmallVec<[u8; 64]>,
}
impl OperationBuffer {
#[must_use]
pub fn new(read_size: usize, write_size: usize) -> Self {
Self {
read_buffer: vec![0u8; read_size].into_boxed_slice(),
write_buffer: vec![0u8; write_size].into_boxed_slice(),
key_buffer: SmallVec::new(),
}
}
#[must_use]
pub fn read_slice(&self) -> &[u8] {
&self.read_buffer
}
pub fn write_slice_mut(&mut self) -> &mut [u8] {
&mut self.write_buffer
}
pub fn prepare_key<T: AsRef<[u8]>>(&mut self, key_data: T) -> &[u8] {
let key_bytes = key_data.as_ref();
self.key_buffer.clear();
self.key_buffer.extend_from_slice(key_bytes);
&self.key_buffer
}
}
#[derive(Debug, Clone, Copy)]
pub struct TimingMeasurement {
pub start_time: std::time::Instant,
}
impl TimingMeasurement {
#[inline]
#[must_use]
pub fn start() -> Self {
Self {
start_time: std::time::Instant::now(),
}
}
#[inline]
#[must_use]
pub fn end(self) -> std::time::Duration {
self.start_time.elapsed()
}
}
#[derive(Debug, Clone, Default)]
pub struct ThreadLocalMetrics {
pub operations_count: u64,
pub success_count: u64,
pub error_count: u64,
pub bytes_processed: u64,
pub latency_sum_micros: u64,
pub latency_count: u64,
}
impl ThreadLocalMetrics {
#[inline]
pub const fn record_operation(
&mut self,
success: bool,
bytes: u64,
latency: std::time::Duration,
) {
self.operations_count += 1;
if success {
self.success_count += 1;
} else {
self.error_count += 1;
}
self.bytes_processed += bytes;
self.latency_sum_micros += latency.as_micros() as u64;
self.latency_count += 1;
}
#[must_use]
pub fn average_latency_micros(&self) -> f64 {
if self.latency_count > 0 {
self.latency_sum_micros as f64 / self.latency_count as f64
} else {
0.0
}
}
pub const fn fold_into(self, global: &mut Self) {
global.operations_count += self.operations_count;
global.success_count += self.success_count;
global.error_count += self.error_count;
global.bytes_processed += self.bytes_processed;
global.latency_sum_micros += self.latency_sum_micros;
global.latency_count += self.latency_count;
}
}
pub trait EfficientIteratorExt: Iterator {
fn collect_presized(self, size_hint: usize) -> Vec<Self::Item>
where
Self: Sized,
{
let mut vec = Vec::with_capacity(size_hint);
vec.extend(self);
vec
}
fn collect_small_8(self) -> SmallVec<[Self::Item; 8]>
where
Self: Sized,
{
let mut vec: SmallVec<[Self::Item; 8]> = SmallVec::new();
vec.extend(self);
vec
}
}
impl<I: Iterator> EfficientIteratorExt for I {}
#[cfg(test)]
mod tests {
use super::*;
    use crate::benchmark::utilities::{create_blob_id, create_header_id};
#[test]
fn test_operation_buffer_efficiency() {
let mut buffer = OperationBuffer::new(1024, 2048);
let key1 = buffer.prepare_key(b"test_key_1");
assert_eq!(key1, b"test_key_1");
let key2 = buffer.prepare_key(b"different_key");
assert_eq!(key2, b"different_key");
assert!(buffer.key_buffer.capacity() >= 12);
}
#[test]
fn test_efficient_id_creation() {
        let header_id = create_header_id(12345);
let blob_id = create_blob_id(100, 5);
assert_ne!(header_id.as_ref(), &[0u8; 32]);
assert_ne!(blob_id.as_ref(), &[0u8; 32]);
}
#[test]
fn test_thread_local_metrics() {
let mut metrics = ThreadLocalMetrics::default();
metrics.record_operation(true, 1024, std::time::Duration::from_micros(500));
metrics.record_operation(false, 0, std::time::Duration::from_micros(1000));
assert_eq!(metrics.operations_count, 2);
assert_eq!(metrics.success_count, 1);
assert_eq!(metrics.error_count, 1);
assert_eq!(metrics.bytes_processed, 1024);
assert_eq!(metrics.average_latency_micros(), 750.0);
}
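    // Usage sketch: time an operation, record it in per-thread metrics, then fold
    // the thread-local counters into a shared accumulator.
    #[test]
    fn usage_sketch_timing_and_fold() {
        let timer = TimingMeasurement::start();
        let elapsed = timer.end();
        let mut local = ThreadLocalMetrics::default();
        local.record_operation(true, 64, elapsed);
        let mut global = ThreadLocalMetrics::default();
        local.fold_into(&mut global);
        assert_eq!(global.operations_count, 1);
        assert_eq!(global.bytes_processed, 64);
    }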
}

View File

@ -0,0 +1,11 @@
pub mod deterministic;
pub mod generator;
pub mod hot_path_types;
pub mod realistic_generation;
pub mod spec_access_patterns;
pub mod streaming_writer;
pub use deterministic::*;
pub use generator::*;
pub use realistic_generation::*;
pub use spec_access_patterns::*;

View File

@ -0,0 +1,211 @@
use bytes::Bytes;
use cryptarchia_engine::Slot;
use groth16::Fr;
use nomos_core::{
block::Block,
crypto::{Digest as _, Hasher},
header::{ContentId, Header, HeaderId},
mantle::{
ledger::Tx as LedgerTx, ops::leader_claim::VoucherCm, MantleTx, Note, SignedMantleTx,
Transaction as _, Utxo,
},
proofs::{
leader_proof::{Groth16LeaderProof, LeaderPrivate, LeaderPublic},
zksig::{DummyZkSignature, ZkSignaturePublic},
},
};
use rand::{Rng as _, SeedableRng as _};
use rand_chacha::ChaCha20Rng;
use crate::deterministic::{create_deterministic_rng, RngPurpose};
pub fn create_block(
block_index: usize,
parent_id: HeaderId,
) -> Result<(HeaderId, Bytes), Box<dyn std::error::Error>> {
let transactions = create_signed_mantle_txs(block_index);
let slot = Slot::from(block_index as u64);
let block_root = ContentId::from(calculate_block_root(&transactions));
let proof = make_test_proof(block_index);
let header = Header::new(parent_id, block_root, slot, proof);
let header_id = header.id();
let block: Block<SignedMantleTx> = Block::new(header, transactions);
let block_bytes = bincode::serialize(&block)?;
Ok((header_id, Bytes::from(block_bytes)))
}
#[must_use]
pub fn create_block_data(block_index: usize, target_size: usize) -> Bytes {
create_simplified_realistic_block_data(block_index, target_size)
}
fn make_test_proof(block_index: usize) -> Groth16LeaderProof {
let public_inputs = LeaderPublic::new(
Fr::from(block_index as u64),
Fr::from(block_index as u64 + 1),
Fr::from(12345u64),
block_index as u64,
1_000_000,
);
let note = Note::new(1000, Fr::from(block_index as u64).into());
let utxo = Utxo {
tx_hash: Fr::from(block_index as u64).into(),
output_index: 0,
note,
};
let leader_key_bytes = [block_index as u8; 32];
let leader_key = ed25519_dalek::VerifyingKey::from_bytes(&leader_key_bytes)
.unwrap_or_else(|_| ed25519_dalek::VerifyingKey::from_bytes(&[1u8; 32]).unwrap());
let aged_path = vec![];
let latest_path = vec![];
let private = LeaderPrivate::new(
public_inputs,
utxo,
&aged_path,
&latest_path,
Fr::from(999u64),
0,
&leader_key,
);
let voucher_cm = VoucherCm::default();
Groth16LeaderProof::prove(private, voucher_cm).unwrap_or_else(|_| {
panic!("Proof generation failed - ensure POL_PROOF_DEV_MODE=true is set");
})
}
#[must_use]
pub fn create_da_share(block: usize, blob: usize, size: usize) -> Bytes {
let data_id = (block as u64 * 1000) + blob as u64;
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, data_id);
let data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
Bytes::from(data)
}
pub async fn create_commitment(
block: usize,
blob: usize,
size: usize,
) -> Result<Bytes, Box<dyn std::error::Error>> {
let commitment_id = (block as u64 * 1000) + blob as u64;
let mut rng =
create_deterministic_rng(RngPurpose::DatasetGeneration, commitment_id + 1_000_000);
let commitment_data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
Ok(Bytes::from(commitment_data))
}
fn create_simplified_realistic_block_data(block_index: usize, target_size: usize) -> Bytes {
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, block_index as u64);
let mut block_data = Vec::with_capacity(target_size);
block_data.extend_from_slice(&(block_index as u64).to_be_bytes());
let parent_hash: [u8; 32] = rng.gen();
block_data.extend_from_slice(&parent_hash);
let merkle_root: [u8; 32] = rng.gen();
block_data.extend_from_slice(&merkle_root);
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index as u64 * 30;
block_data.extend_from_slice(&timestamp.to_be_bytes());
while block_data.len() < target_size {
block_data.push(rng.gen());
}
block_data.resize(target_size, 0);
Bytes::from(block_data)
}
fn create_signed_mantle_txs(block_index: usize) -> Vec<SignedMantleTx> {
let mut rng = ChaCha20Rng::seed_from_u64(block_index as u64 * 12345);
let tx_count = std::cmp::min(5 + (block_index % 100), 1024);
let mut transactions = Vec::with_capacity(tx_count);
for tx_idx in 0..tx_count {
let input_utxos = create_input_utxos(&mut rng, tx_idx);
let input_ids: Vec<_> = input_utxos.iter().map(Utxo::id).collect();
let output_notes = create_output_notes(&mut rng, tx_idx);
let ledger_tx = LedgerTx::new(input_ids, output_notes);
let mantle_tx = MantleTx {
ops: vec![],
ledger_tx,
execution_gas_price: rng.gen::<u64>() % 1_000_000,
storage_gas_price: rng.gen::<u64>() % 100_000,
};
let pks: Vec<Fr> = input_utxos.iter().map(|utxo| utxo.note.pk.into()).collect();
let msg_hash = mantle_tx.hash().into();
let ledger_tx_proof = DummyZkSignature::prove(ZkSignaturePublic { pks, msg_hash });
let ops_proofs = vec![];
let signed_tx = SignedMantleTx {
ops_proofs,
ledger_tx_proof,
mantle_tx,
};
transactions.push(signed_tx);
}
transactions
}
fn create_input_utxos(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Utxo> {
let input_count = 1 + (tx_idx % 3);
(0..input_count)
.map(|input_idx| Utxo {
tx_hash: Fr::from(rng.gen::<u64>()).into(),
output_index: input_idx,
note: Note::new(
rng.gen::<u64>() % 1_000_000,
Fr::from(rng.gen::<u64>()).into(),
),
})
.collect()
}
fn create_output_notes(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Note> {
let output_count = 1 + (tx_idx % 4);
std::iter::repeat_with(|| {
Note::new(
rng.gen::<u64>() % 1_000_000,
Fr::from(rng.gen::<u64>()).into(),
)
})
.take(output_count)
.collect()
}
fn calculate_block_root(transactions: &[SignedMantleTx]) -> [u8; 32] {
let mut hasher = Hasher::new();
hasher.update(b"BLOCK_ROOT_V1");
for tx in transactions {
let tx_hash = tx.mantle_tx.hash();
hasher.update(tx_hash.as_signing_bytes());
}
hasher.finalize().into()
}

View File

@ -0,0 +1,69 @@
use rand_distr::{Distribution as _, Zipf};
use crate::{
config::ValidatorProfile,
deterministic::{create_deterministic_rng, RngPurpose},
};
#[must_use]
pub fn select_block_spec_accurate(
operation_count: u64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> usize {
if max_blocks == 0 {
return 0;
}
let access_selector = (operation_count * 31) % 100;
if access_selector < (profile.recent_access_ratio * 100.0) as u64 {
select_recent_block_zipfian(operation_count, max_blocks)
} else {
select_historical_block_uniform(operation_count, max_blocks)
}
}
fn select_recent_block_zipfian(operation_count: u64, max_blocks: usize) -> usize {
    // Recent window covers ~20% of the chain, but at least 1000 blocks and never
    // more than the chain itself (otherwise the returned index could exceed
    // `max_blocks` for short chains).
    let recent_window_size = std::cmp::max(max_blocks / 5, 1000).min(max_blocks);
let zipf_dist = Zipf::new(recent_window_size as u64, 1.0).unwrap();
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
let zipf_sample = zipf_dist.sample(&mut rng) as usize;
let recent_start = max_blocks.saturating_sub(recent_window_size);
let tip_offset = zipf_sample.saturating_sub(1);
recent_start + (recent_window_size - 1 - tip_offset)
}
const fn select_historical_block_uniform(operation_count: u64, max_blocks: usize) -> usize {
(operation_count as usize * 23) % max_blocks
}
#[must_use]
pub fn select_da_spec_accurate(
operation_count: u64,
max_blobs: usize,
profile: &ValidatorProfile,
) -> usize {
if max_blobs == 0 {
return 0;
}
let recent_threshold = (profile.recent_access_ratio * 100.0) as u64;
let access_selector = (operation_count * 41) % 100;
if access_selector < recent_threshold {
let recent_blobs = std::cmp::min(100, max_blobs);
let zipf_dist = Zipf::new(recent_blobs as u64, 1.2).unwrap();
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
let sample = zipf_dist.sample(&mut rng) as usize;
let recent_start = max_blobs.saturating_sub(recent_blobs);
recent_start + (recent_blobs - sample.min(recent_blobs))
} else {
(operation_count as usize * 29) % max_blobs
}
}
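#[cfg(test)]
mod usage_sketch {
    use super::*;
    use crate::config::ValidatorProfile;

    // Usage sketch: drive the selectors with a hand-built profile (all rates and
    // ratios below are illustrative) and check the indices stay in range.
    #[test]
    fn selected_indices_stay_in_range() {
        let profile = ValidatorProfile {
            name: "sketch".to_owned(),
            description: "illustrative profile".to_owned(),
            block_read_rate_hz: 2.0,
            da_share_read_rate_hz: 5.0,
            range_scan_rate_hz: 0.1,
            block_write_rate_hz: 0.03,
            da_share_write_rate_hz: 1.0,
            commitment_write_rate_hz: 0.5,
            recent_access_ratio: 0.8,
            historical_access_ratio: 0.2,
            total_validators: 1000,
            assigned_subnets: 10,
        };
        for op in 0..1000 {
            assert!(select_block_spec_accurate(op, 5000, &profile) < 5000);
            assert!(select_da_spec_accurate(op, 300, &profile) < 300);
        }
    }
}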

View File

@ -0,0 +1,49 @@
use std::{
fs::File,
io::{BufWriter, Write as _},
path::Path,
};
const CHUNK_SIZE: usize = 64 * 1024;
const BUFFER_SIZE: usize = 1024 * 1024;
pub struct StreamingDatasetWriter {
writer: BufWriter<File>,
chunk_buffer: Box<[u8; CHUNK_SIZE]>,
bytes_written: u64,
}
impl StreamingDatasetWriter {
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, std::io::Error> {
let file = File::create(path)?;
let writer = BufWriter::with_capacity(BUFFER_SIZE, file);
Ok(Self {
writer,
chunk_buffer: vec![0u8; CHUNK_SIZE]
.into_boxed_slice()
.try_into()
.expect("CHUNK_SIZE is const"),
bytes_written: 0,
})
}
pub fn write_chunk(&mut self, data: &[u8]) -> Result<(), std::io::Error> {
let mut remaining = data;
while !remaining.is_empty() {
let write_size = std::cmp::min(remaining.len(), CHUNK_SIZE);
self.chunk_buffer[..write_size].copy_from_slice(&remaining[..write_size]);
self.writer.write_all(&self.chunk_buffer[..write_size])?;
remaining = &remaining[write_size..];
self.bytes_written += write_size as u64;
}
Ok(())
}
pub fn finalize(mut self) -> Result<u64, std::io::Error> {
self.writer.flush()?;
Ok(self.bytes_written)
}
}
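#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: stream a payload through the fixed-size chunk buffer and
    // confirm the byte count reported by `finalize`.
    #[test]
    fn writes_and_reports_bytes() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("dataset.bin");
        let payload = vec![0xABu8; 3 * CHUNK_SIZE + 123];
        let mut writer = StreamingDatasetWriter::new(&path).expect("create writer");
        writer.write_chunk(&payload).expect("write");
        assert_eq!(writer.finalize().expect("flush"), payload.len() as u64);
    }
}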

View File

@ -0,0 +1,58 @@
use std::path::PathBuf;
use nomos_storage::backends::rocksdb::RocksBackendSettings;
pub mod benchmark;
pub mod config;
pub mod data;
pub mod metrics;
pub mod storage;
pub use benchmark::*;
pub use config::{
CompressionType, DatasetGenConfig, NetworkSize, ProductionBenchConfig, ProfileType,
ValidatorProfile, ValidatorProfiles, WorkloadType,
};
pub use data::*;
pub use metrics::*;
pub use storage::*;
#[derive(Debug, Clone)]
pub struct BenchStorageConfig {
pub name: String,
pub settings: RocksBackendSettings,
}
impl BenchStorageConfig {
#[must_use]
pub fn production() -> Self {
Self {
name: "production".to_string(),
settings: RocksBackendSettings {
db_path: Self::data_path(),
read_only: false,
column_family: Some("blocks".to_string()),
},
}
}
#[must_use]
pub fn data_path() -> PathBuf {
let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
let data_dir = PathBuf::from(home_dir).join(".nomos_storage_benchmarks");
let _ = std::fs::create_dir_all(&data_dir);
data_dir.join("rocksdb_data")
}
#[must_use]
pub fn results_path() -> PathBuf {
let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
let results_dir = PathBuf::from(home_dir)
.join(".nomos_storage_benchmarks")
.join("results");
let _ = std::fs::create_dir_all(&results_dir);
results_dir
}
}
pub type BenchConfig = BenchStorageConfig;

View File

@ -0,0 +1,87 @@
use std::time::Instant;
use hdrhistogram::Histogram;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyPercentiles {
pub p50_ms: f64,
pub p90_ms: f64,
pub p95_ms: f64,
pub p99_ms: f64,
pub max_ms: f64,
pub mean_ms: f64,
pub sample_count: u64,
}
pub struct LatencyTracker {
histogram: Histogram<u64>,
operation_count: u64,
}
impl Default for LatencyTracker {
fn default() -> Self {
Self::new()
}
}
impl LatencyTracker {
#[must_use]
pub fn new() -> Self {
Self {
histogram: Histogram::new_with_bounds(1, 3_600_000_000, 3)
.expect("Valid histogram bounds"),
operation_count: 0,
}
}
pub async fn record_async_operation<F, Fut, R>(&mut self, operation: F) -> R
where
F: FnOnce() -> Fut,
Fut: std::future::Future<Output = R>,
{
let start = Instant::now();
let result = operation().await;
let latency = start.elapsed();
let latency_micros = latency.as_micros() as u64;
if self.histogram.record(latency_micros).is_ok() {
self.operation_count += 1;
}
result
}
#[must_use]
pub fn get_percentiles(&self) -> LatencyPercentiles {
if self.operation_count == 0 {
return LatencyPercentiles {
p50_ms: 0.0,
p90_ms: 0.0,
p95_ms: 0.0,
p99_ms: 0.0,
max_ms: 0.0,
mean_ms: 0.0,
sample_count: 0,
};
}
let to_ms = |v: u64| v as f64 / 1000.0;
LatencyPercentiles {
p50_ms: to_ms(self.histogram.value_at_quantile(0.50)),
p90_ms: to_ms(self.histogram.value_at_quantile(0.90)),
p95_ms: to_ms(self.histogram.value_at_quantile(0.95)),
p99_ms: to_ms(self.histogram.value_at_quantile(0.99)),
max_ms: to_ms(self.histogram.max()),
mean_ms: self.histogram.mean() / 1000.0,
sample_count: self.operation_count,
}
}
}
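#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: wrap an async operation (a short sleep stands in for storage
    // I/O) and read back percentiles in milliseconds.
    #[tokio::test]
    async fn records_async_latency() {
        let mut tracker = LatencyTracker::new();
        tracker
            .record_async_operation(|| async {
                tokio::time::sleep(std::time::Duration::from_millis(1)).await;
            })
            .await;
        let percentiles = tracker.get_percentiles();
        assert_eq!(percentiles.sample_count, 1);
        assert!(percentiles.p50_ms > 0.0);
    }
}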

View File

@ -0,0 +1,9 @@
pub mod latency_stats;
pub mod rocksdb_collector;
pub mod rocksdb_stats;
pub mod runtime_memory_allocator;
pub use latency_stats::*;
pub use rocksdb_collector::*;
pub use rocksdb_stats::*;
pub use runtime_memory_allocator::*;

View File

@ -0,0 +1,365 @@
use std::collections::HashMap;
use nomos_storage::backends::rocksdb::RocksBackend;
use serde::{Deserialize, Serialize};
pub struct RocksDbStatsCollector {
storage_ref: Option<*const RocksBackend>,
property_cache: HashMap<String, Option<u64>>,
stats_cache: Option<String>,
cache_valid: bool,
collection_count: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RocksDbStatsSnapshot {
pub stats: super::RocksDbStats,
pub collection_timestamp: chrono::DateTime<chrono::Utc>,
pub collection_id: u64,
pub cache_hits: u64,
pub cache_misses: u64,
}
impl RocksDbStatsCollector {
#[must_use]
pub fn new() -> Self {
Self {
storage_ref: None,
property_cache: HashMap::new(),
stats_cache: None,
cache_valid: false,
collection_count: 0,
}
}
pub fn attach(&mut self, storage: &RocksBackend) {
self.storage_ref = Some(std::ptr::from_ref::<RocksBackend>(storage));
self.invalidate_cache();
}
pub fn collect_stats(&mut self) -> Result<RocksDbStatsSnapshot, Box<dyn std::error::Error>> {
        // SAFETY: `attach()` stores a raw pointer to the backend; the caller must
        // guarantee that the attached `RocksBackend` outlives this collector and is
        // not moved, otherwise this dereference is undefined behaviour.
let storage = unsafe {
self.storage_ref
.ok_or("No storage attached")?
.as_ref()
.ok_or("Invalid storage ref")?
};
self.collection_count += 1;
let stats = self.collect_with_caching(storage)?;
Ok(RocksDbStatsSnapshot {
stats,
collection_timestamp: chrono::Utc::now(),
collection_id: self.collection_count,
cache_hits: self.count_cache_hits(),
cache_misses: self.count_cache_misses(),
})
}
pub fn collect_before_after<F>(
&mut self,
operation: F,
) -> Result<(RocksDbStatsSnapshot, RocksDbStatsSnapshot), Box<dyn std::error::Error>>
where
F: FnOnce() -> Result<(), Box<dyn std::error::Error>>,
{
let before = self.collect_stats()?;
self.invalidate_cache();
operation()?;
let after = self.collect_stats()?;
Ok((before, after))
}
pub fn invalidate_cache(&mut self) {
self.property_cache.clear();
self.stats_cache = None;
self.cache_valid = false;
}
#[must_use]
pub fn collection_stats(&self) -> (u64, u64, u64) {
(
self.collection_count,
self.count_cache_hits(),
self.count_cache_misses(),
)
}
fn collect_with_caching(
&mut self,
storage: &RocksBackend,
) -> Result<super::RocksDbStats, Box<dyn std::error::Error>> {
let stats_string = if let Some(ref cached) = self.stats_cache {
cached.clone()
} else {
let stats = self.get_stats_string(storage)?;
self.stats_cache = Some(stats.clone());
stats
};
let (cache_hit_count, cache_miss_count) = self.parse_cache_hit_miss(&stats_string);
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
} else {
0.0
};
let level_files: Vec<u64> = (0..7)
.map(|level| {
self.get_cached_property_u64(
storage,
rocksdb::properties::num_files_at_level(level).as_ref(),
)
})
.collect();
Ok(super::RocksDbStats {
cache_hit_rate,
cache_hit_count,
cache_miss_count,
block_cache_usage_bytes: self
.get_cached_property_u64(storage, rocksdb::properties::BLOCK_CACHE_USAGE.as_ref()),
block_cache_capacity_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
),
index_cache_usage_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
),
compaction_pending_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
),
compaction_running_count: self.get_cached_property_u64(
storage,
rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
),
l0_file_count: level_files[0],
l1_file_count: level_files[1],
l2_file_count: level_files[2],
l3_file_count: level_files[3],
l4_file_count: level_files[4],
l5_file_count: level_files[5],
l6_file_count: level_files[6],
total_sst_files: level_files.iter().sum(),
total_sst_size_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
),
memtable_count: self.parse_memtable_count(&stats_string),
num_immutable_memtables: self.parse_immutable_memtables(&stats_string),
memtable_flush_pending: self.get_cached_property_u64(
storage,
rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
),
approximate_memory_usage_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
),
read_amplification: self.parse_read_amplification(&stats_string),
write_amplification: self.parse_write_amplification(&stats_string),
total_read_bytes: self.parse_total_read_bytes(&stats_string),
total_write_bytes: self.parse_total_write_bytes(&stats_string),
write_stall_time_ms: self.parse_write_stall_time(&stats_string),
live_sst_files_size_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
),
num_entries: self
.get_cached_property_u64(storage, rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
})
}
fn get_cached_property_u64(&mut self, storage: &RocksBackend, property: &str) -> u64 {
if let Some(cached_value) = self.property_cache.get(property) {
return cached_value.unwrap_or(0);
}
let value = self.query_property_u64(storage, property);
self.property_cache.insert(property.to_owned(), value);
value.unwrap_or(0)
}
fn query_property_u64(&self, storage: &RocksBackend, property: &str) -> Option<u64> {
let property_owned = property.to_owned();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(result_bytes)) => {
let value_str = String::from_utf8_lossy(&result_bytes);
value_str.trim().parse().ok()
}
_ => None,
}
}
fn get_stats_string(
&self,
storage: &RocksBackend,
) -> Result<String, Box<dyn std::error::Error>> {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
_ => Ok(Some(b"".to_vec().into())),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => Ok(String::from_utf8_lossy(&stats_bytes).to_string()),
_ => Ok(String::new()),
}
}
fn count_cache_hits(&self) -> u64 {
self.property_cache.values().filter(|v| v.is_some()).count() as u64
}
fn count_cache_misses(&self) -> u64 {
self.property_cache.values().filter(|v| v.is_none()).count() as u64
}
fn parse_cache_hit_miss(&self, stats: &str) -> (u64, u64) {
let mut hits = 0u64;
let mut misses = 0u64;
for line in stats.lines() {
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
if let Some(value) = self.extract_number_from_line(line) {
hits = value;
}
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss")
{
if let Some(value) = self.extract_number_from_line(line) {
misses = value;
}
}
}
(hits, misses)
}
fn parse_memtable_count(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_immutable_memtables(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_read_amplification(&self, stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("read amplification") || line.contains("Read(GB)") {
if let Some(value) = self.extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_write_amplification(&self, stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("write amplification") || line.contains("Write(GB)") {
if let Some(value) = self.extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_total_read_bytes(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes read") || line.contains("Read(GB)") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_total_write_bytes(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes written") || line.contains("Write(GB)") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_write_stall_time(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
if let Some(percent_pos) = line.find("percent") {
let before_percent = &line[..percent_pos];
if let Some(comma_pos) = before_percent.rfind(',') {
let percent_str = before_percent[comma_pos + 1..].trim();
if let Ok(percent) = percent_str.parse::<f64>() {
return (percent * 10.0) as u64;
}
}
}
}
}
0
}
fn extract_number_from_line(&self, line: &str) -> Option<u64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
let clean_number = number_str.replace(',', "");
return clean_number.parse().ok();
}
}
None
}
fn extract_float_from_line(&self, line: &str) -> Option<f64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
return number_str.parse().ok();
}
}
None
}
}
impl Default for RocksDbStatsCollector {
fn default() -> Self {
Self::new()
}
}

View File

@ -0,0 +1,386 @@
use nomos_storage::backends::rocksdb::RocksBackend;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RocksDbStats {
pub cache_hit_rate: f64,
pub cache_hit_count: u64,
pub cache_miss_count: u64,
pub block_cache_usage_bytes: u64,
pub block_cache_capacity_bytes: u64,
pub index_cache_usage_bytes: u64,
pub compaction_pending_bytes: u64,
pub compaction_running_count: u64,
pub l0_file_count: u64,
pub l1_file_count: u64,
pub l2_file_count: u64,
pub l3_file_count: u64,
pub l4_file_count: u64,
pub l5_file_count: u64,
pub l6_file_count: u64,
pub total_sst_files: u64,
pub total_sst_size_bytes: u64,
pub memtable_count: u64,
pub num_immutable_memtables: u64,
pub memtable_flush_pending: u64,
pub approximate_memory_usage_bytes: u64,
pub read_amplification: f64,
pub write_amplification: f64,
pub total_read_bytes: u64,
pub total_write_bytes: u64,
pub write_stall_time_ms: u64,
pub live_sst_files_size_bytes: u64,
pub num_entries: u64,
}
#[must_use]
pub fn collect_rocksdb_stats(storage: &RocksBackend) -> RocksDbStats {
    // Fetch the (expensive) STATS property dump once and reuse it for every parse below.
    let stats_string = get_stats_string(storage);
    let (cache_hit_count, cache_miss_count) = parse_cache_hit_miss_counts(&stats_string);
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
} else {
0.0
};
let l0_files = get_level_file_count(storage, 0);
let l1_files = get_level_file_count(storage, 1);
let l2_files = get_level_file_count(storage, 2);
let l3_files = get_level_file_count(storage, 3);
let l4_files = get_level_file_count(storage, 4);
let l5_files = get_level_file_count(storage, 5);
let l6_files = get_level_file_count(storage, 6);
RocksDbStats {
cache_hit_rate,
cache_hit_count,
cache_miss_count,
block_cache_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::BLOCK_CACHE_USAGE.as_ref(),
),
block_cache_capacity_bytes: get_property_u64(
storage,
&rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
),
index_cache_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
),
compaction_pending_bytes: get_property_u64(
storage,
&rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
),
compaction_running_count: get_property_u64(
storage,
&rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
),
l0_file_count: l0_files,
l1_file_count: l1_files,
l2_file_count: l2_files,
l3_file_count: l3_files,
l4_file_count: l4_files,
l5_file_count: l5_files,
l6_file_count: l6_files,
total_sst_files: l0_files + l1_files + l2_files + l3_files + l4_files + l5_files + l6_files,
total_sst_size_bytes: get_property_u64(
storage,
&rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
),
        memtable_count: parse_memtable_count(&stats_string),
        num_immutable_memtables: parse_immutable_memtables(&stats_string),
memtable_flush_pending: get_property_u64(
storage,
&rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
),
approximate_memory_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
),
        read_amplification: parse_read_amplification(&stats_string),
        write_amplification: parse_write_amplification(&stats_string),
        total_read_bytes: parse_total_read_bytes(&stats_string),
        total_write_bytes: parse_total_write_bytes(&stats_string),
        write_stall_time_ms: parse_write_stall_time(&stats_string),
live_sst_files_size_bytes: get_property_u64(
storage,
&rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
),
num_entries: get_property_u64(storage, &rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
}
}
fn get_stats_string(storage: &RocksBackend) -> String {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
_ => Ok(Some(b"".to_vec().into())),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => String::from_utf8_lossy(&stats_bytes).to_string(),
_ => String::new(),
}
}
fn get_level_file_count(storage: &RocksBackend, level: i32) -> u64 {
get_property_u64(
storage,
&rocksdb::properties::num_files_at_level(level as usize).as_ref(),
)
}
fn get_property_u64(storage: &RocksBackend, property: &str) -> u64 {
match get_property_value(storage, property) {
Some(value) => {
log::debug!("Property '{}': {}", property, value);
value
}
None => {
log::debug!("Property '{}': unavailable", property);
0
}
}
}
fn get_property_value(storage: &RocksBackend, property: &str) -> Option<u64> {
let property_owned = property.to_owned();
let property_for_log = property.to_owned();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(result_bytes)) => {
let value_str = String::from_utf8_lossy(&result_bytes);
match value_str.trim().parse::<u64>() {
Ok(parsed) => {
log::trace!("Property '{}' available: {}", property_for_log, parsed);
Some(parsed)
}
Err(_) => {
log::trace!(
"Property '{}' parse error from: '{}'",
property_for_log,
value_str
);
None
}
}
}
Ok(None) => {
log::trace!("Property '{}' unavailable", property_for_log);
None
}
Err(e) => {
log::trace!("Property '{}' failed: {}", property_for_log, e);
None
}
}
}
fn parse_cache_hit_miss_counts(stats: &str) -> (u64, u64) {
let mut hits = 0u64;
let mut misses = 0u64;
for line in stats.lines() {
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
if let Some(value) = extract_number_from_line(line) {
hits = value;
}
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss") {
if let Some(value) = extract_number_from_line(line) {
misses = value;
}
}
}
(hits, misses)
}
fn parse_write_stall_time(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
if let Some(percent_pos) = line.find("percent") {
let before_percent = &line[..percent_pos];
if let Some(comma_pos) = before_percent.rfind(',') {
let percent_str = before_percent[comma_pos + 1..].trim();
if let Ok(percent) = percent_str.parse::<f64>() {
return (percent * 10.0) as u64;
}
}
}
}
}
0
}
fn extract_number_from_line(line: &str) -> Option<u64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
let clean_number = number_str.replace(',', "");
return clean_number.parse().ok();
}
}
None
}
fn parse_read_amplification(stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("read amplification") || line.contains("Read(GB)") {
if let Some(value) = extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_write_amplification(stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("write amplification") || line.contains("Write(GB)") {
if let Some(value) = extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_total_read_bytes(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes read") || line.contains("Read(GB)") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_total_write_bytes(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes written") || line.contains("Write(GB)") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_memtable_count(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_immutable_memtables(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn extract_float_from_line(line: &str) -> Option<f64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
return number_str.parse().ok();
}
}
None
}
pub struct StatsCollector {
pub before: RocksDbStats,
pub after: RocksDbStats,
}
impl StatsCollector {
pub fn new() -> Self {
Self {
before: RocksDbStats::default(),
after: RocksDbStats::default(),
}
}
pub fn collect_before(&mut self, storage: &RocksBackend) {
self.before = collect_rocksdb_stats(storage);
log::debug!(
"Before: cache {:.1}%, L0 files {}",
self.before.cache_hit_rate * 100.0,
self.before.l0_file_count
);
}
pub fn collect_after(&mut self, storage: &RocksBackend) {
self.after = collect_rocksdb_stats(storage);
log::debug!(
"After: cache {:.1}%, L0 files {}",
self.after.cache_hit_rate * 100.0,
self.after.l0_file_count
);
}
}

View File

@ -0,0 +1,114 @@
use std::{
alloc::{GlobalAlloc, Layout, System},
sync::atomic::{AtomicUsize, Ordering},
};
pub struct RuntimeValidatorAllocator {
inner: System,
allocated: AtomicUsize,
limit: AtomicUsize,
}
impl RuntimeValidatorAllocator {
pub const fn new() -> Self {
Self {
inner: System,
allocated: AtomicUsize::new(0),
limit: AtomicUsize::new(16 * 1024 * 1024 * 1024),
}
}
pub fn set_limit_gb(&self, limit_gb: usize) {
let limit_bytes = limit_gb * 1024 * 1024 * 1024;
self.limit.store(limit_bytes, Ordering::SeqCst);
log::info!(
"Memory limit updated to {}GB ({} bytes)",
limit_gb,
limit_bytes
);
}
pub fn usage_mb(&self) -> f64 {
self.allocated.load(Ordering::Relaxed) as f64 / 1024.0 / 1024.0
}
pub fn usage_percent(&self) -> f64 {
let current = self.allocated.load(Ordering::Relaxed);
let limit = self.limit.load(Ordering::Relaxed);
if limit > 0 {
current as f64 / limit as f64 * 100.0
} else {
0.0
}
}
pub fn limit_gb(&self) -> usize {
self.limit.load(Ordering::Relaxed) / 1024 / 1024 / 1024
}
pub fn actual_limit_gb(&self) -> usize {
self.limit_gb()
}
pub fn would_exceed_limit(&self, size: usize) -> bool {
let current = self.allocated.load(Ordering::Relaxed);
let limit = self.limit.load(Ordering::Relaxed);
current + size > limit
}
pub fn allocation_failures(&self) -> u64 {
0
}
}
unsafe impl GlobalAlloc for RuntimeValidatorAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
let size = layout.size();
let current = self.allocated.fetch_add(size, Ordering::SeqCst);
let limit = self.limit.load(Ordering::Relaxed);
if current + size > limit {
self.allocated.fetch_sub(size, Ordering::SeqCst);
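            // NOTE: the logging below formats a string and may itself allocate while
            // the limit is already exceeded, so treat it as best-effort diagnostics.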
if size >= 1024 * 1024 {
log::warn!(
"Memory limit exceeded: {}MB allocation blocked ({}GB limit, {:.1}% used)",
size / 1024 / 1024,
limit / 1024 / 1024 / 1024,
current as f64 / limit as f64 * 100.0
);
}
return std::ptr::null_mut();
}
unsafe { self.inner.alloc(layout) }
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
unsafe {
self.inner.dealloc(ptr, layout);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_runtime_limit_setting() {
let allocator = RuntimeValidatorAllocator::new();
assert_eq!(allocator.limit_gb(), 16);
allocator.set_limit_gb(8);
assert_eq!(allocator.limit_gb(), 8);
assert_eq!(allocator.actual_limit_gb(), 8);
allocator.set_limit_gb(32);
assert_eq!(allocator.limit_gb(), 32);
}
}
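// Usage sketch (assumed wiring, not enabled anywhere in this crate): a benchmark
// binary could opt in by registering the allocator globally and tightening the
// limit at startup, e.g.
//
//     #[global_allocator]
//     static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();
//
//     fn main() {
//         ALLOCATOR.set_limit_gb(8);
//     }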

View File

@ -0,0 +1,3 @@
pub mod rocksdb_options_tuning;
pub use rocksdb_options_tuning::*;

View File

@ -0,0 +1,180 @@
use serde::{Deserialize, Serialize};
use crate::CompressionType;
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RocksDbTuningOptions {
pub cache_size_percent: Option<u32>,
pub write_buffer_mb: Option<u32>,
pub compaction_jobs: Option<u32>,
pub block_size_kb: Option<u32>,
pub compression: Option<CompressionType>,
pub bloom_filter_bits: Option<u32>,
}
impl RocksDbTuningOptions {
pub fn apply_to_options(
&self,
opts: &mut rocksdb::Options,
) -> Result<(), Box<dyn std::error::Error>> {
        // Build a single BlockBasedOptions and install it once: calling
        // `set_block_based_table_factory` a second time would replace the first
        // table factory and silently drop the block-cache setting.
        let mut block_opts = rocksdb::BlockBasedOptions::default();
        let mut use_block_opts = false;
        if let Some(cache_percent) = self.cache_size_percent {
            let system_memory_gb = get_system_memory_gb();
            let cache_size_bytes = ((system_memory_gb as f64 * (f64::from(cache_percent) / 100.0))
                * 1024.0
                * 1024.0
                * 1024.0) as usize;
            let cache = rocksdb::Cache::new_lru_cache(cache_size_bytes);
            block_opts.set_block_cache(&cache);
            use_block_opts = true;
            log::info!(
                "Applied block cache: {}% of RAM = {}MB",
                cache_percent,
                cache_size_bytes / 1024 / 1024
            );
        }
        if let Some(block_size_kb) = self.block_size_kb {
            let block_size_bytes = (block_size_kb as usize) * 1024;
            block_opts.set_block_size(block_size_bytes);
            use_block_opts = true;
            log::info!("Applied block size: {}KB", block_size_kb);
        }
        if use_block_opts {
            opts.set_block_based_table_factory(&block_opts);
        }
        if let Some(buffer_mb) = self.write_buffer_mb {
            let buffer_bytes = (buffer_mb as usize) * 1024 * 1024;
            opts.set_write_buffer_size(buffer_bytes);
            log::info!("Applied write buffer: {}MB", buffer_mb);
        }
        if let Some(jobs) = self.compaction_jobs {
            opts.set_max_background_jobs(jobs as i32);
            log::info!("Applied compaction jobs: {}", jobs);
        }
if let Some(compression) = self.compression {
match compression {
CompressionType::None => {
opts.set_compression_type(rocksdb::DBCompressionType::None);
log::info!("Applied compression: None");
}
CompressionType::Lz4 => {
opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
log::info!("Applied compression: LZ4");
}
CompressionType::Snappy => {
opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
log::info!("Applied compression: Snappy");
}
CompressionType::Zstd => {
opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
log::info!("Applied compression: Zstd");
}
}
}
Ok(())
}
pub fn from_args(args: &[String]) -> (Self, bool) {
let mut config = Self::default();
let mut read_only = false;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--cache-size" if i + 1 < args.len() => {
config.cache_size_percent = args[i + 1].parse().ok();
i += 2;
}
"--write-buffer" if i + 1 < args.len() => {
config.write_buffer_mb = args[i + 1].parse().ok();
i += 2;
}
"--compaction-jobs" if i + 1 < args.len() => {
config.compaction_jobs = args[i + 1].parse().ok();
i += 2;
}
"--block-size" if i + 1 < args.len() => {
config.block_size_kb = args[i + 1].parse().ok();
i += 2;
}
"--read-only" => {
read_only = true;
i += 1;
}
"--compression" if i + 1 < args.len() => {
match args[i + 1].parse::<CompressionType>() {
Ok(compression_type) => config.compression = Some(compression_type),
Err(e) => log::warn!("Invalid compression type: {}", e),
}
i += 2;
}
_ => {
i += 1;
}
}
}
(config, read_only)
}
pub fn description(&self) -> String {
let mut parts = Vec::new();
if let Some(cache) = self.cache_size_percent {
parts.push(format!("cache:{}%", cache));
}
if let Some(buffer) = self.write_buffer_mb {
parts.push(format!("buffer:{}MB", buffer));
}
if let Some(jobs) = self.compaction_jobs {
parts.push(format!("jobs:{}", jobs));
}
if let Some(block_size) = self.block_size_kb {
parts.push(format!("block:{}KB", block_size));
}
if parts.is_empty() {
"defaults".to_string()
} else {
parts.join(",")
}
}
}
fn get_system_memory_gb() -> usize {
if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
for line in meminfo.lines() {
if line.starts_with("MemTotal:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<usize>() {
return kb / 1024 / 1024;
}
}
}
}
}
16
}
pub fn create_tuned_rocksdb_options(tuning_config: &RocksDbTuningOptions) -> rocksdb::Options {
let mut opts = rocksdb::Options::default();
opts.create_if_missing(true);
opts.create_missing_column_families(true);
if let Err(e) = tuning_config.apply_to_options(&mut opts) {
log::error!("Failed to apply RocksDB tuning: {}", e);
}
opts
}
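#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: parse tuning flags the way a benchmark binary might and build
    // a tuned `rocksdb::Options` from them (flag values are illustrative).
    #[test]
    fn builds_options_from_cli_flags() {
        let args: Vec<String> = [
            "--write-buffer",
            "128",
            "--compaction-jobs",
            "4",
            "--compression",
            "lz4",
        ]
        .iter()
        .map(|arg| arg.to_string())
        .collect();
        let (config, read_only) = RocksDbTuningOptions::from_args(&args);
        assert!(!read_only);
        assert_eq!(config.write_buffer_mb, Some(128));
        assert_eq!(config.description(), "buffer:128MB,jobs:4");
        let _opts = create_tuned_rocksdb_options(&config);
    }
}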