storage benchmarks infrastructure

andrussal 2025-10-06 11:11:53 +02:00
parent 73441efb69
commit 4de3e1e68e
45 changed files with 6461 additions and 2 deletions

.gitignore
View File

@@ -1,3 +1,25 @@
# Generated by Cargo
# will have compiled files and executables
/target/
*/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
target/
.vscode
# These are backup files generated by rustfmt
**/*.rs.bk
# Files generated by build processes or applications
config.yml
store.*
*.txt
.env
.idea/
.vscode/
# Integration test temp dirs
tests/.tmp*
# Wildcard for any file that contains ignore
*ignore*

View File

@@ -0,0 +1,244 @@
[package]
edition = "2021"
name = "storage-benchmarks"
version = "0.1.0"
[dependencies]
# Storage APIs - using public nomos repository
cryptarchia-engine = { git = "https://github.com/logos-co/nomos-node", package = "cryptarchia-engine" }
nomos-core = { git = "https://github.com/logos-co/nomos-node", package = "nomos-core" }
nomos-storage = { git = "https://github.com/logos-co/nomos-node", package = "nomos-storage", features = ["rocksdb-backend"] }
# Database
rocksdb = { version = "0.24", features = ["bindgen-runtime"] }
# Async runtime
tokio = { features = ["macros", "rt-multi-thread", "time"], version = "1" }
async-trait = "0.1"
# Data structures
bincode = "1.0"
bytes = "1.3"
chrono = { version = "0.4", features = ["serde"] }
env_logger = "0.10"
log = "0.4"
num_cpus = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3"
toml = "0.8"
rand = "0.8"
rand_chacha = "0.3"
ed25519-dalek = "2.0"
groth16 = { git = "https://github.com/logos-co/nomos-node", package = "groth16" }
pol = { git = "https://github.com/logos-co/nomos-node", package = "pol" }
futures = "0.3"
rayon = "1.0"
rand_distr = "0.4"
hdrhistogram = "7.5"
md5 = "0.7"
clap = { version = "4.0", features = ["derive"] }
thiserror = "1.0"
smallvec = "1.0"
# Optional allocator features
mimalloc = { version = "0.1", optional = true }
jemallocator = { version = "0.5", optional = true }
[features]
default = []
allocator-mimalloc = ["mimalloc"]
allocator-jemalloc = ["jemallocator"]
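# Optional: select an alternative allocator at build time, e.g.
#   cargo build --release --features allocator-mimalloc
#   cargo build --release --features allocator-jemalloc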
[dev-dependencies]
divan = { default-features = false, version = "0.1" }
# Production binaries
[[bin]]
name = "dataset_generator"
path = "src/bin/dataset_generator.rs"
[[bin]]
name = "verify_dataset_integrity"
path = "src/bin/verify_dataset_integrity.rs"
[[bin]]
name = "storage_bench_runner"
path = "src/bin/storage_bench_runner.rs"
[[bin]]
name = "verify_rocksdb_properties"
path = "src/bin/verify_rocksdb_properties.rs"
[[bin]]
name = "dataset_builder"
path = "src/bin/dataset_builder.rs"
# Educational examples
[[example]]
name = "storage_capacity_calculator"
path = "examples/storage_capacity_calculator.rs"
[lints.clippy]
# Nursery and allowed nursery warnings (new lints will warn by default)
nursery = { level = "warn", priority = -1 }
# Pedantic and allowed pedantic warnings (new lints will warn by default)
pedantic = { level = "warn", priority = -1 }
similar_names = { level = "allow" }
# Restriction and allowed restriction warnings (new lints will warn by default)
restriction = { level = "warn", priority = -1 }
absolute_paths = { level = "allow" }
alloc_instead_of_core = { level = "allow" }
arbitrary_source_item_ordering = { level = "allow" }
big_endian_bytes = { level = "allow" }
blanket_clippy_restriction_lints = { level = "allow" }
decimal_literal_representation = { level = "allow" }
default_numeric_fallback = { level = "allow" }
deref_by_slicing = { level = "allow" }
else_if_without_else = { level = "allow" }
exhaustive_enums = { level = "allow" }
exhaustive_structs = { level = "allow" }
exit = { level = "allow" }
expect_used = { level = "allow" }
field_scoped_visibility_modifiers = { level = "allow" }
float_arithmetic = { level = "allow" }
get_unwrap = { level = "allow" }
host_endian_bytes = { level = "allow" }
implicit_return = { level = "allow" }
integer_division_remainder_used = { level = "allow" }
iter_over_hash_type = { level = "allow" }
let_underscore_must_use = { level = "allow" }
let_underscore_untyped = { level = "allow" }
little_endian_bytes = { level = "allow" }
map_err_ignore = { level = "allow" }
min_ident_chars = { level = "allow" }
missing_asserts_for_indexing = { level = "allow" }
missing_docs_in_private_items = { level = "allow" }
missing_inline_in_public_items = { level = "allow" }
missing_trait_methods = { level = "allow" }
mixed_read_write_in_expression = { level = "allow" }
mod_module_files = { level = "allow" }
module_name_repetitions = { level = "allow" }
modulo_arithmetic = { level = "allow" }
panic = { level = "allow" }
panic_in_result_fn = { level = "allow" }
partial_pub_fields = { level = "allow" }
print_stderr = { level = "allow" }
print_stdout = { level = "allow" }
pub_use = { level = "allow" }
pub_with_shorthand = { level = "allow" }
question_mark_used = { level = "allow" }
self_named_module_files = { level = "allow" }
semicolon_inside_block = { level = "allow" }
single_call_fn = { level = "allow" }
single_char_lifetime_names = { level = "allow" }
std_instead_of_alloc = { level = "allow" }
std_instead_of_core = { level = "allow" }
struct_field_names = { level = "allow" }
unseparated_literal_suffix = { level = "allow" }
use_debug = { level = "allow" }
wildcard_enum_match_arm = { level = "allow" }
arithmetic_side_effects = { level = "allow" }
as_conversions = { level = "allow" }
as_pointer_underscore = { level = "allow" }
as_underscore = { level = "allow" }
assertions_on_result_states = { level = "allow" }
cast_possible_truncation = { level = "allow" }
cast_possible_wrap = { level = "allow" }
cast_precision_loss = { level = "allow" }
cast_sign_loss = { level = "allow" }
doc_broken_link = { level = "allow" }
string_slice = { level = "allow" }
future_not_send = { level = "allow" }
unused_self = { level = "allow" }
unnecessary_wraps = { level = "allow" }
single_match_else = { level = "allow" }
option_if_let_else = { level = "allow" }
uninlined_format_args = { level = "allow" }
needless_borrow = { level = "allow" }
str_to_string = { level = "allow" }
new_without_default = { level = "allow" }
must_use_candidate = { level = "allow" }
missing_const_for_fn = { level = "allow" }
large_stack_arrays = { level = "allow" }
unnecessary_to_owned = { level = "allow" }
undocumented_unsafe_blocks = { level = "allow" }
ref_as_ptr = { level = "allow" }
unused_async = { level = "allow" }
items_after_statements = { level = "allow" }
ok_expect = { level = "allow" }
map_with_unused_argument_over_ranges = { level = "allow" }
ignored_unit_patterns = { level = "allow" }
too_many_lines = { level = "allow" }
not_unsafe_ptr_arg_deref = { level = "allow" }
type_complexity = { level = "allow" }
single_match = { level = "allow" }
error_impl_error = { level = "allow" }
impl_trait_in_params = { level = "allow" }
indexing_slicing = { level = "allow" }
infinite_loop = { level = "allow" }
integer_division = { level = "allow" }
large_stack_frames = { level = "allow" }
missing_assert_message = { level = "allow" }
missing_errors_doc = { level = "allow" }
missing_panics_doc = { level = "allow" }
pattern_type_mismatch = { level = "allow" }
redundant_test_prefix = { level = "allow" }
ref_patterns = { level = "allow" }
renamed_function_params = { level = "allow" }
same_name_method = { level = "allow" }
shadow_reuse = { level = "allow" }
shadow_same = { level = "allow" }
shadow_unrelated = { level = "allow" }
tests_outside_test_module = { level = "allow" }
todo = { level = "allow" }
unimplemented = { level = "allow" }
unreachable = { level = "allow" }
unwrap_in_result = { level = "allow" }
unwrap_used = { level = "allow" }
[lints.rust]
unused_crate_dependencies = { level = "allow" }
unused_results = { level = "allow" }
impl_trait_redundant_captures = { level = "warn" }
missing_unsafe_on_extern = { level = "warn" }
redundant_imports = { level = "warn" }
redundant_lifetimes = { level = "warn" }
single_use_lifetimes = { level = "warn" }
trivial_numeric_casts = { level = "warn" }
unsafe_attr_outside_unsafe = { level = "warn" }
unsafe_op_in_unsafe_fn = { level = "warn" }
unstable_features = { level = "warn" }
unused_extern_crates = { level = "warn" }
unused_import_braces = { level = "warn" }
unused_lifetimes = { level = "warn" }
unused_macro_rules = { level = "warn" }
unused_qualifications = { level = "warn" }
absolute_paths_not_starting_with_crate = { level = "allow" }
ambiguous_negative_literals = { level = "allow" }
closure_returning_async_block = { level = "allow" }
deref_into_dyn_supertrait = { level = "allow" }
elided_lifetimes_in_paths = { level = "allow" }
ffi_unwind_calls = { level = "allow" }
impl_trait_overcaptures = { level = "allow" }
let_underscore_drop = { level = "allow" }
linker_messages = { level = "allow" }
macro_use_extern_crate = { level = "allow" }
missing_copy_implementations = { level = "allow" }
missing_debug_implementations = { level = "allow" }
missing_docs = { level = "allow" }
tail_expr_drop_order = { level = "allow" }
trivial_casts = { level = "allow" }
unit_bindings = { level = "allow" }
unreachable_pub = { level = "allow" }
unsafe_code = { level = "allow" }
variant_size_differences = { level = "allow" }

View File

@@ -0,0 +1,181 @@
# Nomos Storage Benchmarks
Goal: tune RocksDB for Nomos validator workloads using realistic data and sizes. The approach is to run benchmarks across different parameter settings and compare the results.
## What it does
- Generates datasets that approximate realistic sizes and access patterns.
- Runs mixed read/write validator-style workloads against RocksDB.
- Varies RocksDB parameters (cache, write buffer, compaction, block size, compression).
- Records throughput and basic variability across repeated runs.
## Quick start
1) Generate a dataset
```bash
POL_PROOF_DEV_MODE=true RUST_LOG=info cargo run --bin dataset_generator -- --config dataset_configs/annual_mainnet.toml
```
2) Run a baseline
```bash
RUST_LOG=info cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120
```
3) Try parameters and compare
```bash
# Cache size
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 25
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 55
# Write buffer (use the best cache size observed)
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 256
# Compaction jobs
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --cache-size 40 --write-buffer 128 --compaction-jobs 8
```
## How to evaluate
- One warmup and at least three measured runs per setting.
- Fixed seed when exact reproducibility is required.
- Compare mean ops/sec and variability across runs (see the example after this list).
- Change one setting at a time.
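For example, a single comparison point can be pinned down with the warmup, measurement, and seed flags from the CLI section below:
```bash
# One warmup run and three measured runs of a single setting, with a fixed seed
cargo run --bin storage_bench_runner -- \
  --profile mainnet --memory 8 --duration 120 \
  --cache-size 40 \
  --warmup-runs 1 --measurement-runs 3 --seed 12345
```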
## Parameter ranges under evaluation
- Block cache: 25-55% of RAM
- Write buffer: 64-256 MB
- Compaction jobs: 4-12
- Block size: 16-64 KB
- Compression: none, lz4, snappy, zstd (swept in the example below)
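One way to sweep a single parameter is a small shell loop over the flag values, holding the rest at the chosen baseline; for example, cycling through the compression codecs:
```bash
# Sweep the compression codec while keeping the other settings fixed
for codec in none lz4 snappy zstd; do
  cargo run --bin storage_bench_runner -- \
    --profile mainnet --memory 8 --duration 120 \
    --cache-size 40 --write-buffer 128 \
    --compression "$codec"
done
```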
## Profiles and datasets
Validator profiles:
- light (~100 validators)
- mainnet (~2000 validators)
- testnet (~1000 validators)
Datasets:
- quick_test.toml: ~27 MB (fast checks)
- testnet_sim.toml: ~1 GB
- annual_mainnet.toml: ~40 GB (see the block-data arithmetic below)
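As a rough check of the annual dataset's block portion, the block count follows directly from the values in `dataset_configs/annual_mainnet.toml` (30 s block time, 365 days, 34,371-byte blocks); DA shares and commitments come on top of this figure:
```bash
echo $((24 * 3600 / 30)) blocks/day                    # 2880
echo $((365 * 24 * 3600 / 30)) blocks/year             # 1051200
echo $((365 * 24 * 3600 / 30 * 34371 / 1024**3)) GiB   # ~33 GiB of raw block data
```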
## CLI
```bash
cargo run --bin storage_bench_runner -- [OPTIONS]
--profile light | mainnet | testnet
--memory RAM limit in GB (default: 8)
--duration Benchmark duration in seconds (default: 120)
--cache-size Block cache size as % of RAM (20-60)
--write-buffer Write buffer size in MB (64-512)
--compaction-jobs Background compaction jobs (4-16)
--block-size Table block size in KB (8-64)
--compression none | lz4 | snappy | zstd
--seed RNG seed
--warmup-runs Warmup iterations (default: 1)
--measurement-runs Measurement iterations (default: 3)
--read-only Read-only mode
```
Reproducible run:
```bash
cargo run --bin storage_bench_runner -- --profile mainnet --memory 8 --duration 120 --seed 12345
```
## Test plan
Purpose: verify that benchmarks run, produce results, and that parameter changes have measurable effects.
### Scope
- Dataset generation at different sizes.
- Benchmark runs across profiles.
- Parameter sweeps for cache, write buffer, compaction, block size, compression.
- Result capture (JSON) and basic summary output.
### Environments
- Memory limits: 4 GB, 8 GB, 16 GB.
- Datasets: small (quick), medium, large.
- Duration: short for exploration (60-120s), longer to confirm (180-300s).
### Test cases
1. Dataset generation
- Small dataset completes.
- Large dataset resumes if partially present.
- Outputs stored in expected path.
2. Baseline benchmark
- Runs with selected profile and memory limit.
- Produces JSON results and console summary.
3. Cache size
- 25%, 40%, 55%.
- Compare mean ops/sec and variability.
- Record chosen value.
4. Write buffer
- Keep chosen cache.
- 128 MB, 256 MB (and 64/512 MB if needed).
- Record impact, pick value.
5. Compaction jobs
- 4, 8, 12 (or within system limits).
- Check for stalls or CPU saturation.
6. Block size
- 16 KB, 32 KB, 64 KB.
- Evaluate read performance and variability.
7. Compression
- none, lz4, snappy, zstd.
- Compare throughput; consider disk footprint if relevant.
8. Reproducibility
- Repeat a chosen run with a fixed seed.
- Confirm similar results across iterations.
9. Memory sensitivity
- Re-run chosen settings at lower and higher memory limits.
- Check for regressions.
### Acceptance criteria
- All runs complete without errors.
- Results are saved (JSON present).
- Chosen settings show a measurable improvement over baseline.
- Variability remains acceptable for this use case.
### Reporting
- Log command lines and seeds used.
- Note dataset, profile, memory, and duration for each run.
- Store JSON result files together for comparison.
## Outputs
- Datasets: ~/.nomos_storage_benchmarks/rocksdb_data
- Results (JSON): ~/.nomos_storage_benchmarks/results/
- Console summary shows mean ops/sec and variability; the saved JSON can be summarized directly (example below).
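Assuming `jq` is available, the saved reports can be tabulated across runs (the field names follow the report structs in this crate):
```bash
jq -r '[.config_summary.profile, .results.statistics.mean_ops_sec, .results.statistics.variability_percent] | @tsv' \
  ~/.nomos_storage_benchmarks/results/bench_*.json
```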
## Requirements
- Rust 1.75+
- 8+ GB RAM (more for larger datasets)
- ~50+ GB disk for the largest dataset
## Notes
- Baseline first, then change one parameter at a time.
- Keep runs short while exploring; confirm with longer runs when needed.
## Why no general-purpose benchmarking library
- Workloads require long-running mixed operations (reads, range scans, writes) against a prebuilt dataset; typical micro-benchmark frameworks focus on short, isolated functions.
- We need control over dataset size/layout, memory limits, and external RocksDB options; this is easier with a purpose-built runner.
- Results include per-run JSON with config and summary metrics; integrating this into a generic harness would add overhead without benefit here.

View File

@@ -0,0 +1,19 @@
[dataset]
block_time_seconds = 30
days = 365
name = "annual_mainnet_conservative"
[network]
blobs_per_block = 50
load_name = "annual_mainnet"
total_subnets = 2048
[validator]
assigned_subnets = 10
[blocks]
size_bytes = 34371
[da]
commitment_size_bytes = 220000
share_size_bytes = 1024

View File

@@ -0,0 +1,19 @@
[dataset]
days = 1
block_time_seconds = 30
name = "quick_test"
[network]
load_name = "light_testnet"
blobs_per_block = 2
total_subnets = 2048
[validator]
assigned_subnets = 1
[blocks]
size_bytes = 10000
[da]
share_size_bytes = 512
commitment_size_bytes = 50000

View File

@@ -0,0 +1,19 @@
[dataset]
days = 7
block_time_seconds = 30
name = "testnet_simulation"
[network]
load_name = "medium_testnet"
blobs_per_block = 15
total_subnets = 2048
[validator]
assigned_subnets = 5
[blocks]
size_bytes = 34371
[da]
share_size_bytes = 1024
commitment_size_bytes = 220000

View File

@@ -0,0 +1,55 @@
# Spec-accurate validator operation profiles
[light]
name = "light"
description = "Light validator with minimal resources and spec-accurate patterns"
# Read frequencies
block_read_rate_hz = 2.0 # Block validation reads
da_share_read_rate_hz = 0.67 # DA sampling reads (20 samples per 30s block)
range_scan_rate_hz = 0.01 # Occasional sync serving
# Write frequencies
block_write_rate_hz = 0.033 # New block finalization (30s blocks)
da_share_write_rate_hz = 0.5 # New DA share storage
commitment_write_rate_hz = 0.5 # New commitments
# Access patterns (spec-accurate temporal distribution)
recent_access_ratio = 0.80 # 80% reads from recent data (Zipfian)
historical_access_ratio = 0.20 # 20% reads from historical data (uniform)
# Network scaling
total_validators = 100 # Small testnet
assigned_subnets = 20 # High subnet assignment for small network
[mainnet]
name = "mainnet"
description = "Mainnet validator with high activity and spec-accurate patterns"
# Read frequencies (higher validation load)
block_read_rate_hz = 10.0 # High block validation rate
da_share_read_rate_hz = 5.0 # Higher DA sampling frequency
range_scan_rate_hz = 0.1 # More frequent sync serving
# Write frequencies (mainnet load)
block_write_rate_hz = 0.033 # Same block time
da_share_write_rate_hz = 5.0 # High DA write activity
commitment_write_rate_hz = 5.0 # Matching commitment writes
# Access patterns (more recent focus)
recent_access_ratio = 0.90 # 90% recent access (heavy tip bias)
historical_access_ratio = 0.10 # 10% historical access
# Network scaling
total_validators = 2000 # Mainnet scale
assigned_subnets = 10 # Medium subnet assignment
[testnet]
name = "testnet"
description = "Testnet network with heavy sync activity and range scanning"
# Read frequencies (sync serving dominates)
block_read_rate_hz = 50.0 # Heavy block serving for sync
da_share_read_rate_hz = 10.0 # Moderate DA validation
range_scan_rate_hz = 10.0 # Continuous range scans for sync
# Write frequencies (reduced during sync)
block_write_rate_hz = 0.01 # Minimal new blocks
da_share_write_rate_hz = 0.1 # Reduced DA writes
commitment_write_rate_hz = 0.1 # Reduced commitments
# Access patterns (historical focus for sync)
recent_access_ratio = 0.20 # Mostly historical data
historical_access_ratio = 0.80 # Heavy historical access
# Network scaling
total_validators = 1000 # Medium network during sync
assigned_subnets = 10 # Standard subnet assignment
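# Note: the benchmark converts each rate into a polling interval
# (interval_ms = 1000 / rate_hz), so e.g. block_write_rate_hz = 0.033
# corresponds to roughly one block write every 30 s.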

View File

@@ -0,0 +1,440 @@
//! Storage capacity estimator
//!
//! Computes block and DA storage requirements for various time periods and
//! network scenarios. Produces summaries, time breakdowns, and simple hardware
//! recommendations.
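//!
//! Run with `cargo run --example storage_capacity_calculator`; results are
//! printed to stdout as JSON and saved under the benchmark results directory.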
use std::{collections::HashMap, fs};
use serde::{Deserialize, Serialize};
use storage_benchmarks::BenchConfig;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimePeriod {
/// Number of days represented by the period
pub days: u64,
/// Human-readable label (e.g., "1 year")
pub description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkConfig {
/// Block time in seconds
pub block_time_seconds: u64,
/// Average block size in bytes
pub avg_block_size_bytes: u64,
/// Total DA subnets
pub total_subnets: u64,
/// DA share size in bytes
pub da_share_size_bytes: u64,
/// DA commitment size in bytes
pub da_commitment_size_bytes: u64,
/// Shares per blob
pub shares_per_blob: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkScenario {
/// Scenario name
pub name: String,
/// Blobs per block
pub blobs_per_block: u64,
/// Total validators used to estimate DA responsibility
pub total_validators: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalculationConfig {
/// Time window for the calculation
pub time_period: TimePeriod,
/// Network parameters used across scenarios
pub network: NetworkConfig,
/// Scenarios to evaluate
pub scenarios: Vec<NetworkScenario>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockDataResults {
/// Blocks produced per day
pub blocks_per_day: u64,
/// Total blocks in the period
pub blocks_for_period: u64,
/// Average block size in KiB
pub avg_block_size_kb: u64,
/// Total block data size in GiB for the period
pub total_block_data_gb: f64,
/// Period label
pub time_period_description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioResults {
/// Scenario label
pub scenario_name: String,
/// Blobs per block for this scenario
pub blobs_per_block: u64,
/// Total validators
pub total_validators: u64,
/// Typical subnets assigned per validator
pub typical_subnets_per_validator: u64,
/// Percent of subnets likely assigned to a validator
pub subnet_assignment_percent: f64,
/// Count of DA shares stored by the validator over the period
pub shares_stored_count: u64,
/// Count of blobs assigned over the period
pub blobs_assigned_count: u64,
/// DA shares size in GiB
pub da_shares_gb: f64,
/// DA commitments size in GiB
pub da_commitments_gb: f64,
/// Total DA data size in GiB
pub total_da_gb: f64,
/// Total validator storage in GiB (blocks + DA)
pub total_validator_storage_gb: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeBreakdown {
/// Sequential period index
pub period_number: u64,
/// Label (Month/Week/Day N)
pub period_description: String,
/// Cumulative storage at this step in GiB
pub cumulative_gb: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareRecommendation {
/// Scenario label
pub scenario: String,
/// Required storage in GiB for the period
pub storage_gb_for_period: u64,
/// Recommended device size
pub recommended_storage: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageCalculationResults {
/// Input config used to compute results
pub calculation_config: CalculationConfig,
/// Aggregate block data for the period
pub block_data: BlockDataResults,
/// Per-scenario storage summaries
pub scenarios: Vec<ScenarioResults>,
/// Time-based accumulation for visualization
pub time_breakdown: Vec<TimeBreakdown>,
/// Simple hardware sizing suggestions
pub hardware_recommendations: Vec<HardwareRecommendation>,
/// Notes for stress testing considerations
pub stress_testing_notes: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CapacityCalculationReport {
pub calculation_results: std::collections::HashMap<String, StorageCalculationResults>,
pub summary: CalculationSummary,
pub metadata: ReportMetadata,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CalculationSummary {
pub scenarios_calculated: usize,
pub total_time_periods: usize,
pub calculation_timestamp: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ReportMetadata {
pub tool: String,
pub version: String,
pub description: String,
}
impl Default for NetworkConfig {
fn default() -> Self {
Self {
block_time_seconds: 30,
avg_block_size_bytes: 34_371,
total_subnets: 2048,
da_share_size_bytes: 1_024,
da_commitment_size_bytes: 220_000,
shares_per_blob: 512,
}
}
}
impl Default for CalculationConfig {
fn default() -> Self {
Self {
time_period: TimePeriod {
days: 365,
description: "1 year".to_string(),
},
network: NetworkConfig::default(),
scenarios: vec![
NetworkScenario {
name: "Conservative".to_string(),
blobs_per_block: 50,
total_validators: 2000,
},
NetworkScenario {
name: "Active".to_string(),
blobs_per_block: 100,
total_validators: 2000,
},
NetworkScenario {
name: "High Activity".to_string(),
blobs_per_block: 200,
total_validators: 3000,
},
NetworkScenario {
name: "Peak".to_string(),
blobs_per_block: 500,
total_validators: 5000,
},
],
}
}
}
/// Compute storage with blob/share separation for DA
///
/// - Includes blocks, DA shares, and commitments
/// - Returns summaries, breakdowns, and recommendations
fn calculate_storage_requirements(config: &CalculationConfig) -> StorageCalculationResults {
let blocks_per_day = (24 * 60 * 60) / config.network.block_time_seconds;
let total_blocks_for_period = config.time_period.days * blocks_per_day;
let block_data_for_period_gb = (total_blocks_for_period as f64
* config.network.avg_block_size_bytes as f64)
/ (1024.0 * 1024.0 * 1024.0);
let block_data = BlockDataResults {
blocks_per_day,
blocks_for_period: total_blocks_for_period,
avg_block_size_kb: config.network.avg_block_size_bytes / 1024,
total_block_data_gb: block_data_for_period_gb,
time_period_description: config.time_period.description.clone(),
};
let mut scenarios = Vec::new();
let mut scenario_storage_map = HashMap::new();
for scenario in &config.scenarios {
let typical_subnets_per_validator =
config.network.total_subnets / (scenario.total_validators / 10).max(1);
let subnet_assignment_probability =
typical_subnets_per_validator as f64 / config.network.total_subnets as f64;
let total_blobs_for_period = total_blocks_for_period * scenario.blobs_per_block;
let validator_assigned_blobs =
(total_blobs_for_period as f64 * subnet_assignment_probability) as u64;
let shares_per_assigned_blob =
config.network.shares_per_blob / config.network.total_subnets;
let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob.max(1);
let da_shares_size_gb = (total_shares_stored * config.network.da_share_size_bytes) as f64
/ (1024.0 * 1024.0 * 1024.0);
let da_commitments_size_gb = (validator_assigned_blobs
* config.network.da_commitment_size_bytes) as f64
/ (1024.0 * 1024.0 * 1024.0);
let total_da_size_gb = da_shares_size_gb + da_commitments_size_gb;
let total_storage_for_period = block_data_for_period_gb + total_da_size_gb;
scenario_storage_map.insert(scenario.name.clone(), total_da_size_gb);
scenarios.push(ScenarioResults {
scenario_name: scenario.name.clone(),
blobs_per_block: scenario.blobs_per_block,
total_validators: scenario.total_validators,
typical_subnets_per_validator,
subnet_assignment_percent: subnet_assignment_probability * 100.0,
shares_stored_count: total_shares_stored,
blobs_assigned_count: validator_assigned_blobs,
da_shares_gb: da_shares_size_gb,
da_commitments_gb: da_commitments_size_gb,
total_da_gb: total_da_size_gb,
total_validator_storage_gb: total_storage_for_period,
});
}
let breakdown_periods = if config.time_period.days >= 365 {
12
} else if config.time_period.days >= 30 {
config.time_period.days / 7
} else {
config.time_period.days
};
// Use the first configured scenario; HashMap iteration order is arbitrary.
let first_scenario_da_gb = config
.scenarios
.first()
.and_then(|scenario| scenario_storage_map.get(&scenario.name))
.copied()
.unwrap_or(0.0);
let total_gb_per_period = block_data_for_period_gb + first_scenario_da_gb;
let increment_gb = total_gb_per_period / breakdown_periods as f64;
let mut time_breakdown = Vec::new();
for period in 1..=breakdown_periods {
let cumulative_gb = increment_gb * period as f64;
let period_desc = if config.time_period.days >= 365 {
format!("Month {}", period)
} else if config.time_period.days >= 30 {
format!("Week {}", period)
} else {
format!("Day {}", period)
};
time_breakdown.push(TimeBreakdown {
period_number: period,
period_description: period_desc,
cumulative_gb,
});
}
let mut hardware_recommendations = Vec::new();
for scenario in &scenarios {
let storage_gb = scenario.total_validator_storage_gb as u64;
let recommended = if storage_gb < 50 {
"100GB+ storage"
} else if storage_gb < 100 {
"200GB+ storage"
} else if storage_gb < 200 {
"500GB+ storage"
} else if storage_gb < 500 {
"1TB+ storage"
} else {
"2TB+ storage"
};
hardware_recommendations.push(HardwareRecommendation {
scenario: scenario.scenario_name.clone(),
storage_gb_for_period: storage_gb,
recommended_storage: recommended.to_string(),
});
}
let stress_testing_notes = vec![
"Memory pressure increases with database size".to_string(),
"Cache efficiency decreases as dataset grows beyond memory".to_string(),
"Compaction overhead increases with write frequency".to_string(),
"Range scan performance degrades with database size".to_string(),
"Storage benchmarks should test multi-GB datasets for realism".to_string(),
format!(
"Test with datasets representing {}-{} days of operation",
config.time_period.days / 4,
config.time_period.days / 2
),
];
StorageCalculationResults {
calculation_config: config.clone(),
block_data,
scenarios,
time_breakdown,
hardware_recommendations,
stress_testing_notes,
}
}
fn main() {
let default_config = CalculationConfig::default();
let monthly_config = CalculationConfig {
time_period: TimePeriod {
days: 30,
description: "30 days".to_string(),
},
network: NetworkConfig::default(),
scenarios: vec![
NetworkScenario {
name: "Testnet Conservative".to_string(),
blobs_per_block: 25,
total_validators: 100,
},
NetworkScenario {
name: "Testnet Active".to_string(),
blobs_per_block: 50,
total_validators: 100,
},
],
};
let weekly_config = CalculationConfig {
time_period: TimePeriod {
days: 7,
description: "1 week".to_string(),
},
network: NetworkConfig {
block_time_seconds: 15,
shares_per_blob: 256,
..NetworkConfig::default()
},
scenarios: vec![NetworkScenario {
name: "Development".to_string(),
blobs_per_block: 10,
total_validators: 10,
}],
};
let configs = vec![
("annual", default_config),
("monthly", monthly_config),
("weekly", weekly_config),
];
let mut all_results = HashMap::new();
for (name, config) in configs {
let results = calculate_storage_requirements(&config);
all_results.insert(name, results);
}
save_capacity_results(&all_results);
match serde_json::to_string_pretty(&all_results) {
Ok(json) => println!("{}", json),
Err(e) => eprintln!("Error serializing results: {}", e),
}
}
fn save_capacity_results(all_results: &HashMap<&str, StorageCalculationResults>) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("storage_capacity_calculation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let calculation_results: std::collections::HashMap<String, StorageCalculationResults> =
all_results
.iter()
.map(|(k, v)| (k.to_string(), v.clone()))
.collect();
let report = CapacityCalculationReport {
calculation_results,
summary: CalculationSummary {
scenarios_calculated: all_results.len(),
total_time_periods: all_results
.values()
.map(|r| r.scenarios.len())
.sum::<usize>(),
calculation_timestamp: chrono::Utc::now().to_rfc3339(),
},
metadata: ReportMetadata {
tool: "storage_capacity_calculator".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "Storage capacity estimates for Nomos validator scenarios".to_string(),
},
};
match fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => eprintln!(
"Capacity calculation results saved to: {}",
filepath.display()
),
Err(e) => eprintln!(
"Failed to save capacity results to {}: {}",
filepath.display(),
e
),
}
}

View File

@@ -0,0 +1,12 @@
[toolchain]
# Keep this version in sync also in the following places:
# * Dockerfile
# * flake.nix
# * testnet/Dockerfile
# Also, bump the nightly toolchain pinned in the following places to the latest nightly of the new version:
# * .github/workflows/code-check.yml (fmt job)
# * .pre-commit-config.yml (fmt hook)
# Then, if there is any new allow-by-default rustc lint introduced/stabilized, add it to the respective entry in our `config.toml`.
channel = "1.90.0"
# Although clippy should be included in the default profile, in some cases it is not installed, so we force it with an explicit declaration.
components = ["clippy"]

View File

@@ -0,0 +1,5 @@
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
reorder_imports = true
reorder_modules = true
wrap_comments = true

View File

@@ -0,0 +1,71 @@
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use super::{create_blob_id, create_header_id};
pub async fn analyze_dataset(
storage: &mut RocksBackend,
) -> Result<(usize, usize), Box<dyn std::error::Error>> {
log::info!("Analyzing dataset size with adaptive probing...");
let mut upper_bound = 10000;
while upper_bound < 10_000_000 {
let header_id = create_header_id(upper_bound);
let block_result = storage.get_block(header_id).await;
match block_result {
Ok(Some(_)) => {}
Ok(None) | Err(_) => {
break;
}
}
upper_bound *= 2;
}
// Binary search from 1 so that datasets smaller than the initial 10_000-block
// probe bound are still counted correctly.
let mut low = 1;
let mut high = upper_bound;
let mut block_count = 0;
while low <= high {
let mid = usize::midpoint(low, high);
let header_id = create_header_id(mid);
match storage.get_block(header_id).await {
Ok(Some(_)) => {
block_count = mid;
low = mid + 1;
}
_ => {
high = mid - 1;
}
}
}
let mut share_count = 0;
let da_sample_size = std::cmp::min(1000, block_count / 100);
for blob_idx in 0..da_sample_size {
for subnet in 0..50 {
let blob_id = create_blob_id(blob_idx, 0);
let share_idx = [subnet as u8, 0u8];
if let Ok(Some(_)) = storage.get_light_share(blob_id, share_idx).await {
share_count += 1;
}
}
}
let estimated_da_total = if da_sample_size > 0 {
share_count * (block_count / da_sample_size)
} else {
share_count
};
log::info!("DA estimation: sampled {share_count} objects from {da_sample_size} blocks, extrapolated to {estimated_da_total} total (assumes uniform distribution)");
log::info!(
"Dataset analysis complete: {block_count} blocks, ~{estimated_da_total} DA objects (sampled)"
);
Ok((block_count, estimated_da_total))
}

View File

@@ -0,0 +1,11 @@
pub mod analysis;
pub mod runner;
pub mod types;
pub mod utilities;
pub mod workloads;
pub use analysis::*;
pub use runner::*;
pub use types::*;
pub use utilities::*;
pub use workloads::*;

View File

@@ -0,0 +1,250 @@
use std::time::Duration;
use log::info;
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use serde::{Deserialize, Serialize};
use super::{
analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary, BenchmarkReport,
BenchmarkResultsSummary, ConcurrentBenchmarkResult, ReportMetadata, StatisticsSummary,
};
use crate::{
config::{ProductionBenchConfig, ValidatorProfile, ValidatorProfiles},
BenchConfig,
};
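/// Drives one benchmark session end to end: dataset analysis, warmup runs,
/// measurement runs, statistics, and a JSON report on disk.
///
/// Construct with [`BenchmarkRunner::new`] and call
/// [`BenchmarkRunner::execute_benchmark`] to run all phases.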
pub struct BenchmarkRunner {
config: ProductionBenchConfig,
profile: ValidatorProfile,
storage_config: BenchConfig,
execution_state: ExecutionState,
results: BenchmarkResults,
}
#[derive(Debug, Clone, Default)]
struct ExecutionState {
warmup_completed: usize,
measurements_completed: usize,
dataset_size: Option<(usize, usize)>,
}
#[derive(Debug, Clone, Default)]
pub struct BenchmarkResults {
pub raw_measurements: Vec<f64>,
pub warmup_results: Vec<f64>,
pub detailed_results: Vec<ConcurrentBenchmarkResult>,
pub mean_ops_sec: f64,
pub variability_percent: f64,
pub best_result: Option<ConcurrentBenchmarkResult>,
pub stats_summary: Option<RocksDbStatsSummary>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RocksDbStatsSummary {
pub cache_hit_rate_improvement: f64,
pub l0_file_growth: i64,
pub compaction_activity: u64,
pub memory_usage_change: i64,
}
impl BenchmarkRunner {
pub fn new(config: ProductionBenchConfig) -> Result<Self, Box<dyn std::error::Error>> {
config.validate()?;
let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
let profile = profiles
.get_profile(&config.profile.to_string())
.ok_or_else(|| format!("Profile '{}' not found", config.profile))?
.clone();
let storage_config = BenchConfig::production();
if !storage_config.settings.db_path.exists() {
return Err("No dataset found - run dataset_generator first".into());
}
Ok(Self {
config,
profile,
storage_config,
execution_state: ExecutionState::default(),
results: BenchmarkResults::default(),
})
}
pub async fn execute_benchmark(
&mut self,
) -> Result<BenchmarkResults, Box<dyn std::error::Error>> {
self.setup_memory_limits();
self.analyze_dataset().await?;
info!("Starting warmup phase: {} runs", self.config.warmup_runs);
for i in 1..=self.config.warmup_runs {
info!("Warmup run {}/{}", i, self.config.warmup_runs);
let result = self.run_single_iteration().await?;
self.results.warmup_results.push(result);
self.execution_state.warmup_completed = i;
}
info!(
"Starting measurement phase: {} runs",
self.config.measurement_runs
);
for i in 1..=self.config.measurement_runs {
info!("Measurement run {}/{}", i, self.config.measurement_runs);
let result = self.run_single_iteration().await?;
info!("Run {i} result: {result:.1} ops/sec");
self.results.raw_measurements.push(result);
self.execution_state.measurements_completed = i;
}
self.calculate_final_statistics();
self.save_results();
Ok(self.results.clone())
}
fn setup_memory_limits(&self) {
info!("Setting memory limit to {}GB", self.config.memory);
}
async fn analyze_dataset(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let mut storage_settings = self.storage_config.settings.clone();
storage_settings.read_only = self.config.read_only;
let mut storage = RocksBackend::new(storage_settings)?;
let dataset_size = analyze_dataset(&mut storage).await?;
self.execution_state.dataset_size = Some(dataset_size);
info!(
"Dataset analysis: {} blocks, {} shares",
dataset_size.0, dataset_size.1
);
Ok(())
}
async fn run_single_iteration(&mut self) -> Result<f64, Box<dyn std::error::Error>> {
let mut storage_settings = self.storage_config.settings.clone();
storage_settings.read_only = self.config.read_only;
let storage = RocksBackend::new(storage_settings)?;
let dataset_size = self.execution_state.dataset_size.unwrap_or((0, 0));
match run_concurrent_validator_benchmark(
storage,
Duration::from_secs(self.config.duration),
&self.profile,
dataset_size,
self.config.read_only,
)
.await
{
Ok(detailed_result) => {
let throughput = detailed_result.combined_throughput();
self.results.detailed_results.push(detailed_result);
Ok(throughput)
}
Err(e) => {
log::error!("Benchmark iteration failed: {e}");
Ok(0.0)
}
}
}
fn calculate_final_statistics(&mut self) {
if self.results.raw_measurements.is_empty() {
return;
}
let mean = self.results.raw_measurements.iter().sum::<f64>()
/ self.results.raw_measurements.len() as f64;
let min = self
.results
.raw_measurements
.iter()
.fold(f64::INFINITY, |a, &b| a.min(b));
let max = self
.results
.raw_measurements
.iter()
.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
self.results.mean_ops_sec = mean;
self.results.variability_percent = variability;
if let Some(best_idx) = self
.results
.raw_measurements
.iter()
.enumerate()
.max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
.map(|(idx, _)| idx)
{
self.results.best_result = self.results.detailed_results.get(best_idx).cloned();
}
}
fn save_results(&self) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!(
"bench_{}_{}_{}gb_{}.json",
self.config.profile, timestamp, self.config.memory, self.config.duration
);
let filepath = results_dir.join(filename);
let report = BenchmarkReport {
config_summary: BenchConfigSummary {
profile: format!("{:?}", self.config.profile),
memory_gb: self.config.memory,
duration_seconds: self.config.duration,
warmup_runs: self.config.warmup_runs,
measurement_runs: self.config.measurement_runs,
},
results: BenchmarkResultsSummary {
raw_measurements: self.results.raw_measurements.clone(),
warmup_results: self.results.warmup_results.clone(),
statistics: StatisticsSummary {
mean_ops_sec: self.results.mean_ops_sec,
// Min/max over the recorded measurements (0.0 when no runs completed).
min_ops_sec: self.results.raw_measurements.iter().copied().reduce(f64::min).unwrap_or(0.0),
max_ops_sec: self.results.raw_measurements.iter().copied().reduce(f64::max).unwrap_or(0.0),
variability_percent: self.results.variability_percent,
sample_count: self.results.raw_measurements.len(),
},
},
metadata: ReportMetadata {
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_owned(),
runner_type: "batch".to_owned(),
},
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(()) => log::info!(
"Stateful benchmark results saved to: {}",
filepath.display()
),
Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
}
}
#[must_use]
pub const fn execution_progress(&self) -> (usize, usize, usize, usize) {
(
self.execution_state.warmup_completed,
self.config.warmup_runs,
self.execution_state.measurements_completed,
self.config.measurement_runs,
)
}
#[must_use]
pub const fn current_results(&self) -> &BenchmarkResults {
&self.results
}
}

View File

@@ -0,0 +1,266 @@
use std::time::Duration;
use serde::{Deserialize, Serialize};
use crate::{
config::types::WorkloadType,
metrics::{LatencyPercentiles, RocksDbStats},
};
#[derive(Debug, Clone)]
pub struct WorkloadStreamResult {
pub workload_type: WorkloadType,
pub executed: bool,
pub operations_total: u64,
pub operations_success: u64,
pub bytes_read: u64,
pub bytes_written: u64,
pub duration: Duration,
pub errors: u64,
pub cache_misses: u64,
pub latency_percentiles: Option<LatencyPercentiles>,
}
#[derive(Debug, Clone)]
pub struct ConcurrentBenchmarkResult {
pub block_validation: WorkloadStreamResult,
pub da_sampling: WorkloadStreamResult,
pub da_commitments: WorkloadStreamResult,
pub ibd_serving: WorkloadStreamResult,
pub block_storage: WorkloadStreamResult,
pub da_storage: WorkloadStreamResult,
pub total_duration: Duration,
pub peak_memory_mb: f64,
pub resource_contention_factor: f64,
pub concurrent_operations_peak: u64,
pub rocksdb_stats_before: RocksDbStats,
pub rocksdb_stats_after: RocksDbStats,
}
impl ConcurrentBenchmarkResult {
#[must_use]
pub const fn total_operations(&self) -> u64 {
let mut total = 0;
if self.block_validation.executed {
total += self.block_validation.operations_total;
}
if self.da_sampling.executed {
total += self.da_sampling.operations_total;
}
if self.da_commitments.executed {
total += self.da_commitments.operations_total;
}
if self.ibd_serving.executed {
total += self.ibd_serving.operations_total;
}
if self.block_storage.executed {
total += self.block_storage.operations_total;
}
if self.da_storage.executed {
total += self.da_storage.operations_total;
}
total
}
#[must_use]
pub const fn total_success(&self) -> u64 {
let mut total = 0;
if self.block_validation.executed {
total += self.block_validation.operations_success;
}
if self.da_sampling.executed {
total += self.da_sampling.operations_success;
}
if self.da_commitments.executed {
total += self.da_commitments.operations_success;
}
if self.ibd_serving.executed {
total += self.ibd_serving.operations_success;
}
if self.block_storage.executed {
total += self.block_storage.operations_success;
}
if self.da_storage.executed {
total += self.da_storage.operations_success;
}
total
}
#[must_use]
pub fn combined_throughput(&self) -> f64 {
self.total_success() as f64 / self.total_duration.as_secs_f64()
}
#[must_use]
pub fn success_rate(&self) -> f64 {
if self.total_operations() > 0 {
self.total_success() as f64 / self.total_operations() as f64
} else {
0.0
}
}
#[must_use]
pub fn total_data_throughput_mbps(&self) -> f64 {
let mut total_bytes = 0;
if self.block_validation.executed {
total_bytes += self.block_validation.bytes_read;
}
if self.da_sampling.executed {
total_bytes += self.da_sampling.bytes_read;
}
if self.da_commitments.executed {
total_bytes += self.da_commitments.bytes_read;
}
if self.ibd_serving.executed {
total_bytes += self.ibd_serving.bytes_read;
}
if self.block_storage.executed {
total_bytes += self.block_storage.bytes_written;
}
if self.da_storage.executed {
total_bytes += self.da_storage.bytes_written;
}
total_bytes as f64 / 1024.0 / 1024.0 / self.total_duration.as_secs_f64()
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StorageBenchReport {
pub benchmark_config: BenchConfigSummary,
pub results: BenchResultsSummary,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchConfigSummary {
pub profile: String,
pub memory_gb: u32,
pub duration_seconds: u64,
pub warmup_runs: usize,
pub measurement_runs: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchResultsSummary {
pub raw_measurements: Vec<f64>,
pub statistics: StatisticsSummary,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct StatisticsSummary {
pub mean_ops_sec: f64,
pub min_ops_sec: f64,
pub max_ops_sec: f64,
pub variability_percent: f64,
pub sample_count: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetGenerationReport {
pub generation_summary: GenerationSummary,
pub performance: GenerationPerformance,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationSummary {
pub blocks_generated: usize,
pub da_objects_generated: usize,
pub total_objects: usize,
pub duration_seconds: u64,
pub duration_minutes: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationPerformance {
pub total_rate_objects_per_sec: f64,
pub block_rate_per_sec: f64,
pub da_rate_per_sec: f64,
pub cpu_cores_used: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetVerificationReport {
pub verification_summary: VerificationSummary,
pub data_sizes: DataSizesSummary,
pub completeness_estimates: CompletenessSummary,
pub performance: VerificationPerformance,
pub warnings: WarningsSummary,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct VerificationSummary {
pub blocks_found: usize,
pub da_shares_found: usize,
pub da_commitments_found: usize,
pub total_objects_found: usize,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DataSizesSummary {
pub total_block_size_bytes: u64,
pub total_share_size_bytes: u64,
pub total_commitment_size_bytes: u64,
pub total_verified_size_bytes: u64,
pub total_verified_size_gb: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CompletenessSummary {
pub block_completeness_percent: f64,
pub da_completeness_percent: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct VerificationPerformance {
pub verification_time_seconds: f64,
pub objects_verified_per_sec: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct WarningsSummary {
pub block_generation_incomplete: bool,
pub data_size_smaller_than_expected: bool,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkReport {
pub config_summary: BenchConfigSummary,
pub results: BenchmarkResultsSummary,
pub metadata: ReportMetadata,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct BenchmarkResultsSummary {
pub raw_measurements: Vec<f64>,
pub warmup_results: Vec<f64>,
pub statistics: StatisticsSummary,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ReportMetadata {
pub timestamp: String,
pub tool_version: String,
pub runner_type: String,
}

View File

@@ -0,0 +1,37 @@
use std::time::Duration;
use crate::config::ValidatorProfile;
#[must_use]
pub fn create_header_id(index: usize) -> nomos_core::header::HeaderId {
let mut id = [0u8; 32];
id[0..4].copy_from_slice(&(index as u32).to_be_bytes());
nomos_core::header::HeaderId::from(id)
}
#[must_use]
pub fn create_blob_id(block: usize, blob_in_block: usize) -> nomos_core::da::BlobId {
let mut id = [0u8; 32];
id[0..4].copy_from_slice(&(block as u32).to_be_bytes());
id[4..8].copy_from_slice(&(blob_in_block as u32).to_be_bytes());
nomos_core::da::BlobId::from(id)
}
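/// Converts a target operation frequency (Hz) into a polling interval,
/// rejecting non-positive frequencies.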
pub fn safe_interval_from_hz(frequency_hz: f64, workload_type: &str) -> Result<Duration, String> {
if frequency_hz <= 0.0 {
return Err(format!(
"Invalid frequency {frequency_hz} Hz for {workload_type}"
));
}
// Clamp to at least 1 ms; `tokio::time::interval` panics on a zero period.
let interval_ms = ((1000.0 / frequency_hz) as u64).max(1);
Ok(Duration::from_millis(interval_ms))
}
#[must_use]
pub fn estimate_sequential_performance(profile: &ValidatorProfile) -> f64 {
profile.range_scan_rate_hz.mul_add(
10.0,
profile.block_read_rate_hz + profile.da_share_read_rate_hz + profile.da_share_write_rate_hz,
)
}

View File

@@ -0,0 +1,9 @@
pub mod orchestrator;
pub mod reads;
pub mod runners;
pub mod writes;
pub use orchestrator::*;
pub use reads::*;
pub use runners::*;
pub use writes::*;

View File

@@ -0,0 +1,126 @@
use std::{sync::Arc, time::Instant};
use log::info;
use nomos_storage::backends::rocksdb::RocksBackend;
use tokio::sync::Mutex;
use super::{
super::{estimate_sequential_performance, ConcurrentBenchmarkResult},
reads::{
run_block_validation_workload, run_da_commitments_workload, run_da_sampling_workload,
run_ibd_serving_workload,
},
writes::{run_conditional_block_storage_workload, run_conditional_da_storage_workload},
};
use crate::{config::ValidatorProfile, metrics::StatsCollector};
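/// Runs the six validator workload streams (block validation, DA sampling,
/// DA commitments, IBD serving, block storage, DA storage) concurrently against
/// a shared RocksDB backend via `tokio::join!`, then aggregates per-stream
/// results and RocksDB statistics into a single `ConcurrentBenchmarkResult`.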
pub async fn run_concurrent_validator_benchmark(
storage: RocksBackend,
duration: std::time::Duration,
profile: &ValidatorProfile,
dataset_size: (usize, usize),
is_read_only: bool,
) -> Result<ConcurrentBenchmarkResult, Box<dyn std::error::Error>> {
if is_read_only && (profile.block_write_rate_hz > 0.0 || profile.da_share_write_rate_hz > 0.0) {
log::warn!("Storage is read-only but profile has write operations. Write workloads will be skipped.");
}
let storage = Arc::new(Mutex::new(storage));
let mut stats_collector = StatsCollector::new();
stats_collector.collect_before(&*storage.lock().await);
let start_time = Instant::now();
info!(
"Starting concurrent validator simulation for {:.1}s",
duration.as_secs_f64()
);
info!(
"Network-aware concurrency: {} validators \u{2192} {} IBD streams, {} DA streams",
profile.total_validators,
profile.ibd_concurrent_streams(),
profile.da_concurrent_streams()
);
let (
block_validation_result,
da_sampling_result,
da_commitments_result,
ibd_serving_result,
block_storage_result,
da_storage_result,
) = tokio::join!(
run_block_validation_workload(
Arc::clone(&storage),
duration,
profile.block_read_rate_hz,
dataset_size.0,
profile
),
run_da_sampling_workload(
Arc::clone(&storage),
duration,
profile.da_share_read_rate_hz,
dataset_size.0,
profile
),
run_da_commitments_workload(
Arc::clone(&storage),
duration,
profile.da_share_read_rate_hz * 0.3,
dataset_size.0,
profile
),
run_ibd_serving_workload(
Arc::clone(&storage),
duration,
profile.range_scan_rate_hz,
dataset_size.0
),
run_conditional_block_storage_workload(
Arc::clone(&storage),
duration,
profile.block_write_rate_hz,
dataset_size.0,
is_read_only
),
run_conditional_da_storage_workload(
Arc::clone(&storage),
duration,
profile.da_share_write_rate_hz,
dataset_size.1,
is_read_only
)
);
let total_duration = start_time.elapsed();
stats_collector.collect_after(&*storage.lock().await);
let sequential_estimated_throughput = estimate_sequential_performance(profile);
let actual_concurrent_throughput = (block_validation_result.operations_success
+ da_sampling_result.operations_success
+ da_commitments_result.operations_success
+ ibd_serving_result.operations_success
+ block_storage_result.operations_success
+ da_storage_result.operations_success) as f64
/ total_duration.as_secs_f64();
let contention_factor = actual_concurrent_throughput / sequential_estimated_throughput;
Ok(ConcurrentBenchmarkResult {
block_validation: block_validation_result,
da_sampling: da_sampling_result,
da_commitments: da_commitments_result,
ibd_serving: ibd_serving_result,
block_storage: block_storage_result,
da_storage: da_storage_result,
total_duration,
peak_memory_mb: 0.0,
resource_contention_factor: contention_factor,
concurrent_operations_peak: 6,
rocksdb_stats_before: stats_collector.before.clone(),
rocksdb_stats_after: stats_collector.after.clone(),
})
}

View File

@@ -0,0 +1,302 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use tokio::sync::Mutex;
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
use crate::{
config::{types::WorkloadType, ValidatorProfile},
data::{select_block_spec_accurate, select_da_spec_accurate},
metrics::LatencyTracker,
};
pub async fn run_block_validation_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let block_index = select_block_spec_accurate(result.operations_total, max_blocks, profile);
let header_id = create_header_id(block_index);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_block(header_id).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_da_sampling_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
let blob_id = create_blob_id(blob_index, 0);
let share_idx = [(result.operations_total % 20) as u8, 0u8];
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_light_share(blob_id, share_idx).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_ibd_serving_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
) -> WorkloadStreamResult {
const IBD_CHUNK_SIZE: usize = 1000;
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::IbdServing,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let max_safe_blocks = max_blocks.saturating_sub(IBD_CHUNK_SIZE).max(1);
let start_block = (result.operations_total as usize * IBD_CHUNK_SIZE) % max_safe_blocks;
let start_slot = cryptarchia_engine::Slot::from(start_block as u64);
let end_slot = cryptarchia_engine::Slot::from((start_block + IBD_CHUNK_SIZE) as u64);
let Some(limit) = std::num::NonZeroUsize::new(IBD_CHUNK_SIZE) else {
log::error!("Invalid IBD chunk size: {IBD_CHUNK_SIZE}");
result.errors += 1;
continue;
};
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let scan_result = storage_guard
.scan_immutable_block_ids(start_slot..=end_slot, limit)
.await;
if let Ok(header_ids) = &scan_result {
for header_id in header_ids.iter().take(IBD_CHUNK_SIZE) {
let _ = storage_guard.get_block(*header_id).await;
}
}
drop(storage_guard);
scan_result
})
.await;
match operation_result {
Ok(header_ids) => {
result.operations_success += 1;
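// 34_371 bytes: the average block size assumed by the dataset configs (size_bytes)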
let estimated_bytes = header_ids.len() as u64 * 34371;
result.bytes_read += estimated_bytes;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
pub async fn run_da_commitments_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaCommitments,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_index = select_da_spec_accurate(result.operations_total, max_blocks, profile);
let blob_id = create_blob_id(blob_index, 0);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let get_result = storage_guard.get_shared_commitments(blob_id).await;
drop(storage_guard);
get_result
})
.await;
match operation_result {
Ok(Some(data)) => {
result.operations_success += 1;
result.bytes_read += data.len() as u64;
}
Ok(None) => {}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}

View File

@ -0,0 +1,293 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use async_trait::async_trait;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use serde::{Deserialize, Serialize};
use tokio::sync::Mutex;
use super::super::{create_blob_id, create_header_id, safe_interval_from_hz, WorkloadStreamResult};
use crate::{
config::{types::WorkloadType, ValidatorProfile},
data::{select_block_spec_accurate, select_da_spec_accurate},
metrics::LatencyTracker,
};
#[async_trait]
pub trait WorkloadRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult;
fn workload_type(&self) -> WorkloadType;
fn is_read_only(&self) -> bool;
}
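// Intended use (a sketch; `storage`, `profile` and `max_blocks` are assumed to be
// supplied by the benchmark driver):
//
//     let mut runner = BlockValidationRunner::new(storage, profile, max_blocks, 10.0);
//     let result = WorkloadRunner::execute(&mut runner, Duration::from_secs(60)).await;
//     assert!(runner.is_read_only());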
pub struct BlockValidationRunner {
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
latency_tracker: LatencyTracker,
execution_stats: WorkloadExecutionStats,
}
pub struct DaSamplingRunner {
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
latency_tracker: LatencyTracker,
execution_stats: WorkloadExecutionStats,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WorkloadExecutionStats {
pub operations_attempted: u64,
pub operations_successful: u64,
pub bytes_processed: u64,
pub errors_encountered: u64,
pub cache_misses_estimated: u64,
pub execution_start: Option<chrono::DateTime<chrono::Utc>>,
pub last_operation_time: Option<chrono::DateTime<chrono::Utc>>,
}
impl BlockValidationRunner {
pub fn new(
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
) -> Self {
Self {
storage,
profile,
max_blocks,
frequency_hz,
latency_tracker: LatencyTracker::new(),
execution_stats: WorkloadExecutionStats::default(),
}
}
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
self.execution_stats.execution_start = Some(chrono::Utc::now());
let interval = match safe_interval_from_hz(self.frequency_hz, "block_validation") {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
return self.create_error_result(duration);
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
self.execute_single_block_validation().await;
}
self.create_final_result(duration)
}
async fn execute_single_block_validation(&mut self) {
let block_index = select_block_spec_accurate(
self.execution_stats.operations_attempted,
self.max_blocks,
&self.profile,
);
let header_id = create_header_id(block_index);
let operation_result = self
.latency_tracker
.record_async_operation(|| async {
let mut storage_guard = self.storage.lock().await;
let result = storage_guard.get_block(header_id).await;
drop(storage_guard);
result
})
.await;
match operation_result {
Ok(Some(data)) => {
self.execution_stats.operations_successful += 1;
self.execution_stats.bytes_processed += data.len() as u64;
}
Ok(None) => {}
Err(_) => self.execution_stats.errors_encountered += 1,
}
self.execution_stats.operations_attempted += 1;
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
}
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: true,
operations_total: self.execution_stats.operations_attempted,
operations_success: self.execution_stats.operations_successful,
bytes_read: self.execution_stats.bytes_processed,
bytes_written: 0,
duration,
errors: self.execution_stats.errors_encountered,
cache_misses: self.execution_stats.cache_misses_estimated,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::BlockValidation,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 1,
cache_misses: 0,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
#[must_use]
pub const fn execution_state(&self) -> &WorkloadExecutionStats {
&self.execution_stats
}
}
#[async_trait]
impl WorkloadRunner for BlockValidationRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
Self::execute(self, duration).await
}
fn workload_type(&self) -> WorkloadType {
WorkloadType::BlockValidation
}
fn is_read_only(&self) -> bool {
true
}
}
impl DaSamplingRunner {
pub fn new(
storage: Arc<Mutex<RocksBackend>>,
profile: ValidatorProfile,
max_blocks: usize,
frequency_hz: f64,
) -> Self {
Self {
storage,
profile,
max_blocks,
frequency_hz,
latency_tracker: LatencyTracker::new(),
execution_stats: WorkloadExecutionStats::default(),
}
}
pub async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
self.execution_stats.execution_start = Some(chrono::Utc::now());
let interval = match safe_interval_from_hz(self.frequency_hz, "da_sampling") {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
return self.create_error_result(duration);
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
self.execute_single_da_sample().await;
}
self.create_final_result(duration)
}
async fn execute_single_da_sample(&mut self) {
let blob_index = select_da_spec_accurate(
self.execution_stats.operations_attempted,
self.max_blocks,
&self.profile,
);
let blob_id = create_blob_id(blob_index, 0);
let share_idx = [(self.execution_stats.operations_attempted % 20) as u8, 0u8];
let operation_result = self
.latency_tracker
.record_async_operation(|| async {
let mut storage_guard = self.storage.lock().await;
let result = storage_guard.get_light_share(blob_id, share_idx).await;
drop(storage_guard);
result
})
.await;
match operation_result {
Ok(Some(data)) => {
self.execution_stats.operations_successful += 1;
self.execution_stats.bytes_processed += data.len() as u64;
}
Ok(None) => {}
Err(_) => self.execution_stats.errors_encountered += 1,
}
self.execution_stats.operations_attempted += 1;
self.execution_stats.last_operation_time = Some(chrono::Utc::now());
}
fn create_final_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: true,
operations_total: self.execution_stats.operations_attempted,
operations_success: self.execution_stats.operations_successful,
bytes_read: self.execution_stats.bytes_processed,
bytes_written: 0,
duration,
errors: self.execution_stats.errors_encountered,
cache_misses: self.execution_stats.cache_misses_estimated,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
fn create_error_result(&self, duration: Duration) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type: WorkloadType::DaSampling,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 1,
cache_misses: 0,
latency_percentiles: Some(self.latency_tracker.get_percentiles()),
}
}
}
#[async_trait]
impl WorkloadRunner for DaSamplingRunner {
async fn execute(&mut self, duration: Duration) -> WorkloadStreamResult {
Self::execute(self, duration).await
}
fn workload_type(&self) -> WorkloadType {
WorkloadType::DaSampling
}
fn is_read_only(&self) -> bool {
true
}
}

View File

@ -0,0 +1,227 @@
use std::{
sync::Arc,
time::{Duration, Instant},
};
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::rocksdb::RocksBackend,
};
use tokio::sync::Mutex;
use super::super::{
safe_interval_from_hz,
utilities::{create_blob_id, create_header_id},
WorkloadStreamResult,
};
use crate::{
config::types::WorkloadType,
data::{create_block_data, create_commitment, create_da_share},
metrics::LatencyTracker,
};
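/// Write-path workload for chain data: each tick stores a ~34 KB block at the next
/// height and, on success, registers its slot in the immutable block-ID index before
/// advancing the height.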
pub async fn run_block_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_block_height: usize,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::BlockStorage,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
let mut current_height = starting_block_height;
while Instant::now() < end_time {
ticker.tick().await;
let header_id = create_header_id(current_height);
let block_data = create_block_data(current_height, 34_371);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let store_result = storage_guard
.store_block(header_id, block_data.clone())
.await;
if store_result.is_ok() {
let slot = cryptarchia_engine::Slot::from(current_height as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
let _ = storage_guard.store_immutable_block_ids(ids).await;
}
drop(storage_guard);
store_result
})
.await;
match operation_result {
Ok(()) => {
result.operations_success += 1;
result.bytes_written += block_data.len() as u64;
current_height += 1;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
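/// Write-path workload for DA data: each tick stores a 1 KiB light share and, when
/// commitment generation succeeds, a shared commitment for the same blob;
/// `bytes_written` counts the share plus a nominal 220 KB per operation.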
pub async fn run_da_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_share_count: usize,
) -> WorkloadStreamResult {
let mut result = WorkloadStreamResult {
workload_type: WorkloadType::DaStorage,
executed: true,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration,
errors: 0,
cache_misses: 0,
latency_percentiles: None,
};
let mut latency_tracker = LatencyTracker::new();
let interval = match safe_interval_from_hz(frequency_hz, &result.workload_type.to_string()) {
Ok(interval) => interval,
Err(e) => {
log::warn!("{e}");
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
return result;
}
};
let mut ticker = tokio::time::interval(interval);
let end_time = Instant::now() + duration;
while Instant::now() < end_time {
ticker.tick().await;
let blob_id = create_blob_id(starting_share_count + result.operations_total as usize, 0);
let share_idx = [(result.operations_total % 20) as u8, 0u8];
let share_data = create_da_share(
starting_share_count + result.operations_total as usize,
0,
1024,
);
let operation_result = latency_tracker
.record_async_operation(|| async {
let mut storage_guard = storage.lock().await;
let store_result = storage_guard
.store_light_share(blob_id, share_idx, share_data.clone())
.await;
if store_result.is_ok() {
if let Ok(commitment) = create_commitment(
starting_share_count + result.operations_total as usize,
0,
220_000,
)
.await
{
let _ = storage_guard
.store_shared_commitments(blob_id, commitment)
.await;
}
}
drop(storage_guard);
store_result
})
.await;
match operation_result {
Ok(()) => {
result.operations_success += 1;
result.bytes_written += share_data.len() as u64 + 220_000;
}
Err(_) => result.errors += 1,
}
result.operations_total += 1;
}
result.duration = duration;
result.latency_percentiles = Some(latency_tracker.get_percentiles());
result
}
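// The conditional variants below skip the write path entirely when the benchmark is
// running read-only or the configured frequency is zero, returning an unexecuted result
// instead of touching storage.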
pub async fn run_conditional_block_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_block_height: usize,
is_read_only: bool,
) -> WorkloadStreamResult {
if is_read_only || frequency_hz == 0.0 {
return create_empty_workload_result(WorkloadType::BlockStorage);
}
run_block_storage_workload(storage, duration, frequency_hz, starting_block_height).await
}
pub async fn run_conditional_da_storage_workload(
storage: Arc<Mutex<RocksBackend>>,
duration: Duration,
frequency_hz: f64,
starting_share_count: usize,
is_read_only: bool,
) -> WorkloadStreamResult {
if is_read_only || frequency_hz == 0.0 {
return create_empty_workload_result(WorkloadType::DaStorage);
}
run_da_storage_workload(storage, duration, frequency_hz, starting_share_count).await
}
const fn create_empty_workload_result(workload_type: WorkloadType) -> WorkloadStreamResult {
WorkloadStreamResult {
workload_type,
executed: false,
operations_total: 0,
operations_success: 0,
bytes_read: 0,
bytes_written: 0,
duration: Duration::from_secs(0),
errors: 0,
cache_misses: 0,
latency_percentiles: None,
}
}

View File

@ -0,0 +1,311 @@
use std::{env, time::Instant};
use log::info;
use nomos_storage::{
api::chain::StorageChainApi as _,
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use serde::{Deserialize, Serialize};
use storage_benchmarks::{
benchmark::{analyze_dataset, utilities::create_header_id},
data::create_block_data,
BenchConfig, DatasetGenConfig,
};
pub struct DatasetGenerator {
config: DatasetGenConfig,
storage: RocksBackend,
progress: GenerationProgress,
stats: GenerationStats,
}
#[derive(Debug, Clone, Default)]
pub struct GenerationProgress {
pub blocks_completed: usize,
pub da_objects_completed: usize,
pub current_batch_start: usize,
pub total_target_blocks: usize,
pub generation_start_time: Option<Instant>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GenerationStats {
pub blocks_generated_this_run: usize,
pub da_objects_generated_this_run: usize,
pub total_generation_time: std::time::Duration,
pub block_generation_rate: f64,
pub da_generation_rate: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DatasetGenerationReport {
pub generation_summary: GenerationSummary,
pub performance: PerformanceMetrics,
pub config: DatasetGenConfig,
pub timestamp: String,
pub tool_version: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerationSummary {
pub blocks_generated: usize,
pub da_objects_generated: usize,
pub total_objects: usize,
pub duration_seconds: u64,
pub duration_minutes: f64,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct PerformanceMetrics {
pub block_rate_per_sec: f64,
pub da_rate_per_sec: f64,
pub total_rate_objects_per_sec: f64,
pub cpu_cores_used: usize,
}
impl DatasetGenerator {
pub async fn new(config_path: &str) -> Result<Self, Box<dyn std::error::Error>> {
let config = DatasetGenConfig::from_file(config_path)?;
let benchmark_config = BenchConfig::production();
let storage = RocksBackend::new(benchmark_config.settings)?;
let mut generator = Self {
config,
storage,
progress: GenerationProgress::default(),
stats: GenerationStats::default(),
};
generator.analyze_existing_data().await?;
Ok(generator)
}
async fn analyze_existing_data(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let (existing_blocks, existing_da) = analyze_dataset(&mut self.storage).await?;
self.progress.blocks_completed = existing_blocks;
self.progress.da_objects_completed = existing_da;
self.progress.total_target_blocks = self.config.total_blocks();
info!(
"Found existing data: {} blocks, {} DA objects",
existing_blocks, existing_da
);
info!("Target: {} total blocks", self.progress.total_target_blocks);
Ok(())
}
pub async fn generate_dataset(
&mut self,
) -> Result<GenerationStats, Box<dyn std::error::Error>> {
info!(
"Multi-core generation: {} ({} cores available)",
self.config.dataset.name,
num_cpus::get()
);
self.progress.generation_start_time = Some(Instant::now());
if self.progress.blocks_completed < self.progress.total_target_blocks {
self.generate_remaining_blocks().await?;
} else {
info!("All blocks already generated!");
}
self.generate_da_objects()?;
self.finalize_generation();
self.save_generation_report();
Ok(self.stats.clone())
}
async fn generate_remaining_blocks(&mut self) -> Result<(), Box<dyn std::error::Error>> {
let remaining_blocks = self.progress.total_target_blocks - self.progress.blocks_completed;
info!(
"Resuming block generation from block {}, generating {} more blocks",
self.progress.blocks_completed, remaining_blocks
);
let blocks_generated = self.generate_blocks_in_batches(remaining_blocks).await?;
self.stats.blocks_generated_this_run = blocks_generated;
Ok(())
}
async fn generate_blocks_in_batches(
&mut self,
blocks_to_generate: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
const PARALLEL_BATCH_SIZE: usize = 1000;
let mut blocks_generated = 0;
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
let actual_batch_start = self.progress.blocks_completed + batch_start;
let batch_data =
self.generate_block_batch_parallel(actual_batch_start, batch_end - batch_start)?;
self.store_block_batch(&batch_data).await?;
blocks_generated += batch_end - batch_start;
self.log_block_progress(actual_batch_start, blocks_generated);
}
Ok(blocks_generated)
}
fn generate_block_batch_parallel(
&self,
start_index: usize,
count: usize,
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
use rayon::prelude::*;
let generation_start = Instant::now();
let batch_data: Vec<_> = (0..count)
.into_par_iter()
.map(|i| {
let block_index = start_index + i;
let block_data = create_block_data(block_index, self.config.blocks.size_bytes);
(block_index, block_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
count,
generation_time.as_secs_f64(),
count as f64 / generation_time.as_secs_f64()
);
Ok(batch_data)
}
async fn store_block_batch(
&mut self,
batch: &[(usize, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (block_index, block_data) in batch {
let header_id = create_header_id(*block_index);
self.storage
.store_block(header_id, block_data.clone())
.await?;
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
self.storage.store_immutable_block_ids(ids).await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
batch.len(),
storage_time.as_secs_f64(),
batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
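// Placeholder: this builder currently generates no DA objects; the counter stays at
// zero and only block data is produced.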
fn generate_da_objects(&mut self) -> Result<(), Box<dyn std::error::Error>> {
self.stats.da_objects_generated_this_run = 0;
Ok(())
}
fn log_block_progress(&self, current_block: usize, blocks_generated: usize) {
if self.progress.total_target_blocks > 1000 {
let completion_percent =
(blocks_generated * 100) as f64 / self.progress.total_target_blocks as f64;
info!(
"Block progress: {} completed - {:.1}% total",
current_block, completion_percent
);
}
}
fn finalize_generation(&mut self) {
if let Some(start_time) = self.progress.generation_start_time {
self.stats.total_generation_time = start_time.elapsed();
if self.stats.total_generation_time.as_secs() > 0 {
self.stats.block_generation_rate = self.stats.blocks_generated_this_run as f64
/ self.stats.total_generation_time.as_secs_f64();
self.stats.da_generation_rate = self.stats.da_objects_generated_this_run as f64
/ self.stats.total_generation_time.as_secs_f64();
}
}
}
fn save_generation_report(&self) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_generation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let report = DatasetGenerationReport {
generation_summary: GenerationSummary {
blocks_generated: self.stats.blocks_generated_this_run,
da_objects_generated: self.stats.da_objects_generated_this_run,
total_objects: self.stats.blocks_generated_this_run
+ self.stats.da_objects_generated_this_run,
duration_seconds: self.stats.total_generation_time.as_secs(),
duration_minutes: self.stats.total_generation_time.as_secs_f64() / 60.0,
},
performance: PerformanceMetrics {
block_rate_per_sec: self.stats.block_generation_rate,
da_rate_per_sec: self.stats.da_generation_rate,
total_rate_objects_per_sec: self.stats.block_generation_rate
+ self.stats.da_generation_rate,
cpu_cores_used: num_cpus::get(),
},
config: self.config.clone(),
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
}
}
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let args: Vec<String> = env::args().collect();
if args.len() < 3 || args[1] != "--config" {
print_usage();
return Err("Configuration file required".into());
}
let mut generator = DatasetGenerator::new(&args[2]).await?;
let final_stats = generator.generate_dataset().await?;
info!("Generation completed successfully!");
info!(
"Final stats: {} blocks, {} DA objects in {:.1}min",
final_stats.blocks_generated_this_run,
final_stats.da_objects_generated_this_run,
final_stats.total_generation_time.as_secs_f64() / 60.0
);
Ok(())
}
fn print_usage() {
eprintln!("Multi-core Dataset Builder");
eprintln!("Generates blocks and DA data in parallel");
eprintln!();
eprintln!("USAGE:");
eprintln!(" cargo run --bin dataset_builder -- --config <file>");
}

View File

@ -0,0 +1,375 @@
use std::{env, time::Instant};
use log::info;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use rand::SeedableRng as _;
use rayon::prelude::*;
use storage_benchmarks::{
benchmark::{
analyze_dataset,
utilities::{create_blob_id, create_header_id},
DatasetGenerationReport, GenerationPerformance, GenerationSummary,
},
data::{create_block_data, create_da_share},
BenchConfig, DatasetGenConfig,
};
const PARALLEL_BATCH_SIZE: usize = 1000;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let args: Vec<String> = env::args().collect();
if args.len() < 3 || args[1] != "--config" {
print_usage();
return Err("Configuration file required".into());
}
run_multicore_generation(&args[2]).await
}
async fn run_multicore_generation(config_path: &str) -> Result<(), Box<dyn std::error::Error>> {
let config = DatasetGenConfig::from_file(config_path)?;
info!(
"Multi-core generation: {} ({} cores available)",
config.dataset.name,
num_cpus::get()
);
let generation_start = Instant::now();
let benchmark_config = BenchConfig::production();
let mut storage = RocksBackend::new(benchmark_config.settings)?;
let (existing_blocks, existing_da) = analyze_dataset(&mut storage).await?;
let total_blocks = config.total_blocks();
info!(
"Found existing data: {} blocks, {} DA objects",
existing_blocks, existing_da
);
info!("Target: {} total blocks", total_blocks);
let blocks_generated = if existing_blocks < total_blocks {
let remaining_blocks = total_blocks - existing_blocks;
info!(
"Resuming block generation from block {}, generating {} more blocks",
existing_blocks, remaining_blocks
);
generate_blocks_multicore(&mut storage, &config, remaining_blocks, existing_blocks).await?
} else {
info!("All blocks already generated!");
0
};
let da_generated = generate_da_objects_multicore(&mut storage, &config, total_blocks).await?;
let total_time = generation_start.elapsed();
log_generation_completion(blocks_generated, da_generated, total_time);
Ok(())
}
async fn generate_blocks_multicore(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
blocks_to_generate: usize,
start_from_block: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
let mut blocks_generated = 0;
for batch_start in (0..blocks_to_generate).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, blocks_to_generate);
let batch_size = batch_end - batch_start;
let actual_batch_start = start_from_block + batch_start;
let actual_batch_end = start_from_block + batch_end;
let block_data_batch =
generate_block_batch_parallel(actual_batch_start, actual_batch_end, config)?;
store_block_batch(storage, &block_data_batch).await?;
blocks_generated += batch_size;
log_block_progress(
actual_batch_start,
actual_batch_end,
start_from_block + blocks_to_generate,
blocks_generated,
);
}
Ok(blocks_generated)
}
async fn generate_da_objects_multicore(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
total_blocks: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
info!(
"Generating DA objects using {} CPU cores...",
num_cpus::get()
);
let mut da_objects_generated = 0;
for batch_start in (0..total_blocks).step_by(PARALLEL_BATCH_SIZE) {
let batch_end = std::cmp::min(batch_start + PARALLEL_BATCH_SIZE, total_blocks);
let da_batch_count =
generate_da_batch_for_blocks(storage, config, batch_start, batch_end).await?;
da_objects_generated += da_batch_count;
}
Ok(da_objects_generated)
}
fn generate_block_batch_parallel(
batch_start: usize,
batch_end: usize,
config: &DatasetGenConfig,
) -> Result<Vec<(usize, bytes::Bytes)>, Box<dyn std::error::Error>> {
let batch_indices: Vec<usize> = (batch_start..batch_end).collect();
let generation_start = Instant::now();
let block_data_batch: Vec<(usize, bytes::Bytes)> = batch_indices
.par_iter()
.map(|&block_index| {
let block_data = create_block_data(block_index, config.blocks.size_bytes);
(block_index, block_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} blocks in {:.2}s ({:.0} blocks/s)",
batch_end - batch_start,
generation_time.as_secs_f64(),
(batch_end - batch_start) as f64 / generation_time.as_secs_f64()
);
Ok(block_data_batch)
}
async fn store_block_batch(
storage: &mut RocksBackend,
block_batch: &[(usize, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (block_index, block_data) in block_batch {
let header_id = create_header_id(*block_index);
storage.store_block(header_id, block_data.clone()).await?;
let slot = cryptarchia_engine::Slot::from(*block_index as u64);
let ids = std::collections::BTreeMap::from([(slot, header_id)]);
storage.store_immutable_block_ids(ids).await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} blocks in {:.2}s ({:.0} blocks/s)",
block_batch.len(),
storage_time.as_secs_f64(),
block_batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
async fn generate_da_batch_for_blocks(
storage: &mut RocksBackend,
config: &DatasetGenConfig,
batch_start: usize,
batch_end: usize,
) -> Result<usize, Box<dyn std::error::Error>> {
let da_specs = collect_da_specs_for_blocks(config, batch_start, batch_end);
if da_specs.is_empty() {
return Ok(0);
}
let da_data_batch = generate_da_data_parallel(&da_specs, config)?;
store_da_batch(storage, &da_data_batch).await?;
Ok(da_data_batch.len())
}
fn collect_da_specs_for_blocks(
config: &DatasetGenConfig,
batch_start: usize,
batch_end: usize,
) -> Vec<(usize, usize, usize)> {
let mut da_specs = Vec::new();
for block in batch_start..batch_end {
for blob in 0..config.network.blobs_per_block {
let blob_global_index = block * config.network.blobs_per_block + blob;
let subnet = blob_global_index % config.network.total_subnets;
if subnet < config.validator.assigned_subnets {
da_specs.push((block, blob, subnet));
}
}
}
da_specs
}
fn generate_da_data_parallel(
da_specs: &[(usize, usize, usize)],
config: &DatasetGenConfig,
) -> Result<
Vec<(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)>,
Box<dyn std::error::Error>,
> {
let generation_start = Instant::now();
let da_data_batch: Vec<_> = da_specs
.par_iter()
.map(|&(block, blob, subnet)| {
let blob_id = create_blob_id(block, blob);
let share_idx = [subnet as u8, 0u8];
let share_data = create_da_share(block, blob, config.da.share_size_bytes);
let commitment_data = {
let mut rng =
rand_chacha::ChaCha20Rng::seed_from_u64((block as u64 * 1000) + blob as u64);
use rand::Rng as _;
let data: Vec<u8> = (0..config.da.commitment_size_bytes)
.map(|_| rng.gen())
.collect();
bytes::Bytes::from(data)
};
(blob_id, share_idx, share_data, commitment_data)
})
.collect();
let generation_time = generation_start.elapsed();
info!(
"Generated {} DA objects in {:.2}s ({:.0} objects/s)",
da_data_batch.len(),
generation_time.as_secs_f64(),
da_data_batch.len() as f64 / generation_time.as_secs_f64()
);
Ok(da_data_batch)
}
async fn store_da_batch(
storage: &mut RocksBackend,
da_batch: &[(nomos_core::da::BlobId, [u8; 2], bytes::Bytes, bytes::Bytes)],
) -> Result<(), Box<dyn std::error::Error>> {
let storage_start = Instant::now();
for (blob_id, share_idx, share_data, commitment_data) in da_batch {
storage
.store_light_share(*blob_id, *share_idx, share_data.clone())
.await?;
storage
.store_shared_commitments(*blob_id, commitment_data.clone())
.await?;
}
let storage_time = storage_start.elapsed();
info!(
"Stored {} DA objects in {:.2}s ({:.0} objects/s)",
da_batch.len(),
storage_time.as_secs_f64(),
da_batch.len() as f64 / storage_time.as_secs_f64()
);
Ok(())
}
fn log_block_progress(
batch_start: usize,
batch_end: usize,
total_blocks: usize,
blocks_generated: usize,
) {
if total_blocks > 1000 {
info!(
"Block progress: {}-{} completed - {:.1}% total",
batch_start,
batch_end - 1,
(blocks_generated * 100) as f64 / total_blocks as f64
);
}
}
fn log_generation_completion(
blocks_generated: usize,
da_generated: usize,
total_time: std::time::Duration,
) {
save_generation_report(blocks_generated, da_generated, total_time);
info!(
"Multi-core generation completed: {} blocks, {} DA objects in {:.1}min",
blocks_generated,
da_generated,
total_time.as_secs_f64() / 60.0
);
let total_rate = (blocks_generated + da_generated) as f64 / total_time.as_secs_f64();
info!(
"Total rate: {:.0} objects/sec using {} CPU cores",
total_rate,
num_cpus::get()
);
}
fn save_generation_report(
blocks_generated: usize,
da_generated: usize,
total_time: std::time::Duration,
) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_generation_{}.json", timestamp);
let filepath = results_dir.join(filename);
let report = DatasetGenerationReport {
generation_summary: GenerationSummary {
blocks_generated,
da_objects_generated: da_generated,
total_objects: blocks_generated + da_generated,
duration_seconds: total_time.as_secs(),
duration_minutes: total_time.as_secs_f64() / 60.0,
},
performance: GenerationPerformance {
total_rate_objects_per_sec: (blocks_generated + da_generated) as f64
/ total_time.as_secs_f64(),
block_rate_per_sec: blocks_generated as f64 / total_time.as_secs_f64(),
da_rate_per_sec: da_generated as f64 / total_time.as_secs_f64(),
cpu_cores_used: num_cpus::get(),
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Generation report saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save report to {}: {}", filepath.display(), e),
}
}
fn print_usage() {
eprintln!("Multi-core Dataset Generator");
eprintln!("Uses all CPU cores for parallel data generation");
eprintln!();
eprintln!("USAGE:");
eprintln!(" POL_PROOF_DEV_MODE=true cargo run --example multicore_dataset_generator -- --config <file>");
}

View File

@ -0,0 +1,193 @@
use clap::Parser as _;
use log::info;
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use storage_benchmarks::{
benchmark::{
analyze_dataset, run_concurrent_validator_benchmark, BenchConfigSummary,
BenchResultsSummary, StatisticsSummary, StorageBenchReport,
},
config::{ProductionBenchConfig, ValidatorProfiles},
data::initialize_benchmark_seed,
metrics::RuntimeValidatorAllocator,
BenchConfig,
};
#[global_allocator]
static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = ProductionBenchConfig::parse();
config.validate()?;
let _seed_config = initialize_benchmark_seed(&[]);
run_benchmark(config).await
}
async fn run_benchmark(config: ProductionBenchConfig) -> Result<(), Box<dyn std::error::Error>> {
ALLOCATOR.set_limit_gb(config.memory as usize);
let profiles = ValidatorProfiles::from_file("dataset_configs/validator_profiles.toml")?;
let profile = profiles
.get_profile(&config.profile.to_string())
.ok_or_else(|| format!("Profile '{}' not found", config.profile))?;
let bench_config = BenchConfig::production();
if !bench_config.settings.db_path.exists() {
return Err("No dataset found".into());
}
let mut results = Vec::new();
for i in 1..=config.warmup_runs {
info!("Warmup run {}/{}", i, config.warmup_runs);
let _ = run_iteration(&bench_config, profile, &config).await;
}
for i in 1..=config.measurement_runs {
info!("Measurement run {}/{}", i, config.measurement_runs);
let result = run_iteration(&bench_config, profile, &config).await;
info!("Run {} result: {:.1} ops/sec", i, result);
results.push(result);
}
report_results(&results, &config);
Ok(())
}
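// One measurement pass: open the backend with this run's read-only setting, size the
// workloads from the existing dataset, and return the combined throughput in ops/sec.
// Backend open errors are logged; any other failure silently yields 0.0.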
async fn run_iteration(
bench_config: &BenchConfig,
profile: &storage_benchmarks::config::ValidatorProfile,
config: &ProductionBenchConfig,
) -> f64 {
let mut storage_settings = bench_config.settings.clone();
storage_settings.read_only = config.read_only;
match RocksBackend::new(storage_settings) {
Ok(mut storage) => {
if let Ok((block_count, share_count)) = analyze_dataset(&mut storage).await {
if let Ok(result) = run_concurrent_validator_benchmark(
storage,
std::time::Duration::from_secs(config.duration),
profile,
(block_count, share_count),
config.read_only,
)
.await
{
return result.combined_throughput();
}
}
}
Err(e) => log::error!("Storage error: {}", e),
}
0.0
}
fn report_results(results: &[f64], config: &ProductionBenchConfig) {
save_results_to_file(results, config);
print_results_summary(results, config);
}
fn save_results_to_file(results: &[f64], config: &ProductionBenchConfig) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!(
"storage_bench_{}_{}_{}gb_{}.json",
config.profile, timestamp, config.memory, config.duration
);
let filepath = results_dir.join(filename);
let mean = if results.is_empty() {
0.0
} else {
results.iter().sum::<f64>() / results.len() as f64
};
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
let detailed_results = StorageBenchReport {
benchmark_config: BenchConfigSummary {
profile: format!("{:?}", config.profile),
memory_gb: config.memory,
duration_seconds: config.duration,
warmup_runs: config.warmup_runs,
measurement_runs: config.measurement_runs,
},
results: BenchResultsSummary {
raw_measurements: results.to_vec(),
statistics: StatisticsSummary {
mean_ops_sec: mean,
min_ops_sec: min,
max_ops_sec: max,
variability_percent: variability,
sample_count: results.len(),
},
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
let json_content = match serde_json::to_string_pretty(&detailed_results) {
Ok(content) => content,
Err(e) => {
log::error!("Failed to serialize results: {}", e);
return;
}
};
match std::fs::write(&filepath, json_content) {
Ok(_) => log::info!("Results saved to: {}", filepath.display()),
Err(e) => log::warn!("Failed to save results to {}: {}", filepath.display(), e),
}
}
fn print_results_summary(results: &[f64], config: &ProductionBenchConfig) {
if results.is_empty() {
return;
}
let mean = results.iter().sum::<f64>() / results.len() as f64;
let min = results.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = results.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variability = if mean > 0.0 {
(max - min) / mean * 100.0
} else {
0.0
};
info!(
"Mean: {:.1} ops/sec, Range: {:.1}-{:.1}, Variability: {:.1}%",
mean, min, max, variability
);
let summary = StatisticsSummary {
mean_ops_sec: mean,
min_ops_sec: min,
max_ops_sec: max,
variability_percent: variability,
sample_count: results.len(),
};
log::info!(
"MACHINE_READABLE: {}",
serde_json::to_string(&summary).unwrap_or_default()
);
println!("\n| Profile | Memory | Ops/sec | Variability |");
println!("|---------|--------|---------|-------------|");
println!(
"| {} | {}GB | {:.1} | {:.1}% |",
config.profile, config.memory, mean, variability
);
}

View File

@ -0,0 +1,259 @@
use std::time::Instant;
use log::info;
use nomos_storage::{
api::{chain::StorageChainApi as _, da::StorageDaApi as _},
backends::{rocksdb::RocksBackend, StorageBackend as _},
};
use storage_benchmarks::{
benchmark::utilities::{create_blob_id, create_header_id},
BenchConfig, CompletenessSummary, DataSizesSummary, DatasetVerificationReport,
VerificationPerformance, VerificationSummary, WarningsSummary,
};
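// Verification is keyed to a reference dataset: it probes up to 1,100,000 block indices
// (expecting 1,051,200 blocks, which matches 730 days at a 60 s block time) and samples
// DA shares and commitments from the first 1,000 blobs across 50 subnet indices.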
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = BenchConfig::production();
if !config.settings.db_path.exists() {
println!(
"No database found at: {}",
config.settings.db_path.display()
);
return Err("Database not found".into());
}
info!("Opening database: {}", config.settings.db_path.display());
let mut storage_settings = config.settings.clone();
storage_settings.read_only = true;
let mut storage = RocksBackend::new(storage_settings)?;
info!("Starting database verification");
info!("=== Database Verification ===");
info!("Checking blocks...");
let start_time = Instant::now();
let mut blocks_found = 0;
let mut total_block_size = 0u64;
for chunk_start in (0..1_100_000).step_by(10_000) {
let mut chunk_found = 0;
let chunk_end = chunk_start + 10_000;
for i in chunk_start..chunk_end {
let header_id = create_header_id(i);
match storage.get_block(header_id).await {
Ok(Some(data)) => {
blocks_found += 1;
total_block_size += data.len() as u64;
chunk_found += 1;
}
Ok(None) => {
if chunk_found == 0 {
info!("No more blocks found after block {}", i);
break;
}
}
Err(_) => {}
}
}
if chunk_found == 0 {
break;
}
info!(
"Blocks {}-{}: found {} blocks",
chunk_start,
chunk_start + chunk_found - 1,
chunk_found
);
}
let blocks_check_time = start_time.elapsed();
println!("Block Data:");
println!(" Blocks found: {}", blocks_found);
println!(" Expected blocks: 1,051,200");
println!(
" Total block size: {:.1} GB",
total_block_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Average block size: {} bytes",
if blocks_found > 0 {
total_block_size / blocks_found
} else {
0
}
);
println!(" Check time: {:.1}s", blocks_check_time.as_secs_f64());
println!();
info!("Checking DA shares...");
let start_time = Instant::now();
let mut shares_found = 0;
let mut total_share_size = 0u64;
let mut commitments_found = 0;
let mut total_commitment_size = 0u64;
for blob_idx in 0..1000 {
for subnet in 0..50 {
let blob_id = create_blob_id(blob_idx, 0);
let share_idx = [subnet as u8, 0u8];
if let Ok(Some(data)) = storage.get_light_share(blob_id, share_idx).await {
shares_found += 1;
total_share_size += data.len() as u64;
}
if let Ok(Some(data)) = storage.get_shared_commitments(blob_id).await {
commitments_found += 1;
total_commitment_size += data.len() as u64;
}
}
if blob_idx % 100 == 0 {
info!(
"Checked blob {} - found {} shares, {} commitments so far",
blob_idx, shares_found, commitments_found
);
}
}
let da_check_time = start_time.elapsed();
println!("DA Data:");
println!(
" DA shares found: {} (sampled from first 50K possibilities)",
shares_found
);
println!(" Expected DA shares: ~256,650 total");
println!(
" Total share size: {:.1} MB",
total_share_size as f64 / 1024.0 / 1024.0
);
println!(
" Average share size: {} bytes",
if shares_found > 0 {
total_share_size / shares_found
} else {
0
}
);
println!();
println!(" Commitments found: {}", commitments_found);
println!(
" Total commitment size: {:.1} GB",
total_commitment_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Average commitment size: {} bytes",
if commitments_found > 0 {
total_commitment_size / commitments_found
} else {
0
}
);
println!(" Check time: {:.1}s", da_check_time.as_secs_f64());
println!();
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
println!("Summary:");
println!(" Database on disk: 4.8 GB");
println!(
" Verified data size: {:.1} GB",
total_verified_size as f64 / 1024.0 / 1024.0 / 1024.0
);
println!(
" Blocks completeness: {:.1}%",
blocks_found as f64 / 1_051_200.0 * 100.0
);
println!(
" Estimated DA completeness: {:.1}%",
shares_found as f64 / (256_650.0 / 50.0) * 100.0
);
if blocks_found < 1_000_000 {
println!("WARNING: Block generation may have been incomplete");
}
if total_verified_size < 50 * 1024 * 1024 * 1024 {
println!("WARNING: Data size much smaller than expected - check generation logic");
}
save_verification_report(
blocks_found as usize,
shares_found as usize,
commitments_found as usize,
total_block_size,
total_share_size,
total_commitment_size,
blocks_check_time + da_check_time,
);
Ok(())
}
fn save_verification_report(
blocks_found: usize,
shares_found: usize,
commitments_found: usize,
total_block_size: u64,
total_share_size: u64,
total_commitment_size: u64,
verification_time: std::time::Duration,
) {
let results_dir = BenchConfig::results_path();
let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
let filename = format!("dataset_verification_{}.json", timestamp);
let filepath = results_dir.join(filename);
let total_verified_size = total_block_size + total_share_size + total_commitment_size;
let report = DatasetVerificationReport {
verification_summary: VerificationSummary {
blocks_found,
da_shares_found: shares_found,
da_commitments_found: commitments_found,
total_objects_found: blocks_found + shares_found + commitments_found,
},
data_sizes: DataSizesSummary {
total_block_size_bytes: total_block_size,
total_share_size_bytes: total_share_size,
total_commitment_size_bytes: total_commitment_size,
total_verified_size_bytes: total_verified_size,
total_verified_size_gb: total_verified_size as f64 / (1024.0 * 1024.0 * 1024.0),
},
completeness_estimates: CompletenessSummary {
block_completeness_percent: blocks_found as f64 / 1_051_200.0 * 100.0,
da_completeness_percent: shares_found as f64 / (256_650.0 / 50.0) * 100.0,
},
performance: VerificationPerformance {
verification_time_seconds: verification_time.as_secs_f64(),
objects_verified_per_sec: (blocks_found + shares_found + commitments_found) as f64
/ verification_time.as_secs_f64(),
},
warnings: WarningsSummary {
block_generation_incomplete: blocks_found < 1_000_000,
data_size_smaller_than_expected: total_verified_size < 50 * 1024 * 1024 * 1024,
},
timestamp: chrono::Utc::now().to_rfc3339(),
tool_version: env!("CARGO_PKG_VERSION").to_string(),
};
match std::fs::write(&filepath, serde_json::to_string_pretty(&report).unwrap()) {
Ok(_) => info!("Verification report saved to: {}", filepath.display()),
Err(e) => log::warn!(
"Failed to save verification report to {}: {}",
filepath.display(),
e
),
}
}

View File

@ -0,0 +1,161 @@
use nomos_storage::backends::{rocksdb::RocksBackend, StorageBackend as _};
use storage_benchmarks::BenchConfig;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let config = BenchConfig::production();
if !config.settings.db_path.exists() {
println!(
"No database found at: {}",
config.settings.db_path.display()
);
return Err("Database not found - run dataset_generator first".into());
}
let storage = RocksBackend::new(config.settings)?;
println!("=== RocksDB Properties Verification ===");
println!();
verify_property_availability(&storage);
Ok(())
}
fn verify_property_availability(storage: &RocksBackend) {
let level0_prop = rocksdb::properties::num_files_at_level(0);
let level1_prop = rocksdb::properties::num_files_at_level(1);
let level2_prop = rocksdb::properties::num_files_at_level(2);
let properties_to_test = vec![
("STATS", rocksdb::properties::STATS),
(
"BLOCK_CACHE_CAPACITY",
rocksdb::properties::BLOCK_CACHE_CAPACITY,
),
(
"TOTAL_SST_FILES_SIZE",
rocksdb::properties::TOTAL_SST_FILES_SIZE,
),
(
"CUR_SIZE_ALL_MEM_TABLES",
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES,
),
(
"LIVE_SST_FILES_SIZE",
rocksdb::properties::LIVE_SST_FILES_SIZE,
),
("ESTIMATE_NUM_KEYS", rocksdb::properties::ESTIMATE_NUM_KEYS),
("NUM_FILES_AT_LEVEL0", &level0_prop),
("NUM_FILES_AT_LEVEL1", &level1_prop),
("NUM_FILES_AT_LEVEL2", &level2_prop),
];
let custom_properties = vec![
"rocksdb.index-and-filter-cache.usage",
"rocksdb.index-and-filter-cache.capacity",
"rocksdb.compaction-pending",
"rocksdb.number.compactions",
"rocksdb.compact.read.bytes",
"rocksdb.compact.write.bytes",
"rocksdb.compaction.cpu.time",
"rocksdb.mem-table-flush-pending",
"rocksdb.space.amplification",
"rocksdb.total-sst-files-size",
"rocksdb.number.keys.deleted",
"rocksdb.size-bytes-at-level0",
"rocksdb.size-bytes-at-level1",
];
println!("Standard RocksDB Properties:");
for (name, prop) in properties_to_test {
test_standard_property(storage, name, &prop.to_string());
}
println!("\nCustom/Extended Properties:");
for prop_name in custom_properties {
test_custom_property(storage, prop_name);
}
println!("\nSTATS Property Sample:");
test_stats_property(storage);
}
fn test_standard_property(storage: &RocksBackend, name: &str, property: &str) {
let property_owned = property.to_string();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(value_bytes)) => {
let value_str = String::from_utf8_lossy(&value_bytes);
let truncated = if value_str.len() > 100 {
format!("{}...", &value_str[..100])
} else {
value_str.to_string()
};
println!("OK {}: {}", name, truncated);
}
Ok(None) => {
println!("FAIL {}: None (property exists but no value)", name);
}
Err(e) => {
println!("FAIL {}: Error - {}", name, e);
}
}
}
fn test_custom_property(storage: &RocksBackend, property: &str) {
let prop_owned = property.to_string();
let transaction = storage.txn(move |db| match db.property_value(&prop_owned) {
Ok(Some(value)) => Ok(Some(value.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(value_bytes)) => {
let value_str = String::from_utf8_lossy(&value_bytes);
println!("OK {}: {}", property, value_str.trim());
}
Ok(None) => {
println!("FAIL {}: None (property exists but no value)", property);
}
Err(e) => {
println!("FAIL {}: Error - {}", property, e);
}
}
}
fn test_stats_property(storage: &RocksBackend) {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats)) => Ok(Some(stats.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => {
let stats_str = String::from_utf8_lossy(&stats_bytes);
println!("Sample STATS lines:");
for (i, line) in stats_str.lines().take(10).enumerate() {
println!(" {}: {}", i + 1, line);
}
if stats_str.lines().count() > 10 {
println!(" ... ({} total lines)", stats_str.lines().count());
}
}
Ok(None) => {
println!("FAIL STATS: None");
}
Err(e) => {
println!("FAIL STATS: Error - {}", e);
}
}
}

View File

@ -0,0 +1,152 @@
use clap::Parser;
use super::types::{CompressionType, ProfileType};
use crate::RocksDbTuningOptions;
#[derive(Debug, Clone, Parser)]
#[command(name = "optimization_bench")]
#[command(about = "RocksDB optimization benchmarks")]
#[command(long_about = "Systematic RocksDB parameter optimization with statistical rigor")]
#[non_exhaustive]
pub struct ProductionBenchConfig {
#[arg(long)]
pub profile: ProfileType,
#[arg(long, default_value = "8")]
pub memory: u32,
#[arg(long, default_value = "120")]
pub duration: u64,
#[arg(long)]
pub cache_size: Option<u32>,
#[arg(long)]
pub write_buffer: Option<u32>,
#[arg(long)]
pub compaction_jobs: Option<u32>,
#[arg(long)]
pub block_size: Option<u32>,
#[arg(long)]
pub compression: Option<CompressionType>,
#[arg(long)]
pub read_only: bool,
#[arg(long)]
pub seed: Option<u64>,
#[arg(long, default_value = "1")]
pub warmup_runs: usize,
#[arg(long, default_value = "3")]
pub measurement_runs: usize,
}
#[derive(Debug, Clone, Parser)]
#[command(name = "dataset_generator")]
#[command(about = "Multi-core dataset generation")]
pub struct DatasetGeneratorConfig {
#[arg(long)]
pub config: std::path::PathBuf,
#[arg(long)]
pub seed: Option<u64>,
#[arg(long)]
pub size_limit: Option<f64>,
}
#[derive(Debug, thiserror::Error)]
pub enum ConfigValidationError {
#[error("Memory limit must be between 1-512GB, got {0}GB")]
InvalidMemoryLimit(u32),
#[error("Duration must be between 1-86400 seconds, got {0}s")]
InvalidDuration(u64),
#[error("Cache size must be between 1-80% of RAM, got {0}%")]
InvalidCacheSize(u32),
#[error("Write buffer must be between 16-2048MB, got {0}MB")]
InvalidWriteBuffer(u32),
#[error("Compaction jobs must be between 1-32, got {0}")]
InvalidCompactionJobs(u32),
#[error("Block size must be between 1-128KB, got {0}KB")]
InvalidBlockSize(u32),
#[error("Warmup runs must be less than measurement runs, got warmup={0}, measurement={1}")]
InvalidRunCounts(usize, usize),
#[error("Unknown compression type: {0} (valid: none, lz4, zstd)")]
InvalidCompression(String),
#[error("Profile '{0}' not found in validator_profiles.toml")]
ProfileNotFound(String),
}
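// Example invocation (a sketch; the binary name is assumed and the kebab-case flags are
// derived by clap from the field names above):
//
//     cargo run --bin storage_bench_runner -- \
//         --profile mainnet --memory 16 --duration 300 --measurement-runs 5 --read-only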
impl ProductionBenchConfig {
pub fn validate(&self) -> Result<(), ConfigValidationError> {
if !(1..=512).contains(&self.memory) {
return Err(ConfigValidationError::InvalidMemoryLimit(self.memory));
}
if !(1..=86400).contains(&self.duration) {
return Err(ConfigValidationError::InvalidDuration(self.duration));
}
if let Some(cache) = self.cache_size {
if !(1..=80).contains(&cache) {
return Err(ConfigValidationError::InvalidCacheSize(cache));
}
}
if let Some(buffer) = self.write_buffer {
if !(16..=2048).contains(&buffer) {
return Err(ConfigValidationError::InvalidWriteBuffer(buffer));
}
}
if let Some(jobs) = self.compaction_jobs {
if !(1..=32).contains(&jobs) {
return Err(ConfigValidationError::InvalidCompactionJobs(jobs));
}
}
if let Some(block_size) = self.block_size {
if !(1..=128).contains(&block_size) {
return Err(ConfigValidationError::InvalidBlockSize(block_size));
}
}
if self.warmup_runs >= self.measurement_runs {
return Err(ConfigValidationError::InvalidRunCounts(
self.warmup_runs,
self.measurement_runs,
));
}
if let Some(comp) = self.compression {
log::debug!("Compression type: {comp}");
}
Ok(())
}
#[must_use]
pub const fn to_rocksdb_tuning(&self) -> RocksDbTuningOptions {
RocksDbTuningOptions {
cache_size_percent: self.cache_size,
write_buffer_mb: self.write_buffer,
compaction_jobs: self.compaction_jobs,
block_size_kb: self.block_size,
compression: self.compression,
bloom_filter_bits: None,
}
}
}

View File

@ -0,0 +1,88 @@
use serde::{Deserialize, Serialize};
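// Sketch of the TOML shape this config deserializes from (values are illustrative, not a
// committed dataset config); with days = 730 and block_time_seconds = 60,
// total_blocks() works out to 730 * 1440 = 1,051,200:
//
//     [dataset]
//     days = 730
//     block_time_seconds = 60
//     name = "two-year-mainnet"
//
//     [network]
//     load_name = "steady"
//     blobs_per_block = 5
//     total_subnets = 100
//
//     [validator]
//     assigned_subnets = 10
//
//     [blocks]
//     size_bytes = 34371
//
//     [da]
//     share_size_bytes = 1024
//     commitment_size_bytes = 220000
//     shares_per_blob = 100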
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetGenConfig {
pub dataset: DatasetParams,
pub network: NetworkParams,
pub validator: ValidatorParams,
pub blocks: BlockParams,
pub da: DataAvailabilityParams,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetParams {
pub days: usize,
pub block_time_seconds: u64,
pub name: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkParams {
pub load_name: String,
pub blobs_per_block: usize,
pub total_subnets: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorParams {
pub assigned_subnets: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockParams {
pub size_bytes: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataAvailabilityParams {
pub share_size_bytes: usize,
pub commitment_size_bytes: usize,
pub shares_per_blob: usize,
}
impl DatasetGenConfig {
pub fn from_file<P: AsRef<std::path::Path>>(
path: P,
) -> Result<Self, Box<dyn std::error::Error>> {
let content = std::fs::read_to_string(path)?;
let config: Self =
toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {e}"))?;
Ok(config)
}
#[must_use]
pub const fn total_blocks(&self) -> usize {
let blocks_per_day = (24 * 60 * 60) / self.dataset.block_time_seconds as usize;
self.dataset.days * blocks_per_day
}
#[must_use]
pub fn estimated_size(&self) -> String {
let total_blocks = self.total_blocks() as u64;
let block_size = self.blocks.size_bytes as u64;
let subnet_assignment_probability =
self.validator.assigned_subnets as f64 / self.network.total_subnets as f64;
let total_blobs = total_blocks * self.network.blobs_per_block as u64;
let validator_assigned_blobs = (total_blobs as f64 * subnet_assignment_probability) as u64;
let shares_per_assigned_blob =
self.da.shares_per_blob as u64 / self.network.total_subnets as u64;
let total_shares_stored = validator_assigned_blobs * shares_per_assigned_blob;
let block_data_size = total_blocks * block_size;
let da_shares_size = total_shares_stored * self.da.share_size_bytes as u64;
let da_commitments_size = validator_assigned_blobs * self.da.commitment_size_bytes as u64;
let da_data_size = da_shares_size + da_commitments_size;
let total_bytes = block_data_size + da_data_size;
if total_bytes < 1024 * 1024 {
format!("{:.1} KB", total_bytes as f64 / 1024.0)
} else if total_bytes < 1024 * 1024 * 1024 {
format!("{:.1} MB", total_bytes as f64 / 1024.0 / 1024.0)
} else {
format!("{:.1} GB", total_bytes as f64 / 1024.0 / 1024.0 / 1024.0)
}
}
}

View File

@ -0,0 +1,9 @@
pub mod cli;
pub mod dataset_generation;
pub mod types;
pub mod validator_profiles;
pub use cli::*;
pub use dataset_generation::*;
pub use types::*;
pub use validator_profiles::*;

View File

@ -0,0 +1,122 @@
use std::str::FromStr;
use clap::ValueEnum;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
#[non_exhaustive]
pub enum CompressionType {
None,
Lz4,
Snappy,
Zstd,
}
impl Default for CompressionType {
fn default() -> Self {
Self::Snappy
}
}
impl std::fmt::Display for CompressionType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::None => write!(f, "none"),
Self::Lz4 => write!(f, "lz4"),
Self::Snappy => write!(f, "snappy"),
Self::Zstd => write!(f, "zstd"),
}
}
}
impl FromStr for CompressionType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"none" => Ok(Self::None),
"lz4" => Ok(Self::Lz4),
"snappy" => Ok(Self::Snappy),
"zstd" => Ok(Self::Zstd),
_ => Err(format!("Unknown compression type: {s}")),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, ValueEnum)]
#[non_exhaustive]
pub enum ProfileType {
Light,
Mainnet,
Testnet,
}
impl std::fmt::Display for ProfileType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Light => write!(f, "light"),
Self::Mainnet => write!(f, "mainnet"),
Self::Testnet => write!(f, "testnet"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum WorkloadType {
BlockValidation,
DaSampling,
DaCommitments,
IbdServing,
BlockStorage,
DaStorage,
}
impl std::fmt::Display for WorkloadType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::BlockValidation => write!(f, "block_validation"),
Self::DaSampling => write!(f, "da_sampling"),
Self::DaCommitments => write!(f, "da_commitments"),
Self::IbdServing => write!(f, "ibd_serving"),
Self::BlockStorage => write!(f, "block_storage"),
Self::DaStorage => write!(f, "da_storage"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum NetworkSize {
Small,
Medium,
Large,
Peak,
}
impl NetworkSize {
#[must_use]
pub const fn validator_count(self) -> usize {
match self {
Self::Small => 100,
Self::Medium => 1000,
Self::Large => 2000,
Self::Peak => 5000,
}
}
#[must_use]
pub const fn concurrent_services(self) -> usize {
match self {
Self::Small => 6,
Self::Medium => 8,
Self::Large => 10,
Self::Peak => 15,
}
}
}
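#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: `CompressionType` round-trips between its lowercase display
    // names and `FromStr`, which is how CLI flags are expected to reach it.
    #[test]
    fn compression_type_round_trips() {
        for ty in [
            CompressionType::None,
            CompressionType::Lz4,
            CompressionType::Snappy,
            CompressionType::Zstd,
        ] {
            assert_eq!(ty.to_string().parse::<CompressionType>(), Ok(ty));
        }
    }
}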

View File

@ -0,0 +1,115 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorProfile {
pub name: String,
pub description: String,
pub block_read_rate_hz: f64,
pub da_share_read_rate_hz: f64,
pub range_scan_rate_hz: f64,
pub block_write_rate_hz: f64,
pub da_share_write_rate_hz: f64,
pub commitment_write_rate_hz: f64,
pub recent_access_ratio: f64,
pub historical_access_ratio: f64,
#[serde(default = "default_total_validators")]
pub total_validators: usize,
#[serde(default = "default_assigned_subnets")]
pub assigned_subnets: usize,
}
impl ValidatorProfile {
#[must_use]
pub fn ibd_concurrent_streams(&self) -> usize {
let base_streams = 1;
let network_factor = (self.total_validators as f64 / 500.0).max(1.0);
let total_streams = (f64::from(base_streams) * network_factor).round() as usize;
std::cmp::min(total_streams, 8)
}
#[must_use]
pub fn da_concurrent_streams(&self) -> usize {
let subnet_factor = (self.assigned_subnets as f64 / 5.0).max(1.0);
let total_streams = subnet_factor.round() as usize;
std::cmp::min(total_streams, 5)
}
#[must_use]
pub fn total_concurrent_services(&self) -> usize {
let base_services = 3;
let ibd_services = self.ibd_concurrent_streams();
let da_services = self.da_concurrent_streams();
base_services + ibd_services + da_services
}
}
const fn default_total_validators() -> usize {
1000
}
const fn default_assigned_subnets() -> usize {
10
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkScalingConfig {
pub total_validators: usize,
pub total_subnets: usize,
pub assigned_subnets: usize,
pub activity_multiplier: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConcurrencyConfig {
pub base_concurrent_services: usize,
pub services_per_1k_validators: f64,
pub max_concurrent_services: usize,
pub ibd_concurrency_factor: f64,
pub da_concurrency_factor: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidatorProfiles {
pub light: ValidatorProfile,
pub mainnet: ValidatorProfile,
pub testnet: ValidatorProfile,
}
impl ValidatorProfiles {
pub fn from_file<P: AsRef<std::path::Path>>(
path: P,
) -> Result<Self, Box<dyn std::error::Error>> {
let content = std::fs::read_to_string(path)?;
let profiles: Self = toml::from_str(&content)
.map_err(|e| format!("Failed to parse validator profiles TOML: {e}"))?;
Ok(profiles)
}
#[must_use]
pub fn get_profile(&self, name: &str) -> Option<&ValidatorProfile> {
match name {
"light" => Some(&self.light),
"mainnet" => Some(&self.mainnet),
"testnet" => Some(&self.testnet),
_ => None,
}
}
#[must_use]
pub fn available_profiles(&self) -> Vec<&str> {
vec!["light", "mainnet", "testnet"]
}
}
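// Illustrative shape of the TOML consumed by `ValidatorProfiles::from_file`
// (values are placeholders, not recommended settings); the `[light]` and
// `[testnet]` tables follow the same field list:
//
//     [mainnet]
//     name = "mainnet"
//     description = "Mainnet validator workload"
//     block_read_rate_hz = 2.0
//     da_share_read_rate_hz = 5.0
//     range_scan_rate_hz = 0.1
//     block_write_rate_hz = 0.03
//     da_share_write_rate_hz = 1.0
//     commitment_write_rate_hz = 0.5
//     recent_access_ratio = 0.8
//     historical_access_ratio = 0.2
//     total_validators = 1000   # optional, defaults to 1000
//     assigned_subnets = 10     # optional, defaults to 10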

View File

@ -0,0 +1,152 @@
use rand::{Rng as _, SeedableRng as _};
use rand_chacha::ChaCha20Rng;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkSeed {
pub master_seed: u64,
pub dataset_generation_seed: u64,
pub access_pattern_seed: u64,
pub latency_measurement_seed: u64,
}
impl BenchmarkSeed {
#[must_use]
pub fn from_master(master_seed: u64) -> Self {
let mut rng = ChaCha20Rng::seed_from_u64(master_seed);
Self {
master_seed,
dataset_generation_seed: rng.gen(),
access_pattern_seed: rng.gen(),
latency_measurement_seed: rng.gen(),
}
}
#[must_use]
pub fn default_fixed() -> Self {
Self::from_master(12345)
}
#[must_use]
pub fn from_args_or_env(args: &[String]) -> Self {
for (i, arg) in args.iter().enumerate() {
if arg == "--seed" && i + 1 < args.len() {
if let Ok(seed) = args[i + 1].parse::<u64>() {
return Self::from_master(seed);
}
}
}
if let Ok(seed_str) = std::env::var("BENCHMARK_SEED") {
if let Ok(seed) = seed_str.parse::<u64>() {
return Self::from_master(seed);
}
}
Self::default_fixed()
}
#[must_use]
pub fn dataset_rng(&self) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.dataset_generation_seed)
}
#[must_use]
pub fn access_pattern_rng(&self, operation_id: u64) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.access_pattern_seed.wrapping_add(operation_id))
}
#[must_use]
pub fn latency_measurement_rng(&self) -> ChaCha20Rng {
ChaCha20Rng::seed_from_u64(self.latency_measurement_seed)
}
pub fn log_configuration(&self) {
log::info!("Benchmark seeds (for reproducibility):");
log::info!(" Master seed: {}", self.master_seed);
log::info!(" Dataset generation: {}", self.dataset_generation_seed);
log::info!(" Access patterns: {}", self.access_pattern_seed);
log::info!(" Latency measurement: {}", self.latency_measurement_seed);
log::info!(
" Reproduce with: --seed {} or BENCHMARK_SEED={}",
self.master_seed,
self.master_seed
);
}
}
static GLOBAL_BENCHMARK_SEED: std::sync::OnceLock<BenchmarkSeed> = std::sync::OnceLock::new();
pub fn initialize_benchmark_seed(args: &[String]) -> &'static BenchmarkSeed {
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
let seed = BenchmarkSeed::from_args_or_env(args);
seed.log_configuration();
seed
})
}
pub fn get_benchmark_seed() -> &'static BenchmarkSeed {
GLOBAL_BENCHMARK_SEED.get().unwrap_or_else(|| {
GLOBAL_BENCHMARK_SEED.get_or_init(|| {
let seed = BenchmarkSeed::default_fixed();
log::warn!("Using default seed (benchmark_seed not initialized)");
seed.log_configuration();
seed
})
})
}
#[must_use]
pub fn create_deterministic_rng(purpose: RngPurpose, id: u64) -> ChaCha20Rng {
let seed = get_benchmark_seed();
match purpose {
RngPurpose::DatasetGeneration => {
ChaCha20Rng::seed_from_u64(seed.dataset_generation_seed.wrapping_add(id))
}
RngPurpose::AccessPattern => seed.access_pattern_rng(id),
RngPurpose::LatencyMeasurement => {
ChaCha20Rng::seed_from_u64(seed.latency_measurement_seed.wrapping_add(id))
}
}
}
#[derive(Debug, Clone, Copy)]
pub enum RngPurpose {
DatasetGeneration,
AccessPattern,
LatencyMeasurement,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_deterministic_seed_derivation() {
let seed1 = BenchmarkSeed::from_master(12345);
let seed2 = BenchmarkSeed::from_master(12345);
assert_eq!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
assert_eq!(seed1.access_pattern_seed, seed2.access_pattern_seed);
}
#[test]
fn test_different_master_seeds() {
let seed1 = BenchmarkSeed::from_master(12345);
let seed2 = BenchmarkSeed::from_master(54321);
assert_ne!(seed1.dataset_generation_seed, seed2.dataset_generation_seed);
}
#[test]
fn test_deterministic_rng_creation() {
let seed = BenchmarkSeed::from_master(12345);
let rng1 = seed.access_pattern_rng(100);
let rng2 = seed.access_pattern_rng(100);
assert_eq!(rng1.get_seed(), rng2.get_seed());
}
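    // Usage sketch (seed value is illustrative): deriving RNGs from the same
    // master seed reproduces the same byte stream.
    #[test]
    fn usage_sketch_reproducible_streams() {
        use rand::Rng as _;
        let seeds = BenchmarkSeed::from_master(42);
        let first: u8 = seeds.dataset_rng().gen();
        let replayed: u8 = seeds.dataset_rng().gen();
        assert_eq!(first, replayed);
    }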
}

View File

@ -0,0 +1,200 @@
use bytes::Bytes;
use nomos_core::{da::BlobId, header::HeaderId};
use rand::Rng as _;
use rand_chacha::ChaCha20Rng;
use serde::{Deserialize, Serialize};
use crate::{
benchmark::utilities::{create_blob_id, create_header_id},
deterministic::BenchmarkSeed,
};
pub struct RealisticDataGenerator {
seed_config: BenchmarkSeed,
dataset_rng: ChaCha20Rng,
block_sequence: u64,
da_sequence: u64,
generation_stats: DataGenerationStats,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DataGenerationStats {
pub blocks_created: u64,
pub da_shares_created: u64,
pub commitments_created: u64,
pub total_bytes_generated: u64,
pub generation_start: Option<chrono::DateTime<chrono::Utc>>,
}
impl RealisticDataGenerator {
#[must_use]
pub fn new(master_seed: u64) -> Self {
let seed_config = BenchmarkSeed::from_master(master_seed);
let dataset_rng = seed_config.dataset_rng();
Self {
seed_config,
dataset_rng,
block_sequence: 0,
da_sequence: 0,
generation_stats: DataGenerationStats {
generation_start: Some(chrono::Utc::now()),
..Default::default()
},
}
}
#[must_use]
pub fn with_default_seed() -> Self {
Self::new(12345)
}
pub fn generate_block(&mut self, target_size: usize) -> Bytes {
let block_data = self.create_realistic_block_data(self.block_sequence, target_size);
self.block_sequence += 1;
self.generation_stats.blocks_created += 1;
self.generation_stats.total_bytes_generated += target_size as u64;
block_data
}
pub fn generate_da_share(&mut self, size: usize) -> Bytes {
let share_data = self.create_deterministic_da_share(self.da_sequence, size);
self.da_sequence += 1;
self.generation_stats.da_shares_created += 1;
self.generation_stats.total_bytes_generated += size as u64;
share_data
}
pub fn generate_commitment(&mut self, size: usize) -> Bytes {
let commitment_data = self.create_deterministic_commitment(self.da_sequence, size);
self.generation_stats.commitments_created += 1;
self.generation_stats.total_bytes_generated += size as u64;
commitment_data
}
pub fn generate_block_batch(&mut self, count: usize, block_size: usize) -> Vec<Bytes> {
std::iter::repeat_with(|| self.generate_block(block_size))
.take(count)
.collect()
}
pub fn generate_da_batch(
&mut self,
count: usize,
share_size: usize,
commitment_size: usize,
) -> Vec<(Bytes, Bytes)> {
std::iter::repeat_with(|| {
let share = self.generate_da_share(share_size);
let commitment = self.generate_commitment(commitment_size);
(share, commitment)
})
.take(count)
.collect()
}
#[must_use]
pub const fn stats(&self) -> &DataGenerationStats {
&self.generation_stats
}
#[must_use]
pub const fn sequence_state(&self) -> (u64, u64) {
(self.block_sequence, self.da_sequence)
}
pub const fn set_sequence_state(&mut self, block_sequence: u64, da_sequence: u64) {
self.block_sequence = block_sequence;
self.da_sequence = da_sequence;
}
pub fn reset(&mut self) {
self.block_sequence = 0;
self.da_sequence = 0;
self.generation_stats = DataGenerationStats {
generation_start: Some(chrono::Utc::now()),
..Default::default()
};
self.dataset_rng = self.seed_config.dataset_rng();
}
fn create_realistic_block_data(&mut self, block_index: u64, target_size: usize) -> Bytes {
let mut block_data = Vec::with_capacity(target_size);
block_data.extend_from_slice(&block_index.to_be_bytes());
let parent_hash: [u8; 32] = self.dataset_rng.gen();
block_data.extend_from_slice(&parent_hash);
let merkle_root: [u8; 32] = self.dataset_rng.gen();
block_data.extend_from_slice(&merkle_root);
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index * 30;
block_data.extend_from_slice(&timestamp.to_be_bytes());
while block_data.len() < target_size {
block_data.push(self.dataset_rng.gen());
}
block_data.resize(target_size, 0);
Bytes::from(block_data)
}
fn create_deterministic_da_share(&mut self, _sequence: u64, size: usize) -> Bytes {
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
.take(size)
.collect();
Bytes::from(data)
}
fn create_deterministic_commitment(&mut self, _sequence: u64, size: usize) -> Bytes {
let data: Vec<u8> = std::iter::repeat_with(|| self.dataset_rng.gen())
.take(size)
.collect();
Bytes::from(data)
}
}
pub struct IdGenerator {
block_counter: usize,
blob_counter: usize,
}
impl IdGenerator {
#[must_use]
pub const fn new() -> Self {
Self {
block_counter: 0,
blob_counter: 0,
}
}
pub fn next_header_id(&mut self) -> HeaderId {
let id = create_header_id(self.block_counter);
self.block_counter += 1;
id
}
pub fn next_blob_id(&mut self) -> BlobId {
let id = create_blob_id(self.blob_counter, 0);
self.blob_counter += 1;
id
}
#[must_use]
pub const fn counters(&self) -> (usize, usize) {
(self.block_counter, self.blob_counter)
}
}
impl Default for IdGenerator {
fn default() -> Self {
Self::new()
}
}
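#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch (sizes are illustrative): generate a small batch of blocks and
    // DA share/commitment pairs, then read back the sequence state and totals.
    #[test]
    fn generates_batches_and_tracks_stats() {
        let mut generator = RealisticDataGenerator::new(7);
        let blocks = generator.generate_block_batch(3, 1024);
        let da = generator.generate_da_batch(2, 512, 96);
        assert!(blocks.iter().all(|block| block.len() == 1024));
        assert!(da
            .iter()
            .all(|(share, commitment)| share.len() == 512 && commitment.len() == 96));
        assert_eq!(generator.sequence_state(), (3, 2));
        assert_eq!(generator.stats().blocks_created, 3);
        assert_eq!(generator.stats().da_shares_created, 2);
    }
}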

View File

@ -0,0 +1,172 @@
use smallvec::SmallVec;
#[derive(Debug, Clone)]
pub struct OperationBuffer {
pub read_buffer: Box<[u8]>,
pub write_buffer: Box<[u8]>,
pub key_buffer: SmallVec<[u8; 64]>,
}
impl OperationBuffer {
#[must_use]
pub fn new(read_size: usize, write_size: usize) -> Self {
Self {
read_buffer: vec![0u8; read_size].into_boxed_slice(),
write_buffer: vec![0u8; write_size].into_boxed_slice(),
key_buffer: SmallVec::new(),
}
}
#[must_use]
pub fn read_slice(&self) -> &[u8] {
&self.read_buffer
}
pub fn write_slice_mut(&mut self) -> &mut [u8] {
&mut self.write_buffer
}
pub fn prepare_key<T: AsRef<[u8]>>(&mut self, key_data: T) -> &[u8] {
let key_bytes = key_data.as_ref();
self.key_buffer.clear();
self.key_buffer.extend_from_slice(key_bytes);
&self.key_buffer
}
}
#[derive(Debug, Clone, Copy)]
pub struct TimingMeasurement {
pub start_time: std::time::Instant,
}
impl TimingMeasurement {
#[inline]
#[must_use]
pub fn start() -> Self {
Self {
start_time: std::time::Instant::now(),
}
}
#[inline]
#[must_use]
pub fn end(self) -> std::time::Duration {
self.start_time.elapsed()
}
}
#[derive(Debug, Clone, Default)]
pub struct ThreadLocalMetrics {
pub operations_count: u64,
pub success_count: u64,
pub error_count: u64,
pub bytes_processed: u64,
pub latency_sum_micros: u64,
pub latency_count: u64,
}
impl ThreadLocalMetrics {
#[inline]
pub const fn record_operation(
&mut self,
success: bool,
bytes: u64,
latency: std::time::Duration,
) {
self.operations_count += 1;
if success {
self.success_count += 1;
} else {
self.error_count += 1;
}
self.bytes_processed += bytes;
self.latency_sum_micros += latency.as_micros() as u64;
self.latency_count += 1;
}
#[must_use]
pub fn average_latency_micros(&self) -> f64 {
if self.latency_count > 0 {
self.latency_sum_micros as f64 / self.latency_count as f64
} else {
0.0
}
}
pub const fn fold_into(self, global: &mut Self) {
global.operations_count += self.operations_count;
global.success_count += self.success_count;
global.error_count += self.error_count;
global.bytes_processed += self.bytes_processed;
global.latency_sum_micros += self.latency_sum_micros;
global.latency_count += self.latency_count;
}
}
pub trait EfficientIteratorExt: Iterator {
fn collect_presized(self, size_hint: usize) -> Vec<Self::Item>
where
Self: Sized,
{
let mut vec = Vec::with_capacity(size_hint);
vec.extend(self);
vec
}
fn collect_small_8(self) -> SmallVec<[Self::Item; 8]>
where
Self: Sized,
{
let mut vec: SmallVec<[Self::Item; 8]> = SmallVec::new();
vec.extend(self);
vec
}
}
impl<I: Iterator> EfficientIteratorExt for I {}
#[cfg(test)]
mod tests {
use super::*;
    use crate::benchmark::utilities::{create_blob_id, create_header_id};
#[test]
fn test_operation_buffer_efficiency() {
let mut buffer = OperationBuffer::new(1024, 2048);
let key1 = buffer.prepare_key(b"test_key_1");
assert_eq!(key1, b"test_key_1");
let key2 = buffer.prepare_key(b"different_key");
assert_eq!(key2, b"different_key");
assert!(buffer.key_buffer.capacity() >= 12);
}
#[test]
fn test_efficient_id_creation() {
        let header_id = create_header_id(12345);
let blob_id = create_blob_id(100, 5);
assert_ne!(header_id.as_ref(), &[0u8; 32]);
assert_ne!(blob_id.as_ref(), &[0u8; 32]);
}
#[test]
fn test_thread_local_metrics() {
let mut metrics = ThreadLocalMetrics::default();
metrics.record_operation(true, 1024, std::time::Duration::from_micros(500));
metrics.record_operation(false, 0, std::time::Duration::from_micros(1000));
assert_eq!(metrics.operations_count, 2);
assert_eq!(metrics.success_count, 1);
assert_eq!(metrics.error_count, 1);
assert_eq!(metrics.bytes_processed, 1024);
assert_eq!(metrics.average_latency_micros(), 750.0);
}
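    // Usage sketch: time an operation, record it in per-thread metrics, then fold
    // the thread-local counters into a shared accumulator.
    #[test]
    fn usage_sketch_timing_and_fold() {
        let timer = TimingMeasurement::start();
        let elapsed = timer.end();
        let mut local = ThreadLocalMetrics::default();
        local.record_operation(true, 64, elapsed);
        let mut global = ThreadLocalMetrics::default();
        local.fold_into(&mut global);
        assert_eq!(global.operations_count, 1);
        assert_eq!(global.bytes_processed, 64);
    }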
}

View File

@ -0,0 +1,11 @@
pub mod deterministic;
pub mod generator;
pub mod hot_path_types;
pub mod realistic_generation;
pub mod spec_access_patterns;
pub mod streaming_writer;
pub use deterministic::*;
pub use generator::*;
pub use realistic_generation::*;
pub use spec_access_patterns::*;

View File

@ -0,0 +1,211 @@
use bytes::Bytes;
use cryptarchia_engine::Slot;
use groth16::Fr;
use nomos_core::{
block::Block,
crypto::{Digest as _, Hasher},
header::{ContentId, Header, HeaderId},
mantle::{
ledger::Tx as LedgerTx, ops::leader_claim::VoucherCm, MantleTx, Note, SignedMantleTx,
Transaction as _, Utxo,
},
proofs::{
leader_proof::{Groth16LeaderProof, LeaderPrivate, LeaderPublic},
zksig::{DummyZkSignature, ZkSignaturePublic},
},
};
use rand::{Rng as _, SeedableRng as _};
use rand_chacha::ChaCha20Rng;
use crate::deterministic::{create_deterministic_rng, RngPurpose};
pub fn create_block(
block_index: usize,
parent_id: HeaderId,
) -> Result<(HeaderId, Bytes), Box<dyn std::error::Error>> {
let transactions = create_signed_mantle_txs(block_index);
let slot = Slot::from(block_index as u64);
let block_root = ContentId::from(calculate_block_root(&transactions));
let proof = make_test_proof(block_index);
let header = Header::new(parent_id, block_root, slot, proof);
let header_id = header.id();
let block: Block<SignedMantleTx> = Block::new(header, transactions);
let block_bytes = bincode::serialize(&block)?;
Ok((header_id, Bytes::from(block_bytes)))
}
#[must_use]
pub fn create_block_data(block_index: usize, target_size: usize) -> Bytes {
create_simplified_realistic_block_data(block_index, target_size)
}
fn make_test_proof(block_index: usize) -> Groth16LeaderProof {
let public_inputs = LeaderPublic::new(
Fr::from(block_index as u64),
Fr::from(block_index as u64 + 1),
Fr::from(12345u64),
block_index as u64,
1_000_000,
);
let note = Note::new(1000, Fr::from(block_index as u64).into());
let utxo = Utxo {
tx_hash: Fr::from(block_index as u64).into(),
output_index: 0,
note,
};
let leader_key_bytes = [block_index as u8; 32];
let leader_key = ed25519_dalek::VerifyingKey::from_bytes(&leader_key_bytes)
.unwrap_or_else(|_| ed25519_dalek::VerifyingKey::from_bytes(&[1u8; 32]).unwrap());
let aged_path = vec![];
let latest_path = vec![];
let private = LeaderPrivate::new(
public_inputs,
utxo,
&aged_path,
&latest_path,
Fr::from(999u64),
0,
&leader_key,
);
let voucher_cm = VoucherCm::default();
Groth16LeaderProof::prove(private, voucher_cm).unwrap_or_else(|_| {
panic!("Proof generation failed - ensure POL_PROOF_DEV_MODE=true is set");
})
}
#[must_use]
pub fn create_da_share(block: usize, blob: usize, size: usize) -> Bytes {
let data_id = (block as u64 * 1000) + blob as u64;
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, data_id);
let data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
Bytes::from(data)
}
pub async fn create_commitment(
block: usize,
blob: usize,
size: usize,
) -> Result<Bytes, Box<dyn std::error::Error>> {
let commitment_id = (block as u64 * 1000) + blob as u64;
let mut rng =
create_deterministic_rng(RngPurpose::DatasetGeneration, commitment_id + 1_000_000);
let commitment_data: Vec<u8> = std::iter::repeat_with(|| rng.gen()).take(size).collect();
Ok(Bytes::from(commitment_data))
}
fn create_simplified_realistic_block_data(block_index: usize, target_size: usize) -> Bytes {
let mut rng = create_deterministic_rng(RngPurpose::DatasetGeneration, block_index as u64);
let mut block_data = Vec::with_capacity(target_size);
block_data.extend_from_slice(&(block_index as u64).to_be_bytes());
let parent_hash: [u8; 32] = rng.gen();
block_data.extend_from_slice(&parent_hash);
let merkle_root: [u8; 32] = rng.gen();
block_data.extend_from_slice(&merkle_root);
let timestamp = chrono::Utc::now().timestamp() as u64 + block_index as u64 * 30;
block_data.extend_from_slice(&timestamp.to_be_bytes());
while block_data.len() < target_size {
block_data.push(rng.gen());
}
block_data.resize(target_size, 0);
Bytes::from(block_data)
}
fn create_signed_mantle_txs(block_index: usize) -> Vec<SignedMantleTx> {
let mut rng = ChaCha20Rng::seed_from_u64(block_index as u64 * 12345);
let tx_count = std::cmp::min(5 + (block_index % 100), 1024);
let mut transactions = Vec::with_capacity(tx_count);
for tx_idx in 0..tx_count {
let input_utxos = create_input_utxos(&mut rng, tx_idx);
let input_ids: Vec<_> = input_utxos.iter().map(Utxo::id).collect();
let output_notes = create_output_notes(&mut rng, tx_idx);
let ledger_tx = LedgerTx::new(input_ids, output_notes);
let mantle_tx = MantleTx {
ops: vec![],
ledger_tx,
execution_gas_price: rng.gen::<u64>() % 1_000_000,
storage_gas_price: rng.gen::<u64>() % 100_000,
};
let pks: Vec<Fr> = input_utxos.iter().map(|utxo| utxo.note.pk.into()).collect();
let msg_hash = mantle_tx.hash().into();
let ledger_tx_proof = DummyZkSignature::prove(ZkSignaturePublic { pks, msg_hash });
let ops_proofs = vec![];
let signed_tx = SignedMantleTx {
ops_proofs,
ledger_tx_proof,
mantle_tx,
};
transactions.push(signed_tx);
}
transactions
}
fn create_input_utxos(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Utxo> {
let input_count = 1 + (tx_idx % 3);
(0..input_count)
.map(|input_idx| Utxo {
tx_hash: Fr::from(rng.gen::<u64>()).into(),
output_index: input_idx,
note: Note::new(
rng.gen::<u64>() % 1_000_000,
Fr::from(rng.gen::<u64>()).into(),
),
})
.collect()
}
fn create_output_notes(rng: &mut ChaCha20Rng, tx_idx: usize) -> Vec<Note> {
let output_count = 1 + (tx_idx % 4);
std::iter::repeat_with(|| {
Note::new(
rng.gen::<u64>() % 1_000_000,
Fr::from(rng.gen::<u64>()).into(),
)
})
.take(output_count)
.collect()
}
fn calculate_block_root(transactions: &[SignedMantleTx]) -> [u8; 32] {
let mut hasher = Hasher::new();
hasher.update(b"BLOCK_ROOT_V1");
for tx in transactions {
let tx_hash = tx.mantle_tx.hash();
hasher.update(tx_hash.as_signing_bytes());
}
hasher.finalize().into()
}

View File

@ -0,0 +1,69 @@
use rand_distr::{Distribution as _, Zipf};
use crate::{
config::ValidatorProfile,
deterministic::{create_deterministic_rng, RngPurpose},
};
#[must_use]
pub fn select_block_spec_accurate(
operation_count: u64,
max_blocks: usize,
profile: &ValidatorProfile,
) -> usize {
if max_blocks == 0 {
return 0;
}
let access_selector = (operation_count * 31) % 100;
if access_selector < (profile.recent_access_ratio * 100.0) as u64 {
select_recent_block_zipfian(operation_count, max_blocks)
} else {
select_historical_block_uniform(operation_count, max_blocks)
}
}
fn select_recent_block_zipfian(operation_count: u64, max_blocks: usize) -> usize {
    // Recent window covers ~20% of the chain, but at least 1000 blocks and never
    // more than the chain itself (otherwise the returned index could exceed
    // `max_blocks` for short chains).
    let recent_window_size = std::cmp::max(max_blocks / 5, 1000).min(max_blocks);
let zipf_dist = Zipf::new(recent_window_size as u64, 1.0).unwrap();
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
let zipf_sample = zipf_dist.sample(&mut rng) as usize;
let recent_start = max_blocks.saturating_sub(recent_window_size);
let tip_offset = zipf_sample.saturating_sub(1);
recent_start + (recent_window_size - 1 - tip_offset)
}
const fn select_historical_block_uniform(operation_count: u64, max_blocks: usize) -> usize {
(operation_count as usize * 23) % max_blocks
}
#[must_use]
pub fn select_da_spec_accurate(
operation_count: u64,
max_blobs: usize,
profile: &ValidatorProfile,
) -> usize {
if max_blobs == 0 {
return 0;
}
let recent_threshold = (profile.recent_access_ratio * 100.0) as u64;
let access_selector = (operation_count * 41) % 100;
if access_selector < recent_threshold {
let recent_blobs = std::cmp::min(100, max_blobs);
let zipf_dist = Zipf::new(recent_blobs as u64, 1.2).unwrap();
let mut rng = create_deterministic_rng(RngPurpose::AccessPattern, operation_count);
let sample = zipf_dist.sample(&mut rng) as usize;
let recent_start = max_blobs.saturating_sub(recent_blobs);
recent_start + (recent_blobs - sample.min(recent_blobs))
} else {
(operation_count as usize * 29) % max_blobs
}
}
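#[cfg(test)]
mod usage_sketch {
    use super::*;
    use crate::config::ValidatorProfile;

    // Usage sketch: drive the selectors with a hand-built profile (all rates and
    // ratios below are illustrative) and check the indices stay in range.
    #[test]
    fn selected_indices_stay_in_range() {
        let profile = ValidatorProfile {
            name: "sketch".to_owned(),
            description: "illustrative profile".to_owned(),
            block_read_rate_hz: 2.0,
            da_share_read_rate_hz: 5.0,
            range_scan_rate_hz: 0.1,
            block_write_rate_hz: 0.03,
            da_share_write_rate_hz: 1.0,
            commitment_write_rate_hz: 0.5,
            recent_access_ratio: 0.8,
            historical_access_ratio: 0.2,
            total_validators: 1000,
            assigned_subnets: 10,
        };
        for op in 0..1000 {
            assert!(select_block_spec_accurate(op, 5000, &profile) < 5000);
            assert!(select_da_spec_accurate(op, 300, &profile) < 300);
        }
    }
}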

View File

@ -0,0 +1,49 @@
use std::{
fs::File,
io::{BufWriter, Write as _},
path::Path,
};
const CHUNK_SIZE: usize = 64 * 1024;
const BUFFER_SIZE: usize = 1024 * 1024;
pub struct StreamingDatasetWriter {
writer: BufWriter<File>,
chunk_buffer: Box<[u8; CHUNK_SIZE]>,
bytes_written: u64,
}
impl StreamingDatasetWriter {
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, std::io::Error> {
let file = File::create(path)?;
let writer = BufWriter::with_capacity(BUFFER_SIZE, file);
Ok(Self {
writer,
chunk_buffer: vec![0u8; CHUNK_SIZE]
.into_boxed_slice()
.try_into()
.expect("CHUNK_SIZE is const"),
bytes_written: 0,
})
}
pub fn write_chunk(&mut self, data: &[u8]) -> Result<(), std::io::Error> {
let mut remaining = data;
while !remaining.is_empty() {
let write_size = std::cmp::min(remaining.len(), CHUNK_SIZE);
self.chunk_buffer[..write_size].copy_from_slice(&remaining[..write_size]);
self.writer.write_all(&self.chunk_buffer[..write_size])?;
remaining = &remaining[write_size..];
self.bytes_written += write_size as u64;
}
Ok(())
}
pub fn finalize(mut self) -> Result<u64, std::io::Error> {
self.writer.flush()?;
Ok(self.bytes_written)
}
}
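#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: stream a payload through the fixed-size chunk buffer and
    // confirm the byte count reported by `finalize`.
    #[test]
    fn writes_and_reports_bytes() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("dataset.bin");
        let payload = vec![0xABu8; 3 * CHUNK_SIZE + 123];
        let mut writer = StreamingDatasetWriter::new(&path).expect("create writer");
        writer.write_chunk(&payload).expect("write");
        assert_eq!(writer.finalize().expect("flush"), payload.len() as u64);
    }
}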

View File

@ -0,0 +1,58 @@
use std::path::PathBuf;
use nomos_storage::backends::rocksdb::RocksBackendSettings;
pub mod benchmark;
pub mod config;
pub mod data;
pub mod metrics;
pub mod storage;
pub use benchmark::*;
pub use config::{
CompressionType, DatasetGenConfig, NetworkSize, ProductionBenchConfig, ProfileType,
ValidatorProfile, ValidatorProfiles, WorkloadType,
};
pub use data::*;
pub use metrics::*;
pub use storage::*;
#[derive(Debug, Clone)]
pub struct BenchStorageConfig {
pub name: String,
pub settings: RocksBackendSettings,
}
impl BenchStorageConfig {
#[must_use]
pub fn production() -> Self {
Self {
name: "production".to_string(),
settings: RocksBackendSettings {
db_path: Self::data_path(),
read_only: false,
column_family: Some("blocks".to_string()),
},
}
}
#[must_use]
pub fn data_path() -> PathBuf {
let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
let data_dir = PathBuf::from(home_dir).join(".nomos_storage_benchmarks");
let _ = std::fs::create_dir_all(&data_dir);
data_dir.join("rocksdb_data")
}
#[must_use]
pub fn results_path() -> PathBuf {
let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
let results_dir = PathBuf::from(home_dir)
.join(".nomos_storage_benchmarks")
.join("results");
let _ = std::fs::create_dir_all(&results_dir);
results_dir
}
}
pub type BenchConfig = BenchStorageConfig;

View File

@ -0,0 +1,87 @@
use std::time::Instant;
use hdrhistogram::Histogram;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyPercentiles {
pub p50_ms: f64,
pub p90_ms: f64,
pub p95_ms: f64,
pub p99_ms: f64,
pub max_ms: f64,
pub mean_ms: f64,
pub sample_count: u64,
}
pub struct LatencyTracker {
histogram: Histogram<u64>,
operation_count: u64,
}
impl Default for LatencyTracker {
fn default() -> Self {
Self::new()
}
}
impl LatencyTracker {
#[must_use]
pub fn new() -> Self {
Self {
histogram: Histogram::new_with_bounds(1, 3_600_000_000, 3)
.expect("Valid histogram bounds"),
operation_count: 0,
}
}
pub async fn record_async_operation<F, Fut, R>(&mut self, operation: F) -> R
where
F: FnOnce() -> Fut,
Fut: std::future::Future<Output = R>,
{
let start = Instant::now();
let result = operation().await;
let latency = start.elapsed();
let latency_micros = latency.as_micros() as u64;
if self.histogram.record(latency_micros).is_ok() {
self.operation_count += 1;
}
result
}
#[must_use]
pub fn get_percentiles(&self) -> LatencyPercentiles {
if self.operation_count == 0 {
return LatencyPercentiles {
p50_ms: 0.0,
p90_ms: 0.0,
p95_ms: 0.0,
p99_ms: 0.0,
max_ms: 0.0,
mean_ms: 0.0,
sample_count: 0,
};
}
let to_ms = |v: u64| v as f64 / 1000.0;
LatencyPercentiles {
p50_ms: to_ms(self.histogram.value_at_quantile(0.50)),
p90_ms: to_ms(self.histogram.value_at_quantile(0.90)),
p95_ms: to_ms(self.histogram.value_at_quantile(0.95)),
p99_ms: to_ms(self.histogram.value_at_quantile(0.99)),
max_ms: to_ms(self.histogram.max()),
mean_ms: self.histogram.mean() / 1000.0,
sample_count: self.operation_count,
}
}
}
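#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: wrap an async operation (a short sleep stands in for storage
    // I/O) and read back percentiles in milliseconds.
    #[tokio::test]
    async fn records_async_latency() {
        let mut tracker = LatencyTracker::new();
        tracker
            .record_async_operation(|| async {
                tokio::time::sleep(std::time::Duration::from_millis(1)).await;
            })
            .await;
        let percentiles = tracker.get_percentiles();
        assert_eq!(percentiles.sample_count, 1);
        assert!(percentiles.p50_ms > 0.0);
    }
}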

View File

@ -0,0 +1,9 @@
pub mod latency_stats;
pub mod rocksdb_collector;
pub mod rocksdb_stats;
pub mod runtime_memory_allocator;
pub use latency_stats::*;
pub use rocksdb_collector::*;
pub use rocksdb_stats::*;
pub use runtime_memory_allocator::*;

View File

@ -0,0 +1,365 @@
use std::collections::HashMap;
use nomos_storage::backends::rocksdb::RocksBackend;
use serde::{Deserialize, Serialize};
pub struct RocksDbStatsCollector {
storage_ref: Option<*const RocksBackend>,
property_cache: HashMap<String, Option<u64>>,
stats_cache: Option<String>,
cache_valid: bool,
collection_count: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RocksDbStatsSnapshot {
pub stats: super::RocksDbStats,
pub collection_timestamp: chrono::DateTime<chrono::Utc>,
pub collection_id: u64,
pub cache_hits: u64,
pub cache_misses: u64,
}
impl RocksDbStatsCollector {
#[must_use]
pub fn new() -> Self {
Self {
storage_ref: None,
property_cache: HashMap::new(),
stats_cache: None,
cache_valid: false,
collection_count: 0,
}
}
pub fn attach(&mut self, storage: &RocksBackend) {
self.storage_ref = Some(std::ptr::from_ref::<RocksBackend>(storage));
self.invalidate_cache();
}
pub fn collect_stats(&mut self) -> Result<RocksDbStatsSnapshot, Box<dyn std::error::Error>> {
        // SAFETY: `attach()` stores a raw pointer to the backend; the caller must
        // guarantee that the attached `RocksBackend` outlives this collector and is
        // not moved, otherwise this dereference is undefined behaviour.
let storage = unsafe {
self.storage_ref
.ok_or("No storage attached")?
.as_ref()
.ok_or("Invalid storage ref")?
};
self.collection_count += 1;
let stats = self.collect_with_caching(storage)?;
Ok(RocksDbStatsSnapshot {
stats,
collection_timestamp: chrono::Utc::now(),
collection_id: self.collection_count,
cache_hits: self.count_cache_hits(),
cache_misses: self.count_cache_misses(),
})
}
pub fn collect_before_after<F>(
&mut self,
operation: F,
) -> Result<(RocksDbStatsSnapshot, RocksDbStatsSnapshot), Box<dyn std::error::Error>>
where
F: FnOnce() -> Result<(), Box<dyn std::error::Error>>,
{
let before = self.collect_stats()?;
self.invalidate_cache();
operation()?;
let after = self.collect_stats()?;
Ok((before, after))
}
pub fn invalidate_cache(&mut self) {
self.property_cache.clear();
self.stats_cache = None;
self.cache_valid = false;
}
#[must_use]
pub fn collection_stats(&self) -> (u64, u64, u64) {
(
self.collection_count,
self.count_cache_hits(),
self.count_cache_misses(),
)
}
fn collect_with_caching(
&mut self,
storage: &RocksBackend,
) -> Result<super::RocksDbStats, Box<dyn std::error::Error>> {
let stats_string = if let Some(ref cached) = self.stats_cache {
cached.clone()
} else {
let stats = self.get_stats_string(storage)?;
self.stats_cache = Some(stats.clone());
stats
};
let (cache_hit_count, cache_miss_count) = self.parse_cache_hit_miss(&stats_string);
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
} else {
0.0
};
let level_files: Vec<u64> = (0..7)
.map(|level| {
self.get_cached_property_u64(
storage,
rocksdb::properties::num_files_at_level(level).as_ref(),
)
})
.collect();
Ok(super::RocksDbStats {
cache_hit_rate,
cache_hit_count,
cache_miss_count,
block_cache_usage_bytes: self
.get_cached_property_u64(storage, rocksdb::properties::BLOCK_CACHE_USAGE.as_ref()),
block_cache_capacity_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
),
index_cache_usage_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
),
compaction_pending_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
),
compaction_running_count: self.get_cached_property_u64(
storage,
rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
),
l0_file_count: level_files[0],
l1_file_count: level_files[1],
l2_file_count: level_files[2],
l3_file_count: level_files[3],
l4_file_count: level_files[4],
l5_file_count: level_files[5],
l6_file_count: level_files[6],
total_sst_files: level_files.iter().sum(),
total_sst_size_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
),
memtable_count: self.parse_memtable_count(&stats_string),
num_immutable_memtables: self.parse_immutable_memtables(&stats_string),
memtable_flush_pending: self.get_cached_property_u64(
storage,
rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
),
approximate_memory_usage_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
),
read_amplification: self.parse_read_amplification(&stats_string),
write_amplification: self.parse_write_amplification(&stats_string),
total_read_bytes: self.parse_total_read_bytes(&stats_string),
total_write_bytes: self.parse_total_write_bytes(&stats_string),
write_stall_time_ms: self.parse_write_stall_time(&stats_string),
live_sst_files_size_bytes: self.get_cached_property_u64(
storage,
rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
),
num_entries: self
.get_cached_property_u64(storage, rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
})
}
fn get_cached_property_u64(&mut self, storage: &RocksBackend, property: &str) -> u64 {
if let Some(cached_value) = self.property_cache.get(property) {
return cached_value.unwrap_or(0);
}
let value = self.query_property_u64(storage, property);
self.property_cache.insert(property.to_owned(), value);
value.unwrap_or(0)
}
fn query_property_u64(&self, storage: &RocksBackend, property: &str) -> Option<u64> {
let property_owned = property.to_owned();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(result_bytes)) => {
let value_str = String::from_utf8_lossy(&result_bytes);
value_str.trim().parse().ok()
}
_ => None,
}
}
fn get_stats_string(
&self,
storage: &RocksBackend,
) -> Result<String, Box<dyn std::error::Error>> {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
_ => Ok(Some(b"".to_vec().into())),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => Ok(String::from_utf8_lossy(&stats_bytes).to_string()),
_ => Ok(String::new()),
}
}
fn count_cache_hits(&self) -> u64 {
self.property_cache.values().filter(|v| v.is_some()).count() as u64
}
fn count_cache_misses(&self) -> u64 {
self.property_cache.values().filter(|v| v.is_none()).count() as u64
}
fn parse_cache_hit_miss(&self, stats: &str) -> (u64, u64) {
let mut hits = 0u64;
let mut misses = 0u64;
for line in stats.lines() {
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
if let Some(value) = self.extract_number_from_line(line) {
hits = value;
}
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss")
{
if let Some(value) = self.extract_number_from_line(line) {
misses = value;
}
}
}
(hits, misses)
}
fn parse_memtable_count(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_immutable_memtables(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_read_amplification(&self, stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("read amplification") || line.contains("Read(GB)") {
if let Some(value) = self.extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_write_amplification(&self, stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("write amplification") || line.contains("Write(GB)") {
if let Some(value) = self.extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_total_read_bytes(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes read") || line.contains("Read(GB)") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_total_write_bytes(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes written") || line.contains("Write(GB)") {
if let Some(value) = self.extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_write_stall_time(&self, stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
if let Some(percent_pos) = line.find("percent") {
let before_percent = &line[..percent_pos];
if let Some(comma_pos) = before_percent.rfind(',') {
let percent_str = before_percent[comma_pos + 1..].trim();
if let Ok(percent) = percent_str.parse::<f64>() {
return (percent * 10.0) as u64;
}
}
}
}
}
0
}
fn extract_number_from_line(&self, line: &str) -> Option<u64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
let clean_number = number_str.replace(',', "");
return clean_number.parse().ok();
}
}
None
}
fn extract_float_from_line(&self, line: &str) -> Option<f64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
return number_str.parse().ok();
}
}
None
}
}
impl Default for RocksDbStatsCollector {
fn default() -> Self {
Self::new()
}
}

View File

@ -0,0 +1,386 @@
use nomos_storage::backends::rocksdb::RocksBackend;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RocksDbStats {
pub cache_hit_rate: f64,
pub cache_hit_count: u64,
pub cache_miss_count: u64,
pub block_cache_usage_bytes: u64,
pub block_cache_capacity_bytes: u64,
pub index_cache_usage_bytes: u64,
pub compaction_pending_bytes: u64,
pub compaction_running_count: u64,
pub l0_file_count: u64,
pub l1_file_count: u64,
pub l2_file_count: u64,
pub l3_file_count: u64,
pub l4_file_count: u64,
pub l5_file_count: u64,
pub l6_file_count: u64,
pub total_sst_files: u64,
pub total_sst_size_bytes: u64,
pub memtable_count: u64,
pub num_immutable_memtables: u64,
pub memtable_flush_pending: u64,
pub approximate_memory_usage_bytes: u64,
pub read_amplification: f64,
pub write_amplification: f64,
pub total_read_bytes: u64,
pub total_write_bytes: u64,
pub write_stall_time_ms: u64,
pub live_sst_files_size_bytes: u64,
pub num_entries: u64,
}
#[must_use]
pub fn collect_rocksdb_stats(storage: &RocksBackend) -> RocksDbStats {
    // Fetch the (expensive) STATS property dump once and reuse it for every parse below.
    let stats_string = get_stats_string(storage);
    let (cache_hit_count, cache_miss_count) = parse_cache_hit_miss_counts(&stats_string);
let cache_hit_rate = if cache_hit_count + cache_miss_count > 0 {
cache_hit_count as f64 / (cache_hit_count + cache_miss_count) as f64
} else {
0.0
};
let l0_files = get_level_file_count(storage, 0);
let l1_files = get_level_file_count(storage, 1);
let l2_files = get_level_file_count(storage, 2);
let l3_files = get_level_file_count(storage, 3);
let l4_files = get_level_file_count(storage, 4);
let l5_files = get_level_file_count(storage, 5);
let l6_files = get_level_file_count(storage, 6);
RocksDbStats {
cache_hit_rate,
cache_hit_count,
cache_miss_count,
block_cache_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::BLOCK_CACHE_USAGE.as_ref(),
),
block_cache_capacity_bytes: get_property_u64(
storage,
&rocksdb::properties::BLOCK_CACHE_CAPACITY.as_ref(),
),
index_cache_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::ESTIMATE_TABLE_READERS_MEM.as_ref(),
),
compaction_pending_bytes: get_property_u64(
storage,
&rocksdb::properties::ESTIMATE_PENDING_COMPACTION_BYTES.as_ref(),
),
compaction_running_count: get_property_u64(
storage,
&rocksdb::properties::NUM_RUNNING_COMPACTIONS.as_ref(),
),
l0_file_count: l0_files,
l1_file_count: l1_files,
l2_file_count: l2_files,
l3_file_count: l3_files,
l4_file_count: l4_files,
l5_file_count: l5_files,
l6_file_count: l6_files,
total_sst_files: l0_files + l1_files + l2_files + l3_files + l4_files + l5_files + l6_files,
total_sst_size_bytes: get_property_u64(
storage,
&rocksdb::properties::TOTAL_SST_FILES_SIZE.as_ref(),
),
        memtable_count: parse_memtable_count(&stats_string),
        num_immutable_memtables: parse_immutable_memtables(&stats_string),
memtable_flush_pending: get_property_u64(
storage,
&rocksdb::properties::NUM_RUNNING_FLUSHES.as_ref(),
),
approximate_memory_usage_bytes: get_property_u64(
storage,
&rocksdb::properties::CUR_SIZE_ALL_MEM_TABLES.as_ref(),
),
        read_amplification: parse_read_amplification(&stats_string),
        write_amplification: parse_write_amplification(&stats_string),
        total_read_bytes: parse_total_read_bytes(&stats_string),
        total_write_bytes: parse_total_write_bytes(&stats_string),
        write_stall_time_ms: parse_write_stall_time(&stats_string),
live_sst_files_size_bytes: get_property_u64(
storage,
&rocksdb::properties::LIVE_SST_FILES_SIZE.as_ref(),
),
num_entries: get_property_u64(storage, &rocksdb::properties::ESTIMATE_NUM_KEYS.as_ref()),
}
}
fn get_stats_string(storage: &RocksBackend) -> String {
let transaction = storage.txn(|db| match db.property_value(rocksdb::properties::STATS) {
Ok(Some(stats_string)) => Ok(Some(stats_string.into_bytes().into())),
_ => Ok(Some(b"".to_vec().into())),
});
match transaction.execute() {
Ok(Some(stats_bytes)) => String::from_utf8_lossy(&stats_bytes).to_string(),
_ => String::new(),
}
}
fn get_level_file_count(storage: &RocksBackend, level: i32) -> u64 {
get_property_u64(
storage,
&rocksdb::properties::num_files_at_level(level as usize).as_ref(),
)
}
fn get_property_u64(storage: &RocksBackend, property: &str) -> u64 {
match get_property_value(storage, property) {
Some(value) => {
log::debug!("Property '{}': {}", property, value);
value
}
None => {
log::debug!("Property '{}': unavailable", property);
0
}
}
}
fn get_property_value(storage: &RocksBackend, property: &str) -> Option<u64> {
let property_owned = property.to_owned();
let property_for_log = property.to_owned();
let transaction = storage.txn(move |db| match db.property_value(&property_owned) {
Ok(Some(value_string)) => Ok(Some(value_string.into_bytes().into())),
Ok(None) => Ok(None),
Err(e) => Err(e),
});
match transaction.execute() {
Ok(Some(result_bytes)) => {
let value_str = String::from_utf8_lossy(&result_bytes);
match value_str.trim().parse::<u64>() {
Ok(parsed) => {
log::trace!("Property '{}' available: {}", property_for_log, parsed);
Some(parsed)
}
Err(_) => {
log::trace!(
"Property '{}' parse error from: '{}'",
property_for_log,
value_str
);
None
}
}
}
Ok(None) => {
log::trace!("Property '{}' unavailable", property_for_log);
None
}
Err(e) => {
log::trace!("Property '{}' failed: {}", property_for_log, e);
None
}
}
}
fn parse_cache_hit_miss_counts(stats: &str) -> (u64, u64) {
let mut hits = 0u64;
let mut misses = 0u64;
for line in stats.lines() {
if line.contains("Block cache hit count:") || line.contains("block.cache.hit") {
if let Some(value) = extract_number_from_line(line) {
hits = value;
}
} else if line.contains("Block cache miss count:") || line.contains("block.cache.miss") {
if let Some(value) = extract_number_from_line(line) {
misses = value;
}
}
}
(hits, misses)
}
fn parse_write_stall_time(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Cumulative stall:") && line.contains("H:M:S") {
if let Some(percent_pos) = line.find("percent") {
let before_percent = &line[..percent_pos];
if let Some(comma_pos) = before_percent.rfind(',') {
let percent_str = before_percent[comma_pos + 1..].trim();
if let Ok(percent) = percent_str.parse::<f64>() {
return (percent * 10.0) as u64;
}
}
}
}
}
0
}
fn extract_number_from_line(line: &str) -> Option<u64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
let clean_number = number_str.replace(',', "");
return clean_number.parse().ok();
}
}
None
}
fn parse_read_amplification(stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("read amplification") || line.contains("Read(GB)") {
if let Some(value) = extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_write_amplification(stats: &str) -> f64 {
for line in stats.lines() {
if line.contains("write amplification") || line.contains("Write(GB)") {
if let Some(value) = extract_float_from_line(line) {
return value;
}
}
}
0.0
}
fn parse_total_read_bytes(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes read") || line.contains("Read(GB)") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_total_write_bytes(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("total bytes written") || line.contains("Write(GB)") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_memtable_count(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("Number of memtables") || line.contains("num-live-memtables") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn parse_immutable_memtables(stats: &str) -> u64 {
for line in stats.lines() {
if line.contains("immutable memtables") || line.contains("num-immutable-mem-table") {
if let Some(value) = extract_number_from_line(line) {
return value;
}
}
}
0
}
fn extract_float_from_line(line: &str) -> Option<f64> {
if let Some(colon_pos) = line.find(':') {
let value_part = line[colon_pos + 1..].trim();
if let Some(number_str) = value_part.split_whitespace().next() {
return number_str.parse().ok();
}
}
None
}
pub struct StatsCollector {
pub before: RocksDbStats,
pub after: RocksDbStats,
}
impl StatsCollector {
pub fn new() -> Self {
Self {
before: RocksDbStats::default(),
after: RocksDbStats::default(),
}
}
pub fn collect_before(&mut self, storage: &RocksBackend) {
self.before = collect_rocksdb_stats(storage);
log::debug!(
"Before: cache {:.1}%, L0 files {}",
self.before.cache_hit_rate * 100.0,
self.before.l0_file_count
);
}
pub fn collect_after(&mut self, storage: &RocksBackend) {
self.after = collect_rocksdb_stats(storage);
log::debug!(
"After: cache {:.1}%, L0 files {}",
self.after.cache_hit_rate * 100.0,
self.after.l0_file_count
);
}
}

View File

@ -0,0 +1,114 @@
use std::{
alloc::{GlobalAlloc, Layout, System},
sync::atomic::{AtomicUsize, Ordering},
};
pub struct RuntimeValidatorAllocator {
inner: System,
allocated: AtomicUsize,
limit: AtomicUsize,
}
impl RuntimeValidatorAllocator {
pub const fn new() -> Self {
Self {
inner: System,
allocated: AtomicUsize::new(0),
limit: AtomicUsize::new(16 * 1024 * 1024 * 1024),
}
}
pub fn set_limit_gb(&self, limit_gb: usize) {
let limit_bytes = limit_gb * 1024 * 1024 * 1024;
self.limit.store(limit_bytes, Ordering::SeqCst);
log::info!(
"Memory limit updated to {}GB ({} bytes)",
limit_gb,
limit_bytes
);
}
pub fn usage_mb(&self) -> f64 {
self.allocated.load(Ordering::Relaxed) as f64 / 1024.0 / 1024.0
}
pub fn usage_percent(&self) -> f64 {
let current = self.allocated.load(Ordering::Relaxed);
let limit = self.limit.load(Ordering::Relaxed);
if limit > 0 {
current as f64 / limit as f64 * 100.0
} else {
0.0
}
}
pub fn limit_gb(&self) -> usize {
self.limit.load(Ordering::Relaxed) / 1024 / 1024 / 1024
}
pub fn actual_limit_gb(&self) -> usize {
self.limit_gb()
}
pub fn would_exceed_limit(&self, size: usize) -> bool {
let current = self.allocated.load(Ordering::Relaxed);
let limit = self.limit.load(Ordering::Relaxed);
current + size > limit
}
pub fn allocation_failures(&self) -> u64 {
0
}
}
unsafe impl GlobalAlloc for RuntimeValidatorAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
let size = layout.size();
let current = self.allocated.fetch_add(size, Ordering::SeqCst);
let limit = self.limit.load(Ordering::Relaxed);
if current + size > limit {
self.allocated.fetch_sub(size, Ordering::SeqCst);
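            // NOTE: the logging below formats a string and may itself allocate while
            // the limit is already exceeded, so treat it as best-effort diagnostics.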
if size >= 1024 * 1024 {
log::warn!(
"Memory limit exceeded: {}MB allocation blocked ({}GB limit, {:.1}% used)",
size / 1024 / 1024,
limit / 1024 / 1024 / 1024,
current as f64 / limit as f64 * 100.0
);
}
return std::ptr::null_mut();
}
unsafe { self.inner.alloc(layout) }
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
unsafe {
self.inner.dealloc(ptr, layout);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_runtime_limit_setting() {
let allocator = RuntimeValidatorAllocator::new();
assert_eq!(allocator.limit_gb(), 16);
allocator.set_limit_gb(8);
assert_eq!(allocator.limit_gb(), 8);
assert_eq!(allocator.actual_limit_gb(), 8);
allocator.set_limit_gb(32);
assert_eq!(allocator.limit_gb(), 32);
}
}
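// Usage sketch (assumed wiring, not enabled anywhere in this crate): a benchmark
// binary could opt in by registering the allocator globally and tightening the
// limit at startup, e.g.
//
//     #[global_allocator]
//     static ALLOCATOR: RuntimeValidatorAllocator = RuntimeValidatorAllocator::new();
//
//     fn main() {
//         ALLOCATOR.set_limit_gb(8);
//     }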

View File

@ -0,0 +1,3 @@
pub mod rocksdb_options_tuning;
pub use rocksdb_options_tuning::*;

View File

@ -0,0 +1,180 @@
use serde::{Deserialize, Serialize};
use crate::CompressionType;
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RocksDbTuningOptions {
pub cache_size_percent: Option<u32>,
pub write_buffer_mb: Option<u32>,
pub compaction_jobs: Option<u32>,
pub block_size_kb: Option<u32>,
pub compression: Option<CompressionType>,
pub bloom_filter_bits: Option<u32>,
}
impl RocksDbTuningOptions {
pub fn apply_to_options(
&self,
opts: &mut rocksdb::Options,
) -> Result<(), Box<dyn std::error::Error>> {
        // Build a single BlockBasedOptions and install it once: calling
        // `set_block_based_table_factory` a second time would replace the first
        // table factory and silently drop the block-cache setting.
        let mut block_opts = rocksdb::BlockBasedOptions::default();
        let mut use_block_opts = false;
        if let Some(cache_percent) = self.cache_size_percent {
            let system_memory_gb = get_system_memory_gb();
            let cache_size_bytes = ((system_memory_gb as f64 * (f64::from(cache_percent) / 100.0))
                * 1024.0
                * 1024.0
                * 1024.0) as usize;
            let cache = rocksdb::Cache::new_lru_cache(cache_size_bytes);
            block_opts.set_block_cache(&cache);
            use_block_opts = true;
            log::info!(
                "Applied block cache: {}% of RAM = {}MB",
                cache_percent,
                cache_size_bytes / 1024 / 1024
            );
        }
        if let Some(block_size_kb) = self.block_size_kb {
            let block_size_bytes = (block_size_kb as usize) * 1024;
            block_opts.set_block_size(block_size_bytes);
            use_block_opts = true;
            log::info!("Applied block size: {}KB", block_size_kb);
        }
        if use_block_opts {
            opts.set_block_based_table_factory(&block_opts);
        }
        if let Some(buffer_mb) = self.write_buffer_mb {
            let buffer_bytes = (buffer_mb as usize) * 1024 * 1024;
            opts.set_write_buffer_size(buffer_bytes);
            log::info!("Applied write buffer: {}MB", buffer_mb);
        }
        if let Some(jobs) = self.compaction_jobs {
            opts.set_max_background_jobs(jobs as i32);
            log::info!("Applied compaction jobs: {}", jobs);
        }
if let Some(compression) = self.compression {
match compression {
CompressionType::None => {
opts.set_compression_type(rocksdb::DBCompressionType::None);
log::info!("Applied compression: None");
}
CompressionType::Lz4 => {
opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
log::info!("Applied compression: LZ4");
}
CompressionType::Snappy => {
opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
log::info!("Applied compression: Snappy");
}
CompressionType::Zstd => {
opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
log::info!("Applied compression: Zstd");
}
}
}
Ok(())
}
pub fn from_args(args: &[String]) -> (Self, bool) {
let mut config = Self::default();
let mut read_only = false;
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--cache-size" if i + 1 < args.len() => {
config.cache_size_percent = args[i + 1].parse().ok();
i += 2;
}
"--write-buffer" if i + 1 < args.len() => {
config.write_buffer_mb = args[i + 1].parse().ok();
i += 2;
}
"--compaction-jobs" if i + 1 < args.len() => {
config.compaction_jobs = args[i + 1].parse().ok();
i += 2;
}
"--block-size" if i + 1 < args.len() => {
config.block_size_kb = args[i + 1].parse().ok();
i += 2;
}
"--read-only" => {
read_only = true;
i += 1;
}
"--compression" if i + 1 < args.len() => {
match args[i + 1].parse::<CompressionType>() {
Ok(compression_type) => config.compression = Some(compression_type),
Err(e) => log::warn!("Invalid compression type: {}", e),
}
i += 2;
}
_ => {
i += 1;
}
}
}
(config, read_only)
}
pub fn description(&self) -> String {
let mut parts = Vec::new();
if let Some(cache) = self.cache_size_percent {
parts.push(format!("cache:{}%", cache));
}
if let Some(buffer) = self.write_buffer_mb {
parts.push(format!("buffer:{}MB", buffer));
}
if let Some(jobs) = self.compaction_jobs {
parts.push(format!("jobs:{}", jobs));
}
if let Some(block_size) = self.block_size_kb {
parts.push(format!("block:{}KB", block_size));
}
if parts.is_empty() {
"defaults".to_string()
} else {
parts.join(",")
}
}
}
fn get_system_memory_gb() -> usize {
if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
for line in meminfo.lines() {
if line.starts_with("MemTotal:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<usize>() {
return kb / 1024 / 1024;
}
}
}
}
}
16
}
pub fn create_tuned_rocksdb_options(tuning_config: &RocksDbTuningOptions) -> rocksdb::Options {
let mut opts = rocksdb::Options::default();
opts.create_if_missing(true);
opts.create_missing_column_families(true);
if let Err(e) = tuning_config.apply_to_options(&mut opts) {
log::error!("Failed to apply RocksDB tuning: {}", e);
}
opts
}
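#[cfg(test)]
mod usage_sketch {
    use super::*;

    // Usage sketch: parse tuning flags the way a benchmark binary might and build
    // a tuned `rocksdb::Options` from them (flag values are illustrative).
    #[test]
    fn builds_options_from_cli_flags() {
        let args: Vec<String> = [
            "--write-buffer",
            "128",
            "--compaction-jobs",
            "4",
            "--compression",
            "lz4",
        ]
        .iter()
        .map(|arg| arg.to_string())
        .collect();
        let (config, read_only) = RocksDbTuningOptions::from_args(&args);
        assert!(!read_only);
        assert_eq!(config.write_buffer_mb, Some(128));
        assert_eq!(config.description(), "buffer:128MB,jobs:4");
        let _opts = create_tuned_rocksdb_options(&config);
    }
}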