From 9136e5f3c05f712d0af7714110dad30b6c222dac Mon Sep 17 00:00:00 2001 From: Maxime Van Hees Date: Thu, 30 Oct 2025 11:17:26 +0100 Subject: [PATCH] benchmarking --- Cargo.lock | 121 +++++++++ Cargo.toml | 23 ++ benches/README.md | 172 ++++++++++++ benches/bulk_ops.rs | 336 +++++++++++++++++++++++ benches/common/backends.rs | 197 ++++++++++++++ benches/common/data_generator.rs | 131 +++++++++ benches/common/metrics.rs | 289 ++++++++++++++++++++ benches/common/mod.rs | 8 + benches/concurrent_ops.rs | 317 ++++++++++++++++++++++ benches/memory_profile.rs | 337 +++++++++++++++++++++++ benches/scan_ops.rs | 339 +++++++++++++++++++++++ benches/single_ops.rs | 444 +++++++++++++++++++++++++++++++ docs/benchmarking.md | 409 ++++++++++++++++++++++++++++ scripts/compare_backends.py | 258 ++++++++++++++++++ scripts/parse_results.py | 222 ++++++++++++++++ src/error.rs | 8 + 16 files changed, 3611 insertions(+) create mode 100644 benches/README.md create mode 100644 benches/bulk_ops.rs create mode 100644 benches/common/backends.rs create mode 100644 benches/common/data_generator.rs create mode 100644 benches/common/metrics.rs create mode 100644 benches/common/mod.rs create mode 100644 benches/concurrent_ops.rs create mode 100644 benches/memory_profile.rs create mode 100644 benches/scan_ops.rs create mode 100644 benches/single_ops.rs create mode 100644 docs/benchmarking.md create mode 100755 scripts/compare_backends.py create mode 100755 scripts/parse_results.py diff --git a/Cargo.lock b/Cargo.lock index 1856901..667fb49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -140,6 +140,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.20" @@ -1169,6 +1175,12 @@ dependencies = [ "either", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cbc" version = "0.1.2" @@ -1272,6 +1284,33 @@ dependencies = [ "phf", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cipher" version = "0.4.4" @@ -1476,6 +1515,41 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "csv", + "itertools 0.13.0", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +dependencies = [ + "cast", + "itertools 0.13.0", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -2959,6 +3033,8 @@ dependencies = [ "bytes", "chacha20poly1305", "clap", + "criterion", + "csv", "ed25519-dalek", "futures", "jsonrpsee", @@ -2975,6 +3051,7 @@ dependencies = [ "sha2", "sled", "tantivy 0.25.0", + "tempfile", "thiserror 1.0.69", "tokio", "ureq", @@ -4852,6 +4929,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -5162,6 +5245,34 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "poly1305" version = "0.8.0" @@ -7022,6 +7133,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.10.0" diff --git a/Cargo.toml b/Cargo.toml index 0d441ea..82bd87b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,29 @@ lancedb = "0.22.1" uuid = "1.18.1" ureq = { version = "2.10.0", features = ["json", "tls"] } reth-ipc = { git = "https://github.com/paradigmxyz/reth", package = "reth-ipc", rev = "d8451e54e7267f9f1634118d6d279b2216f7e2bb" } +criterion = { version = "0.7.0", features = ["async", "async_tokio", "csv_output"] } [dev-dependencies] redis = { version = "0.24", features = ["aio", "tokio-comp"] } +tempfile = "3.8" +csv = "1.3" + +[[bench]] +name = "single_ops" +harness = false + +[[bench]] +name = "bulk_ops" +harness = false + +[[bench]] +name = "scan_ops" +harness = false + +[[bench]] +name = "concurrent_ops" +harness = false + +[[bench]] +name = "memory_profile" +harness = false diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 0000000..842de6d --- /dev/null +++ b/benches/README.md @@ -0,0 +1,172 @@ +# HeroDB Benchmarks + +This directory contains comprehensive performance benchmarks for HeroDB's storage backends (redb and sled). 
+
+## Quick Start
+
+```bash
+# Run all benchmarks
+cargo bench
+
+# Run specific suite
+cargo bench --bench single_ops
+
+# Quick run (fewer samples)
+cargo bench -- --quick
+```
+
+## Benchmark Suites
+
+### 1. Single Operations (`single_ops.rs`)
+Measures individual operation latency:
+- **String operations**: SET, GET, DEL, EXISTS
+- **Hash operations**: HSET, HGET, HGETALL, HDEL, HEXISTS
+- **List operations**: LPUSH, RPUSH, LPOP, RPOP, LRANGE
+
+### 2. Bulk Operations (`bulk_ops.rs`)
+Tests throughput with varying batch sizes:
+- Bulk insert (100, 1K, 10K records)
+- Bulk read (sequential and random)
+- Bulk update and delete
+- Mixed workload (70% reads, 30% writes)
+
+### 3. Scan Operations (`scan_ops.rs`)
+Evaluates iteration and filtering:
+- SCAN with pattern matching
+- HSCAN for hash fields
+- KEYS operation
+- DBSIZE, HKEYS, HVALS
+
+### 4. Concurrent Operations (`concurrent_ops.rs`)
+Simulates multi-client scenarios:
+- Concurrent writes (10, 50 clients)
+- Concurrent reads (10, 50 clients)
+- Mixed concurrent workload
+- Concurrent hash and list operations
+
+### 5. Memory Profiling (`memory_profile.rs`)
+Tracks memory usage patterns:
+- Per-operation memory allocation
+- Peak memory usage
+- Memory efficiency (bytes per record)
+- Allocation count tracking
+
+## Common Infrastructure
+
+The `common/` directory provides shared utilities:
+
+- **`data_generator.rs`**: Deterministic test data generation
+- **`backends.rs`**: Backend setup and management
+- **`metrics.rs`**: Custom metrics collection and export
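+
+These utilities compose the same way in every suite. A minimal sketch, using only the helpers above (error handling reduced to `unwrap` for brevity):
+
+```rust
+use common::*;
+
+// Fixed seed: every run and every backend sees identical data.
+let mut generator = DataGenerator::new(42);
+let data = generator.generate_string_pairs(1_000, 100);
+
+// Backend lives in a temp dir that is cleaned up on drop.
+let backend = BenchmarkBackend::new(BackendType::Redb).unwrap();
+backend.populate_strings(&data).unwrap();
+
+// Operations go through the storage handle directly.
+let key = generator.generate_key("bench:key", 0);
+let _value = backend.storage.get(&key).unwrap();
+```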
+
+## Results Analysis
+
+### Parse Results
+
+```bash
+python3 scripts/parse_results.py target/criterion --csv results.csv --json results.json
+```
+
+### Compare Backends
+
+```bash
+python3 scripts/compare_backends.py results.csv --export comparison.csv
+```
+
+### View HTML Reports
+
+Open `target/criterion/report/index.html` in a browser for interactive charts.
+
+## Documentation
+
+- **[Running Benchmarks](../docs/running_benchmarks.md)** - Quick start guide
+- **[Benchmarking Guide](../docs/benchmarking.md)** - Complete user guide
+- **[Architecture](../docs/benchmark_architecture.md)** - System design
+- **[Implementation Plan](../docs/benchmark_implementation_plan.md)** - Development details
+- **[Sample Results](../docs/benchmark_results_sample.md)** - Example analysis
+
+## Key Features
+
+✅ **Statistical Rigor**: Uses Criterion for statistically sound measurements
+✅ **Fair Comparison**: Identical test datasets across all backends
+✅ **Reproducibility**: Fixed random seeds for deterministic results
+✅ **Comprehensive Coverage**: Single ops, bulk ops, scans, concurrency
+✅ **Memory Profiling**: Custom allocator tracking
+✅ **Multiple Formats**: Terminal, CSV, JSON, HTML outputs
+
+## Performance Tips
+
+### For Accurate Results
+
+1. **System Preparation**
+   - Close unnecessary applications
+   - Disable CPU frequency scaling
+   - Ensure stable power supply
+
+2. **Benchmark Configuration**
+   - Use sufficient sample size (100+)
+   - Allow proper warm-up time
+   - Run multiple iterations
+
+3. **Environment Isolation**
+   - Use temporary directories
+   - Clean state between benchmarks
+   - Avoid shared resources
+
+### For Faster Iteration
+
+```bash
+# Quick mode (fewer samples)
+cargo bench -- --quick
+
+# Specific operation only
+cargo bench -- single_ops/strings/set
+
+# Specific backend only
+cargo bench -- redb
+```
+
+## Troubleshooting
+
+### High Variance
+- Close background applications
+- Disable CPU frequency scaling
+- Increase sample size
+
+### Out of Memory
+- Run suites separately
+- Reduce dataset sizes
+- Increase system swap
+
+### Slow Benchmarks
+- Use `--quick` flag
+- Run specific benchmarks
+- Reduce measurement time
+
+See [Running Benchmarks](../docs/running_benchmarks.md) for detailed troubleshooting.
+
+## Contributing
+
+When adding new benchmarks (a skeleton is sketched below):
+
+1. Follow existing patterns in benchmark files
+2. Use common infrastructure (data_generator, backends)
+3. Ensure fair comparison between backends
+4. Add documentation for new metrics
+5. Test with both `--quick` and full runs
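+
+A bare-bones suite following those patterns — `my_ops` and `bench_my_op` are illustrative names, not an existing file:
+
+```rust
+// benches/my_ops.rs
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
+
+mod common;
+use common::*;
+
+fn bench_my_op(c: &mut Criterion) {
+    let mut group = c.benchmark_group("my_ops/get");
+    group.sample_size(100); // raise this for lower variance
+
+    for backend_type in BackendType::all() {
+        let backend = setup_populated_backend(backend_type, 10_000, 100)
+            .expect("Failed to setup backend");
+        let generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "100bytes"),
+            &backend,
+            |b, backend| {
+                b.iter(|| {
+                    let key = generator.generate_key("bench:key", 0);
+                    backend.storage.get(&key).unwrap();
+                });
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_my_op);
+criterion_main!(benches);
+```
+
+New suites also need a `[[bench]]` entry with `harness = false` in `Cargo.toml`.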
+
+## Example Output
+
+```
+single_ops/strings/set/redb/100bytes
+        time:   [1.234 µs 1.245 µs 1.256 µs]
+        thrpt:  [802.5K ops/s 810.2K ops/s 818.1K ops/s]
+
+single_ops/strings/set/sled/100bytes
+        time:   [1.567 µs 1.578 µs 1.589 µs]
+        thrpt:  [629.5K ops/s 633.7K ops/s 638.1K ops/s]
+```
+
+## License
+
+Same as HeroDB project.
\ No newline at end of file
diff --git a/benches/bulk_ops.rs b/benches/bulk_ops.rs
new file mode 100644
index 0000000..e049eaf
--- /dev/null
+++ b/benches/bulk_ops.rs
@@ -0,0 +1,336 @@
+// benches/bulk_ops.rs
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize};
+
+mod common;
+use common::*;
+
+/// Benchmark bulk insert operations with varying batch sizes
+fn bench_bulk_insert(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/insert");
+
+    for size in [100, 1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend_type.name()), size),
+                &(backend_type, size),
+                |b, &(backend_type, size)| {
+                    b.iter_batched(
+                        || {
+                            let backend = BenchmarkBackend::new(backend_type).unwrap();
+                            let mut generator = DataGenerator::new(42);
+                            let data = generator.generate_string_pairs(size, 100);
+                            (backend, data)
+                        },
+                        |(backend, data)| {
+                            for (key, value) in data {
+                                backend.storage.set(key, value).unwrap();
+                            }
+                        },
+                        BatchSize::SmallInput
+                    );
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
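+
+// `iter_batched` runs the setup closure (backend and data construction)
+// outside the timed section, so only the storage writes in the routine are
+// measured; the same setup/measure split is used throughout these suites.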
+
+/// Benchmark bulk sequential read operations
+fn bench_bulk_read_sequential(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/read_sequential");
+
+    for size in [1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend(backend_type, size, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &(backend, size),
+                |b, (backend, size)| {
+                    b.iter(|| {
+                        for i in 0..*size {
+                            let key = generator.generate_key("bench:key", i);
+                            backend.storage.get(&key).unwrap();
+                        }
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk random read operations
+fn bench_bulk_read_random(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/read_random");
+
+    for size in [1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend(backend_type, size, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+
+            // Pre-generate random indices for fair comparison
+            let indices: Vec<usize> = (0..size)
+                .map(|_| rand::random::<usize>() % size)
+                .collect();
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &(backend, indices),
+                |b, (backend, indices)| {
+                    b.iter(|| {
+                        for &idx in indices {
+                            let key = generator.generate_key("bench:key", idx);
+                            backend.storage.get(&key).unwrap();
+                        }
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk update operations
+fn bench_bulk_update(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/update");
+
+    for size in [100, 1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend_type.name()), size),
+                &(backend_type, size),
+                |b, &(backend_type, size)| {
+                    b.iter_batched(
+                        || {
+                            let backend = setup_populated_backend(backend_type, size, 100).unwrap();
+                            let mut generator = DataGenerator::new(43); // Different seed for updates
+                            let updates = generator.generate_string_pairs(size, 100);
+                            (backend, updates)
+                        },
+                        |(backend, updates)| {
+                            for (key, value) in updates {
+                                backend.storage.set(key, value).unwrap();
+                            }
+                        },
+                        BatchSize::SmallInput
+                    );
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk delete operations
+fn bench_bulk_delete(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/delete");
+
+    for size in [100, 1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend_type.name()), size),
+                &(backend_type, size),
+                |b, &(backend_type, size)| {
+                    b.iter_batched(
+                        || {
+                            let backend = setup_populated_backend(backend_type, size, 100).unwrap();
+                            let generator = DataGenerator::new(42);
+                            let keys: Vec<String> = (0..size)
+                                .map(|i| generator.generate_key("bench:key", i))
+                                .collect();
+                            (backend, keys)
+                        },
+                        |(backend, keys)| {
+                            for key in keys {
+                                backend.storage.del(key).unwrap();
+                            }
+                        },
+                        BatchSize::SmallInput
+                    );
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk hash insert operations
+fn bench_bulk_hash_insert(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/hash_insert");
+
+    for size in [100, 1_000] {
+        for backend_type in BackendType::all() {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend_type.name()), size),
+                &(backend_type, size),
+                |b, &(backend_type, size)| {
+                    b.iter_batched(
+                        || {
+                            let backend = BenchmarkBackend::new(backend_type).unwrap();
+                            let mut generator = DataGenerator::new(42);
+                            let data = generator.generate_hash_data(size, 10, 100);
+                            (backend, data)
+                        },
+                        |(backend, data)| {
+                            for (key, fields) in data {
+                                backend.storage.hset(&key, fields).unwrap();
+                            }
+                        },
+                        BatchSize::SmallInput
+                    );
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk hash read operations (HGETALL)
+fn bench_bulk_hash_read(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/hash_read");
+
+    for size in [100, 1_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend_hashes(backend_type, size, 10, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &(backend, size),
+                |b, (backend, size)| {
+                    b.iter(|| {
+                        for i in 0..*size {
+                            let key = generator.generate_key("bench:hash", i);
+                            backend.storage.hgetall(&key).unwrap();
+                        }
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk list insert operations
+fn bench_bulk_list_insert(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/list_insert");
+
+    for size in [100, 1_000] {
+        for backend_type in BackendType::all() {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend_type.name()), size),
+                &(backend_type, size),
+                |b, &(backend_type, size)| {
+                    b.iter_batched(
+                        || {
+                            let backend = BenchmarkBackend::new(backend_type).unwrap();
+                            let mut generator = DataGenerator::new(42);
+                            let data = generator.generate_list_data(size, 10, 100);
+                            (backend, data)
+                        },
+                        |(backend, data)| {
+                            for (key, elements) in data {
+                                backend.storage.rpush(&key, elements).unwrap();
+                            }
+                        },
+                        BatchSize::SmallInput
+                    );
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark bulk list read operations (LRANGE)
+fn bench_bulk_list_read(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/list_read");
+
+    for size in [100, 1_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend_lists(backend_type, size, 10, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &(backend, size),
+                |b, (backend, size)| {
+                    b.iter(|| {
+                        for i in 0..*size {
+                            let key = generator.generate_key("bench:list", i);
+                            backend.storage.lrange(&key, 0, -1).unwrap();
+                        }
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark mixed workload (70% reads, 30% writes)
+fn bench_mixed_workload(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bulk_ops/mixed_workload");
+
+    for size in [1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend(backend_type, size, 100)
+                .expect("Failed to setup backend");
+            let mut generator = DataGenerator::new(42);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &(backend, size),
+                |b, (backend, size)| {
+                    b.iter(|| {
+                        for i in 0..*size {
+                            if i % 10 < 7 {
+                                // 70% reads
+                                let key = generator.generate_key("bench:key", i % size);
+                                backend.storage.get(&key).unwrap();
+                            } else {
+                                // 30% writes
+                                let key = generator.generate_key("bench:key", i);
+                                let value = generator.generate_value(100);
+                                backend.storage.set(key, value).unwrap();
+                            }
+                        }
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_bulk_insert,
+    bench_bulk_read_sequential,
+    bench_bulk_read_random,
+    bench_bulk_update,
+    bench_bulk_delete,
+    bench_bulk_hash_insert,
+    bench_bulk_hash_read,
+    bench_bulk_list_insert,
+    bench_bulk_list_read,
+    bench_mixed_workload,
+);
+
+criterion_main!(benches);
\ No newline at end of file
diff --git a/benches/common/backends.rs b/benches/common/backends.rs
new file mode 100644
index 0000000..a43722b
--- /dev/null
+++ b/benches/common/backends.rs
@@ -0,0 +1,197 @@
+// benches/common/backends.rs
+use herodb::storage::Storage;
+use herodb::storage_sled::SledStorage;
+use herodb::storage_trait::StorageBackend;
+use std::sync::Arc;
+use tempfile::TempDir;
+
+/// Backend type identifier
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BackendType {
+    Redb,
+    Sled,
+}
+
+impl BackendType {
+    pub fn name(&self) -> &'static str {
+        match self {
+            BackendType::Redb => "redb",
+            BackendType::Sled => "sled",
+        }
+    }
+
+    pub fn all() -> Vec<BackendType> {
+        vec![BackendType::Redb, BackendType::Sled]
+    }
+}
+
+/// Wrapper for benchmark backends with automatic cleanup
+pub struct BenchmarkBackend {
+    pub storage: Arc<dyn StorageBackend>,
+    pub backend_type: BackendType,
+    _temp_dir: TempDir, // Kept for automatic cleanup
+}
+
+impl BenchmarkBackend {
+    /// Create a new redb backend for benchmarking
+    pub fn new_redb() -> Result<Self, Box<dyn std::error::Error>> {
+        let temp_dir = TempDir::new()?;
+        let db_path = temp_dir.path().join("bench.db");
+        let storage = Storage::new(db_path, false, None)?;
+
+        Ok(Self {
+            storage: Arc::new(storage),
+            backend_type: BackendType::Redb,
+            _temp_dir: temp_dir,
+        })
+    }
+
+    /// Create a new sled backend for benchmarking
+    pub fn new_sled() -> Result<Self, Box<dyn std::error::Error>> {
+        let temp_dir = TempDir::new()?;
+        let db_path = temp_dir.path().join("bench.sled");
+        let storage = SledStorage::new(db_path, false, None)?;
+
+        Ok(Self {
+            storage: Arc::new(storage),
+            backend_type: BackendType::Sled,
+            _temp_dir: temp_dir,
+        })
+    }
+
+    /// Create a backend of the specified type
+    pub fn new(backend_type: BackendType) -> Result<Self, Box<dyn std::error::Error>> {
+        match backend_type {
+            BackendType::Redb => Self::new_redb(),
+            BackendType::Sled => Self::new_sled(),
+        }
+    }
+
+    /// Get the backend name for display
+    pub fn name(&self) -> &'static str {
+        self.backend_type.name()
+    }
+
+    /// Pre-populate the backend with test data
+    pub fn populate_strings(&self, data: &[(String, String)]) -> Result<(), Box<dyn std::error::Error>> {
+        for (key, value) in data {
+            self.storage.set(key.clone(), value.clone())?;
+        }
+        Ok(())
+    }
+
+    /// Pre-populate with hash data
+    pub fn populate_hashes(&self, data: &[(String, Vec<(String, String)>)]) -> Result<(), Box<dyn std::error::Error>> {
+        for (key, fields) in data {
+            self.storage.hset(key, fields.clone())?;
+        }
+        Ok(())
+    }
+
+    /// Pre-populate with list data
+    pub fn populate_lists(&self, data: &[(String, Vec<String>)]) -> Result<(), Box<dyn std::error::Error>> {
+        for (key, elements) in data {
+            self.storage.rpush(key, elements.clone())?;
+        }
+        Ok(())
+    }
+
+    /// Clear all data from the backend
+    pub fn clear(&self) -> Result<(), Box<dyn std::error::Error>> {
+        self.storage.flushdb()?;
+        Ok(())
+    }
+
+    /// Get the number of keys in the database
+    pub fn dbsize(&self) -> Result<usize, Box<dyn std::error::Error>> {
+        Ok(self.storage.dbsize()?)
+    }
+}
+
+/// Helper function to create and populate a backend for read benchmarks
+pub fn setup_populated_backend(
+    backend_type: BackendType,
+    num_keys: usize,
+    value_size: usize,
+) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
+    use super::DataGenerator;
+
+    let backend = BenchmarkBackend::new(backend_type)?;
+    let mut generator = DataGenerator::new(42);
+    let data = generator.generate_string_pairs(num_keys, value_size);
+    backend.populate_strings(&data)?;
+
+    Ok(backend)
+}
+
+/// Helper function to create and populate a backend with hash data
+pub fn setup_populated_backend_hashes(
+    backend_type: BackendType,
+    num_hashes: usize,
+    fields_per_hash: usize,
+    value_size: usize,
+) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
+    use super::DataGenerator;
+
+    let backend = BenchmarkBackend::new(backend_type)?;
+    let mut generator = DataGenerator::new(42);
+    let data = generator.generate_hash_data(num_hashes, fields_per_hash, value_size);
+    backend.populate_hashes(&data)?;
+
+    Ok(backend)
+}
+
+/// Helper function to create and populate a backend with list data
+pub fn setup_populated_backend_lists(
+    backend_type: BackendType,
+    num_lists: usize,
+    elements_per_list: usize,
+    element_size: usize,
+) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
+    use super::DataGenerator;
+
+    let backend = BenchmarkBackend::new(backend_type)?;
+    let mut generator = DataGenerator::new(42);
+    let data = generator.generate_list_data(num_lists, elements_per_list, element_size);
+    backend.populate_lists(&data)?;
+
+    Ok(backend)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_backend_creation() {
+        let redb = BenchmarkBackend::new_redb();
+        assert!(redb.is_ok());
+
+        let sled = BenchmarkBackend::new_sled();
+        assert!(sled.is_ok());
+    }
+
+    #[test]
+    fn test_backend_populate() {
+        let backend = BenchmarkBackend::new_redb().unwrap();
+        let data = vec![
+            ("key1".to_string(), "value1".to_string()),
+            ("key2".to_string(), "value2".to_string()),
+        ];
+
+        backend.populate_strings(&data).unwrap();
+        assert_eq!(backend.dbsize().unwrap(), 2);
+    }
+
+    #[test]
+    fn test_backend_clear() {
+        let backend = BenchmarkBackend::new_redb().unwrap();
+        let data = vec![("key1".to_string(), "value1".to_string())];
+
+        backend.populate_strings(&data).unwrap();
+        assert_eq!(backend.dbsize().unwrap(), 1);
+
+        backend.clear().unwrap();
+        assert_eq!(backend.dbsize().unwrap(), 0);
+    }
+}
\ No newline at end of file
diff --git a/benches/common/data_generator.rs b/benches/common/data_generator.rs
new file mode 100644
index 0000000..042869d
--- /dev/null
+++ b/benches/common/data_generator.rs
@@ -0,0 +1,131 @@
+// benches/common/data_generator.rs
+use rand::{Rng, SeedableRng};
+use rand::rngs::StdRng;
+
+/// Deterministic data generator for benchmarks
+pub struct DataGenerator {
+    rng: StdRng,
+}
+
+impl DataGenerator {
+    /// Create a new data generator with a fixed seed for reproducibility
+    pub fn new(seed: u64) -> Self {
+        Self {
+            rng: StdRng::seed_from_u64(seed),
+        }
+    }
+
+    /// Generate a single key with the given prefix and ID
+    pub fn generate_key(&self, prefix: &str, id: usize) -> String {
+        format!("{}:{:08}", prefix, id)
+    }
+
+    /// Generate a random string value of the specified size
+    pub fn generate_value(&mut self, size: usize) -> String {
+        const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+        (0..size)
+            .map(|_| {
+                let idx = self.rng.gen_range(0..CHARSET.len());
+                CHARSET[idx] as char
+            })
+            .collect()
+    }
+
+    /// Generate a batch of key-value pairs
+    pub fn generate_string_pairs(&mut self, count: usize, value_size: usize) -> Vec<(String, String)> {
+        (0..count)
+            .map(|i| {
+                let key = self.generate_key("bench:key", i);
+                let value = self.generate_value(value_size);
+                (key, value)
+            })
+            .collect()
+    }
+
+    /// Generate hash data (key -> field-value pairs)
+    pub fn generate_hash_data(&mut self, num_hashes: usize, fields_per_hash: usize, value_size: usize)
+        -> Vec<(String, Vec<(String, String)>)> {
+        (0..num_hashes)
+            .map(|i| {
+                let hash_key = self.generate_key("bench:hash", i);
+                let fields: Vec<(String, String)> = (0..fields_per_hash)
+                    .map(|j| {
+                        let field = format!("field{}", j);
+                        let value = self.generate_value(value_size);
+                        (field, value)
+                    })
+                    .collect();
+                (hash_key, fields)
+            })
+            .collect()
+    }
+
+    /// Generate list data (key -> list of elements)
+    pub fn generate_list_data(&mut self, num_lists: usize, elements_per_list: usize, element_size: usize)
+        -> Vec<(String, Vec<String>)> {
+        (0..num_lists)
+            .map(|i| {
+                let list_key = self.generate_key("bench:list", i);
+                let elements: Vec<String> = (0..elements_per_list)
+                    .map(|_| self.generate_value(element_size))
+                    .collect();
+                (list_key, elements)
+            })
+            .collect()
+    }
+
+    /// Generate keys for pattern matching tests
+    pub fn generate_pattern_keys(&mut self, count: usize) -> Vec<String> {
+        let mut keys = Vec::new();
+
+        // Generate keys with different patterns
+        for i in 0..count / 3 {
+            keys.push(format!("user:{}:profile", i));
+        }
+        for i in 0..count / 3 {
+            keys.push(format!("session:{}:data", i));
+        }
+        for i in 0..count / 3 {
+            keys.push(format!("cache:{}:value", i));
+        }
+
+        keys
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_deterministic_generation() {
+        let mut generator1 = DataGenerator::new(42);
+        let mut generator2 = DataGenerator::new(42);
+
+        let pairs1 = generator1.generate_string_pairs(10, 50);
+        let pairs2 = generator2.generate_string_pairs(10, 50);
+
+        assert_eq!(pairs1, pairs2, "Same seed should produce same data");
+    }
+
+    #[test]
+    fn test_value_size() {
+        let mut generator = DataGenerator::new(42);
+        let value = generator.generate_value(100);
+        assert_eq!(value.len(), 100);
+    }
+
+    #[test]
+    fn test_hash_generation() {
+        let mut generator = DataGenerator::new(42);
+        let hashes = generator.generate_hash_data(5, 10, 50);
+
+        assert_eq!(hashes.len(), 5);
+        for (_, fields) in hashes {
+            assert_eq!(fields.len(), 10);
+            for (_, value) in fields {
+                assert_eq!(value.len(), 50);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/benches/common/metrics.rs b/benches/common/metrics.rs
new file mode 100644
index 0000000..53d3b6b
--- /dev/null
+++ b/benches/common/metrics.rs
@@ -0,0 +1,289 @@
+// benches/common/metrics.rs
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+/// Custom metrics for benchmark results
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BenchmarkMetrics {
+    pub operation: String,
+    pub backend: String,
+    pub dataset_size: usize,
+    pub mean_ns: u64,
+    pub median_ns: u64,
+    pub p95_ns: u64,
+    pub p99_ns: u64,
+    pub std_dev_ns: u64,
+    pub throughput_ops_sec: f64,
+}
+
+impl BenchmarkMetrics {
+    pub fn new(
+        operation: String,
+        backend: String,
+        dataset_size: usize,
+    ) -> Self {
+        Self {
+            operation,
+            backend,
+            dataset_size,
+            mean_ns: 0,
+            median_ns: 0,
+            p95_ns: 0,
+            p99_ns: 0,
+            std_dev_ns: 0,
+            throughput_ops_sec: 0.0,
+        }
+    }
+
+    /// Convert to CSV row format
+    pub fn to_csv_row(&self) -> String {
+        format!(
+            "{},{},{},{},{},{},{},{},{:.2}",
+            self.backend,
+            self.operation,
+            self.dataset_size,
+            self.mean_ns,
+            self.median_ns,
+            self.p95_ns,
+            self.p99_ns,
+            self.std_dev_ns,
+            self.throughput_ops_sec
+        )
+    }
+
+    /// Get CSV header
+    pub fn csv_header() -> String {
+        "backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,std_dev_ns,throughput_ops_sec".to_string()
+    }
+
+    /// Convert to JSON
+    pub fn to_json(&self) -> serde_json::Value {
+        serde_json::json!({
+            "backend": self.backend,
+            "operation": self.operation,
+            "dataset_size": self.dataset_size,
+            "metrics": {
+                "mean_ns": self.mean_ns,
+                "median_ns": self.median_ns,
+                "p95_ns": self.p95_ns,
+                "p99_ns": self.p99_ns,
+                "std_dev_ns": self.std_dev_ns,
+                "throughput_ops_sec": self.throughput_ops_sec
+            }
+        })
+    }
+
+    /// Calculate throughput from mean latency
+    pub fn calculate_throughput(&mut self) {
+        if self.mean_ns > 0 {
+            // e.g. mean_ns = 1_245 gives 1e9 / 1_245 ≈ 803_212 ops/sec
+            self.throughput_ops_sec = 1_000_000_000.0 / self.mean_ns as f64;
+        }
+    }
+
+    /// Format duration for display
+    pub fn format_duration(nanos: u64) -> String {
+        if nanos < 1_000 {
+            format!("{} ns", nanos)
+        } else if nanos < 1_000_000 {
+            format!("{:.2} µs", nanos as f64 / 1_000.0)
+        } else if nanos < 1_000_000_000 {
+            format!("{:.2} ms", nanos as f64 / 1_000_000.0)
+        } else {
+            format!("{:.2} s", nanos as f64 / 1_000_000_000.0)
+        }
+    }
+
+    /// Pretty print the metrics
+    pub fn display(&self) -> String {
+        format!(
+            "{}/{} (n={}): mean={}, median={}, p95={}, p99={}, throughput={:.0} ops/sec",
+            self.backend,
+            self.operation,
+            self.dataset_size,
+            Self::format_duration(self.mean_ns),
+            Self::format_duration(self.median_ns),
+            Self::format_duration(self.p95_ns),
+            Self::format_duration(self.p99_ns),
+            self.throughput_ops_sec
+        )
+    }
+}
+
+/// Memory metrics for profiling
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MemoryMetrics {
+    pub operation: String,
+    pub backend: String,
+    pub allocations: usize,
+    pub peak_bytes: usize,
+    pub avg_bytes_per_op: f64,
+}
+
+impl MemoryMetrics {
+    pub fn new(operation: String, backend: String) -> Self {
+        Self {
+            operation,
+            backend,
+            allocations: 0,
+            peak_bytes: 0,
+            avg_bytes_per_op: 0.0,
+        }
+    }
+
+    /// Convert to CSV row format
+    pub fn to_csv_row(&self) -> String {
+        format!(
+            "{},{},{},{},{:.2}",
+            self.backend,
+            self.operation,
+            self.allocations,
+            self.peak_bytes,
+            self.avg_bytes_per_op
+        )
+    }
+
+    /// Get CSV header
+    pub fn csv_header() -> String {
+        "backend,operation,allocations,peak_bytes,avg_bytes_per_op".to_string()
+    }
+
+    /// Format bytes for display
+    pub fn format_bytes(bytes: usize) -> String {
+        if bytes < 1024 {
+            format!("{} B", bytes)
+        } else if bytes < 1024 * 1024 {
+            format!("{:.2} KB", bytes as f64 / 1024.0)
+        } else if bytes < 1024 * 1024 * 1024 {
+            format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
+        } else {
+            format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
+        }
+    }
+
+    /// Pretty print the metrics
+    pub fn display(&self) -> String {
+        format!(
+            "{}/{}: {} allocations, peak={}, avg={}",
+            self.backend,
+            self.operation,
+            self.allocations,
+            Self::format_bytes(self.peak_bytes),
+            Self::format_bytes(self.avg_bytes_per_op as usize)
+        )
+    }
+}
+
+/// Collection of benchmark results for comparison
+#[derive(Debug, Default)]
+pub struct BenchmarkResults {
+    pub metrics: Vec<BenchmarkMetrics>,
+    pub memory_metrics: Vec<MemoryMetrics>,
+}
+
+impl BenchmarkResults {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn add_metric(&mut self, metric: BenchmarkMetrics) {
+        self.metrics.push(metric);
+    }
+
+    pub fn add_memory_metric(&mut self, metric: MemoryMetrics) {
+        self.memory_metrics.push(metric);
+    }
+
+    /// Export all metrics to CSV format
+    pub fn to_csv(&self) -> String {
+        let mut output = String::new();
+
+        if !self.metrics.is_empty() {
+            output.push_str(&BenchmarkMetrics::csv_header());
+            output.push('\n');
+            for metric in &self.metrics {
+                output.push_str(&metric.to_csv_row());
+                output.push('\n');
+            }
+        }
+
+        if !self.memory_metrics.is_empty() {
+            output.push('\n');
+            output.push_str(&MemoryMetrics::csv_header());
+            output.push('\n');
+            for metric in &self.memory_metrics {
+                output.push_str(&metric.to_csv_row());
+                output.push('\n');
+            }
+        }
+
+        output
+    }
+
+    /// Export all metrics to JSON format
+    pub fn to_json(&self) -> serde_json::Value {
+        serde_json::json!({
+            "benchmarks": self.metrics.iter().map(|m| m.to_json()).collect::<Vec<_>>(),
+            "memory": self.memory_metrics
+        })
+    }
+
+    /// Save results to a file
+    pub fn save_csv(&self, path: &str) -> std::io::Result<()> {
+        std::fs::write(path, self.to_csv())
+    }
+
+    pub fn save_json(&self, path: &str) -> std::io::Result<()> {
+        let json = serde_json::to_string_pretty(&self.to_json())?;
+        std::fs::write(path, json)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_metrics_creation() {
+        let mut metric = BenchmarkMetrics::new(
+            "set".to_string(),
+            "redb".to_string(),
+            1000,
+        );
+        metric.mean_ns = 1_245;
+        metric.calculate_throughput();
+
+        assert!(metric.throughput_ops_sec > 0.0);
+    }
+
+    #[test]
+    fn test_csv_export() {
+        let mut results = BenchmarkResults::new();
+        let mut metric = BenchmarkMetrics::new(
+            "set".to_string(),
+            "redb".to_string(),
+            1000,
+        );
+        metric.mean_ns = 1_245;
+        metric.calculate_throughput();
+
+        results.add_metric(metric);
+        let csv = results.to_csv();
+
+        assert!(csv.contains("backend,operation"));
+        assert!(csv.contains("redb,set"));
+    }
+
+    #[test]
+    fn test_duration_formatting() {
+        assert_eq!(BenchmarkMetrics::format_duration(500), "500 ns");
+        assert_eq!(BenchmarkMetrics::format_duration(1_500), "1.50 µs");
+        assert_eq!(BenchmarkMetrics::format_duration(1_500_000), "1.50 ms");
+    }
+
+    #[test]
+    fn test_bytes_formatting() {
+        assert_eq!(MemoryMetrics::format_bytes(512), "512 B");
+        assert_eq!(MemoryMetrics::format_bytes(2048), "2.00 KB");
+        assert_eq!(MemoryMetrics::format_bytes(2_097_152), "2.00 MB");
+    }
+}
\ No newline at end of file
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
new file mode 100644
index 0000000..5535fa4
--- /dev/null
+++ b/benches/common/mod.rs
@@ -0,0 +1,8 @@
+// benches/common/mod.rs
+pub mod data_generator;
+pub mod backends;
+pub mod metrics;
+
+pub use data_generator::*;
+pub use backends::*;
+pub use metrics::*;
\ No newline at end of file
diff --git a/benches/concurrent_ops.rs b/benches/concurrent_ops.rs
new file mode 100644
index 0000000..602feda
--- /dev/null
+++ b/benches/concurrent_ops.rs
@@ -0,0 +1,317 @@
+// benches/concurrent_ops.rs
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
+use tokio::runtime::Runtime;
+use std::sync::Arc;
+
+mod common;
+use common::*;
+
+/// Benchmark concurrent write operations
+fn bench_concurrent_writes(c: &mut Criterion) {
+    let mut group = c.benchmark_group("concurrent_ops/writes");
+
+    for num_clients in [10, 50] {
+        for backend_type in BackendType::all() {
+            let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend");
+            let storage = backend.storage.clone();
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/clients", backend.name()), num_clients),
+                &(storage, num_clients),
+                |b, (storage, num_clients)| {
+                    let rt = Runtime::new().unwrap();
+                    b.to_async(&rt).iter(|| {
+                        let storage = storage.clone();
+                        let num_clients =
*num_clients; + async move { + let mut tasks = Vec::new(); + + for client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let mut generator = DataGenerator::new(42 + client_id as u64); + for i in 0..100 { + let key = format!("client:{}:key:{}", client_id, i); + let value = generator.generate_value(100); + storage.set(key, value).unwrap(); + } + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark concurrent read operations +fn bench_concurrent_reads(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_ops/reads"); + + for num_clients in [10, 50] { + for backend_type in BackendType::all() { + // Pre-populate with data + let backend = setup_populated_backend(backend_type, 10_000, 100) + .expect("Failed to setup backend"); + let storage = backend.storage.clone(); + + group.bench_with_input( + BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), + &(storage, num_clients), + |b, (storage, num_clients)| { + let rt = Runtime::new().unwrap(); + b.to_async(&rt).iter(|| { + let storage = storage.clone(); + let num_clients = *num_clients; + async move { + let mut tasks = Vec::new(); + + for client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let generator = DataGenerator::new(42); + for i in 0..100 { + let key_id = (client_id * 100 + i) % 10_000; + let key = generator.generate_key("bench:key", key_id); + storage.get(&key).unwrap(); + } + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark mixed concurrent workload (70% reads, 30% writes) +fn bench_concurrent_mixed(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_ops/mixed"); + + for num_clients in [10, 50] { + for backend_type in BackendType::all() { + // Pre-populate with data + let backend = setup_populated_backend(backend_type, 10_000, 100) + .expect("Failed to setup backend"); + let storage = backend.storage.clone(); + + group.bench_with_input( + BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), + &(storage, num_clients), + |b, (storage, num_clients)| { + let rt = Runtime::new().unwrap(); + b.to_async(&rt).iter(|| { + let storage = storage.clone(); + let num_clients = *num_clients; + async move { + let mut tasks = Vec::new(); + + for client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let mut generator = DataGenerator::new(42 + client_id as u64); + for i in 0..100 { + if i % 10 < 7 { + // 70% reads + let key_id = (client_id * 100 + i) % 10_000; + let key = generator.generate_key("bench:key", key_id); + storage.get(&key).unwrap(); + } else { + // 30% writes + let key = format!("client:{}:key:{}", client_id, i); + let value = generator.generate_value(100); + storage.set(key, value).unwrap(); + } + } + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark concurrent hash operations +fn bench_concurrent_hash_ops(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_ops/hash_ops"); + + for num_clients in [10, 50] { + for backend_type in BackendType::all() { + let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); + let storage = backend.storage.clone(); + + group.bench_with_input( + 
BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), + &(storage, num_clients), + |b, (storage, num_clients)| { + let rt = Runtime::new().unwrap(); + b.to_async(&rt).iter(|| { + let storage = storage.clone(); + let num_clients = *num_clients; + async move { + let mut tasks = Vec::new(); + + for client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let mut generator = DataGenerator::new(42 + client_id as u64); + for i in 0..50 { + let key = format!("client:{}:hash:{}", client_id, i); + let field = format!("field{}", i % 10); + let value = generator.generate_value(100); + storage.hset(&key, vec![(field, value)]).unwrap(); + } + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark concurrent list operations +fn bench_concurrent_list_ops(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_ops/list_ops"); + + for num_clients in [10, 50] { + for backend_type in BackendType::all() { + let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); + let storage = backend.storage.clone(); + + group.bench_with_input( + BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), + &(storage, num_clients), + |b, (storage, num_clients)| { + let rt = Runtime::new().unwrap(); + b.to_async(&rt).iter(|| { + let storage = storage.clone(); + let num_clients = *num_clients; + async move { + let mut tasks = Vec::new(); + + for client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let mut generator = DataGenerator::new(42 + client_id as u64); + for i in 0..50 { + let key = format!("client:{}:list:{}", client_id, i); + let element = generator.generate_value(100); + storage.rpush(&key, vec![element]).unwrap(); + } + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark concurrent scan operations +fn bench_concurrent_scans(c: &mut Criterion) { + let mut group = c.benchmark_group("concurrent_ops/scans"); + + for num_clients in [10, 50] { + for backend_type in BackendType::all() { + // Pre-populate with data + let backend = setup_populated_backend(backend_type, 10_000, 100) + .expect("Failed to setup backend"); + let storage = backend.storage.clone(); + + group.bench_with_input( + BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), + &(storage, num_clients), + |b, (storage, num_clients)| { + let rt = Runtime::new().unwrap(); + b.to_async(&rt).iter(|| { + let storage = storage.clone(); + let num_clients = *num_clients; + async move { + let mut tasks = Vec::new(); + + for _client_id in 0..num_clients { + let storage = storage.clone(); + let task = tokio::spawn(async move { + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = storage + .scan(cursor, None, Some(100)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + total + }); + tasks.push(task); + } + + for task in tasks { + task.await.unwrap(); + } + } + }); + } + ); + } + } + + group.finish(); +} + +criterion_group!( + benches, + bench_concurrent_writes, + bench_concurrent_reads, + bench_concurrent_mixed, + bench_concurrent_hash_ops, + bench_concurrent_list_ops, + bench_concurrent_scans, +); + +criterion_main!(benches); \ No newline at end of file diff --git a/benches/memory_profile.rs b/benches/memory_profile.rs new 
file mode 100644 index 0000000..9ea4546 --- /dev/null +++ b/benches/memory_profile.rs @@ -0,0 +1,337 @@ +// benches/memory_profile.rs +use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize}; +use std::alloc::{GlobalAlloc, Layout, System}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +mod common; +use common::*; + +// Simple memory tracking allocator +struct TrackingAllocator; + +static ALLOCATED: AtomicUsize = AtomicUsize::new(0); +static DEALLOCATED: AtomicUsize = AtomicUsize::new(0); +static PEAK: AtomicUsize = AtomicUsize::new(0); +static ALLOC_COUNT: AtomicUsize = AtomicUsize::new(0); + +unsafe impl GlobalAlloc for TrackingAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let ret = System.alloc(layout); + if !ret.is_null() { + let size = layout.size(); + ALLOCATED.fetch_add(size, Ordering::SeqCst); + ALLOC_COUNT.fetch_add(1, Ordering::SeqCst); + + // Update peak if necessary + let current = ALLOCATED.load(Ordering::SeqCst) - DEALLOCATED.load(Ordering::SeqCst); + let mut peak = PEAK.load(Ordering::SeqCst); + while current > peak { + match PEAK.compare_exchange_weak(peak, current, Ordering::SeqCst, Ordering::SeqCst) { + Ok(_) => break, + Err(x) => peak = x, + } + } + } + ret + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + System.dealloc(ptr, layout); + DEALLOCATED.fetch_add(layout.size(), Ordering::SeqCst); + } +} + +#[global_allocator] +static GLOBAL: TrackingAllocator = TrackingAllocator; + +/// Reset memory tracking counters +fn reset_memory_tracking() { + ALLOCATED.store(0, Ordering::SeqCst); + DEALLOCATED.store(0, Ordering::SeqCst); + PEAK.store(0, Ordering::SeqCst); + ALLOC_COUNT.store(0, Ordering::SeqCst); +} + +/// Get current memory stats +fn get_memory_stats() -> (usize, usize, usize) { + let allocated = ALLOCATED.load(Ordering::SeqCst); + let deallocated = DEALLOCATED.load(Ordering::SeqCst); + let peak = PEAK.load(Ordering::SeqCst); + let alloc_count = ALLOC_COUNT.load(Ordering::SeqCst); + + let current = allocated.saturating_sub(deallocated); + (current, peak, alloc_count) +} + +/// Profile memory usage for single SET operations +fn profile_memory_set(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/set"); + + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(backend_type.name(), "100bytes"), + &backend_type, + |b, &backend_type| { + b.iter_batched( + || { + reset_memory_tracking(); + let backend = BenchmarkBackend::new(backend_type).unwrap(); + let mut generator = DataGenerator::new(42); + let key = generator.generate_key("bench:key", 0); + let value = generator.generate_value(100); + (backend, key, value) + }, + |(backend, key, value)| { + backend.storage.set(key, value).unwrap(); + let (current, peak, allocs) = get_memory_stats(); + println!("{}: current={}, peak={}, allocs={}", + backend.name(), current, peak, allocs); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Profile memory usage for single GET operations +fn profile_memory_get(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/get"); + + for backend_type in BackendType::all() { + let backend = setup_populated_backend(backend_type, 1_000, 100) + .expect("Failed to setup backend"); + let generator = DataGenerator::new(42); + + group.bench_with_input( + BenchmarkId::new(backend.name(), "100bytes"), + &backend, + |b, backend| { + b.iter_batched( + || { + reset_memory_tracking(); + generator.generate_key("bench:key", 0) + }, + |key| { + 
backend.storage.get(&key).unwrap(); + let (current, peak, allocs) = get_memory_stats(); + println!("{}: current={}, peak={}, allocs={}", + backend.name(), current, peak, allocs); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Profile memory usage for bulk insert operations +fn profile_memory_bulk_insert(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/bulk_insert"); + + for size in [100, 1_000] { + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(format!("{}/size", backend_type.name()), size), + &(backend_type, size), + |b, &(backend_type, size)| { + b.iter_batched( + || { + reset_memory_tracking(); + let backend = BenchmarkBackend::new(backend_type).unwrap(); + let mut generator = DataGenerator::new(42); + let data = generator.generate_string_pairs(size, 100); + (backend, data) + }, + |(backend, data)| { + for (key, value) in data { + backend.storage.set(key, value).unwrap(); + } + let (current, peak, allocs) = get_memory_stats(); + println!("{} (n={}): current={}, peak={}, allocs={}, bytes_per_record={}", + backend.name(), size, current, peak, allocs, peak / size); + }, + BatchSize::SmallInput + ); + } + ); + } + } + + group.finish(); +} + +/// Profile memory usage for hash operations +fn profile_memory_hash_ops(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/hash_ops"); + + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(backend_type.name(), "hset"), + &backend_type, + |b, &backend_type| { + b.iter_batched( + || { + reset_memory_tracking(); + let backend = BenchmarkBackend::new(backend_type).unwrap(); + let mut generator = DataGenerator::new(42); + let key = generator.generate_key("bench:hash", 0); + let fields = vec![ + ("field1".to_string(), generator.generate_value(100)), + ("field2".to_string(), generator.generate_value(100)), + ("field3".to_string(), generator.generate_value(100)), + ]; + (backend, key, fields) + }, + |(backend, key, fields)| { + backend.storage.hset(&key, fields).unwrap(); + let (current, peak, allocs) = get_memory_stats(); + println!("{}: current={}, peak={}, allocs={}", + backend.name(), current, peak, allocs); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Profile memory usage for list operations +fn profile_memory_list_ops(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/list_ops"); + + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(backend_type.name(), "rpush"), + &backend_type, + |b, &backend_type| { + b.iter_batched( + || { + reset_memory_tracking(); + let backend = BenchmarkBackend::new(backend_type).unwrap(); + let mut generator = DataGenerator::new(42); + let key = generator.generate_key("bench:list", 0); + let elements = vec![ + generator.generate_value(100), + generator.generate_value(100), + generator.generate_value(100), + ]; + (backend, key, elements) + }, + |(backend, key, elements)| { + backend.storage.rpush(&key, elements).unwrap(); + let (current, peak, allocs) = get_memory_stats(); + println!("{}: current={}, peak={}, allocs={}", + backend.name(), current, peak, allocs); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Profile memory usage for scan operations +fn profile_memory_scan(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/scan"); + + for size in [1_000, 10_000] { + for backend_type in BackendType::all() { + let backend = 
setup_populated_backend(backend_type, size, 100) + .expect("Failed to setup backend"); + + group.bench_with_input( + BenchmarkId::new(format!("{}/size", backend.name()), size), + &backend, + |b, backend| { + b.iter(|| { + reset_memory_tracking(); + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = backend.storage + .scan(cursor, None, Some(100)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + let (current, peak, allocs) = get_memory_stats(); + println!("{} (n={}): scanned={}, current={}, peak={}, allocs={}", + backend.name(), size, total, current, peak, allocs); + total + }); + } + ); + } + } + + group.finish(); +} + +/// Profile memory efficiency (bytes per record stored) +fn profile_memory_efficiency(c: &mut Criterion) { + let mut group = c.benchmark_group("memory_profile/efficiency"); + + for size in [1_000, 10_000] { + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(format!("{}/size", backend_type.name()), size), + &(backend_type, size), + |b, &(backend_type, size)| { + b.iter_batched( + || { + reset_memory_tracking(); + let backend = BenchmarkBackend::new(backend_type).unwrap(); + let mut generator = DataGenerator::new(42); + let data = generator.generate_string_pairs(size, 100); + (backend, data) + }, + |(backend, data)| { + let data_size: usize = data.iter() + .map(|(k, v)| k.len() + v.len()) + .sum(); + + for (key, value) in data { + backend.storage.set(key, value).unwrap(); + } + + let (current, peak, allocs) = get_memory_stats(); + let overhead_pct = ((peak as f64 - data_size as f64) / data_size as f64) * 100.0; + + println!("{} (n={}): data_size={}, peak={}, overhead={:.1}%, bytes_per_record={}, allocs={}", + backend.name(), size, data_size, peak, overhead_pct, + peak / size, allocs); + }, + BatchSize::SmallInput + ); + } + ); + } + } + + group.finish(); +} + +criterion_group!( + benches, + profile_memory_set, + profile_memory_get, + profile_memory_bulk_insert, + profile_memory_hash_ops, + profile_memory_list_ops, + profile_memory_scan, + profile_memory_efficiency, +); + +criterion_main!(benches); \ No newline at end of file diff --git a/benches/scan_ops.rs b/benches/scan_ops.rs new file mode 100644 index 0000000..3b12bf8 --- /dev/null +++ b/benches/scan_ops.rs @@ -0,0 +1,339 @@ +// benches/scan_ops.rs +use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; + +mod common; +use common::*; + +/// Benchmark SCAN operation - full database scan +fn bench_scan_full(c: &mut Criterion) { + let mut group = c.benchmark_group("scan_ops/scan_full"); + + for size in [1_000, 10_000] { + for backend_type in BackendType::all() { + let backend = setup_populated_backend(backend_type, size, 100) + .expect("Failed to setup backend"); + + group.bench_with_input( + BenchmarkId::new(format!("{}/size", backend.name()), size), + &backend, + |b, backend| { + b.iter(|| { + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = backend.storage + .scan(cursor, None, Some(100)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + total + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark SCAN operation with pattern matching +fn bench_scan_pattern(c: &mut Criterion) { + let mut group = c.benchmark_group("scan_ops/scan_pattern"); + + for backend_type in BackendType::all() { + // Create backend with mixed key patterns + let backend = 
BenchmarkBackend::new(backend_type).expect("Failed to create backend"); + let mut generator = DataGenerator::new(42); + + // Insert keys with different patterns + for i in 0..3_000 { + let key = if i < 1_000 { + format!("user:{}:profile", i) + } else if i < 2_000 { + format!("session:{}:data", i - 1_000) + } else { + format!("cache:{}:value", i - 2_000) + }; + let value = generator.generate_value(100); + backend.storage.set(key, value).unwrap(); + } + + // Benchmark pattern matching + for pattern in ["user:*", "session:*", "cache:*"] { + group.bench_with_input( + BenchmarkId::new(format!("{}/pattern", backend.name()), pattern), + &(backend.storage.clone(), pattern), + |b, (storage, pattern)| { + b.iter(|| { + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = storage + .scan(cursor, Some(pattern), Some(100)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + total + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark HSCAN operation - scan hash fields +fn bench_hscan(c: &mut Criterion) { + let mut group = c.benchmark_group("scan_ops/hscan"); + + for fields_count in [10, 100] { + for backend_type in BackendType::all() { + let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100) + .expect("Failed to setup backend"); + let generator = DataGenerator::new(42); + let key = generator.generate_key("bench:hash", 0); + + group.bench_with_input( + BenchmarkId::new(format!("{}/fields", backend.name()), fields_count), + &(backend, key), + |b, (backend, key)| { + b.iter(|| { + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = backend.storage + .hscan(key, cursor, None, Some(10)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + total + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark HSCAN with pattern matching +fn bench_hscan_pattern(c: &mut Criterion) { + let mut group = c.benchmark_group("scan_ops/hscan_pattern"); + + for backend_type in BackendType::all() { + let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); + let mut generator = DataGenerator::new(42); + + // Create a hash with mixed field patterns + let key = "bench:hash:0".to_string(); + let mut fields = Vec::new(); + for i in 0..100 { + let field = if i < 33 { + format!("user_{}", i) + } else if i < 66 { + format!("session_{}", i - 33) + } else { + format!("cache_{}", i - 66) + }; + let value = generator.generate_value(100); + fields.push((field, value)); + } + backend.storage.hset(&key, fields).unwrap(); + + // Benchmark pattern matching + for pattern in ["user_*", "session_*", "cache_*"] { + group.bench_with_input( + BenchmarkId::new(format!("{}/pattern", backend.name()), pattern), + &(backend.storage.clone(), key.clone(), pattern), + |b, (storage, key, pattern)| { + b.iter(|| { + let mut cursor = 0u64; + let mut total = 0; + loop { + let (next_cursor, items) = storage + .hscan(key, cursor, Some(pattern), Some(10)) + .unwrap(); + total += items.len(); + if next_cursor == 0 { + break; + } + cursor = next_cursor; + } + total + }); + } + ); + } + } + + group.finish(); +} + +/// Benchmark KEYS operation with various patterns +fn bench_keys_operation(c: &mut Criterion) { + let mut group = c.benchmark_group("scan_ops/keys"); + + for backend_type in BackendType::all() { + // Create backend with mixed key patterns + let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); + 
+        let mut generator = DataGenerator::new(42);
+
+        // Insert keys with different patterns
+        for i in 0..3_000 {
+            let key = if i < 1_000 {
+                format!("user:{}:profile", i)
+            } else if i < 2_000 {
+                format!("session:{}:data", i - 1_000)
+            } else {
+                format!("cache:{}:value", i - 2_000)
+            };
+            let value = generator.generate_value(100);
+            backend.storage.set(key, value).unwrap();
+        }
+
+        // Benchmark different patterns
+        for pattern in ["*", "user:*", "session:*", "*:profile", "user:*:profile"] {
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/pattern", backend.name()), pattern),
+                &(backend.storage.clone(), pattern),
+                |b, (storage, pattern)| {
+                    b.iter(|| {
+                        storage.keys(pattern).unwrap()
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark DBSIZE operation
+fn bench_dbsize(c: &mut Criterion) {
+    let mut group = c.benchmark_group("scan_ops/dbsize");
+
+    for size in [1_000, 10_000] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend(backend_type, size, 100)
+                .expect("Failed to setup backend");
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/size", backend.name()), size),
+                &backend,
+                |b, backend| {
+                    b.iter(|| {
+                        backend.storage.dbsize().unwrap()
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark LRANGE with different range sizes
+fn bench_lrange_sizes(c: &mut Criterion) {
+    let mut group = c.benchmark_group("scan_ops/lrange");
+
+    for range_size in [10, 50, 100] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend_lists(backend_type, 100, 100, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+            let key = generator.generate_key("bench:list", 0);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/range", backend.name()), range_size),
+                &(backend, key, range_size),
+                |b, (backend, key, range_size)| {
+                    b.iter(|| {
+                        backend.storage.lrange(key, 0, (*range_size - 1) as i64).unwrap()
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark HKEYS operation
+fn bench_hkeys(c: &mut Criterion) {
+    let mut group = c.benchmark_group("scan_ops/hkeys");
+
+    for fields_count in [10, 50, 100] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+            let key = generator.generate_key("bench:hash", 0);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/fields", backend.name()), fields_count),
+                &(backend, key),
+                |b, (backend, key)| {
+                    b.iter(|| {
+                        backend.storage.hkeys(key).unwrap()
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+/// Benchmark HVALS operation
+fn bench_hvals(c: &mut Criterion) {
+    let mut group = c.benchmark_group("scan_ops/hvals");
+
+    for fields_count in [10, 50, 100] {
+        for backend_type in BackendType::all() {
+            let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100)
+                .expect("Failed to setup backend");
+            let generator = DataGenerator::new(42);
+            let key = generator.generate_key("bench:hash", 0);
+
+            group.bench_with_input(
+                BenchmarkId::new(format!("{}/fields", backend.name()), fields_count),
+                &(backend, key),
+                |b, (backend, key)| {
+                    b.iter(|| {
+                        backend.storage.hvals(key).unwrap()
+                    });
+                }
+            );
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_scan_full,
+    bench_scan_pattern,
+    bench_hscan,
+    bench_hscan_pattern,
+    bench_keys_operation,
+    bench_dbsize,
+    bench_lrange_sizes,
+    bench_hkeys,
+    bench_hvals,
+);
+
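+// `criterion_group!` above bundles the scan benchmarks into one suite, and
+// `criterion_main!` below generates the harness entry point; this pairing is why
+// the bench targets are declared with `harness = false` in Cargo.toml.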
+criterion_main!(benches);
\ No newline at end of file
diff --git a/benches/single_ops.rs b/benches/single_ops.rs
new file mode 100644
index 0000000..789ef35
--- /dev/null
+++ b/benches/single_ops.rs
@@ -0,0 +1,444 @@
+// benches/single_ops.rs
+use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize};
+
+mod common;
+use common::*;
+
+/// Benchmark string SET operations
+fn bench_string_set(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/strings/set");
+
+    for backend_type in BackendType::all() {
+        let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend");
+        let mut generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "100bytes"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        let key = generator.generate_key("bench:key", rand::random::<usize>() % 100000);
+                        let value = generator.generate_value(100);
+                        (key, value)
+                    },
+                    |(key, value)| {
+                        backend.storage.set(key, value).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark string GET operations
+fn bench_string_get(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/strings/get");
+
+    for backend_type in BackendType::all() {
+        // Pre-populate with 10K keys
+        let backend = setup_populated_backend(backend_type, 10_000, 100)
+            .expect("Failed to setup backend");
+        let generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "100bytes"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        let key_id = rand::random::<usize>() % 10_000;
+                        generator.generate_key("bench:key", key_id)
+                    },
+                    |key| {
+                        backend.storage.get(&key).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark string DEL operations
+fn bench_string_del(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/strings/del");
+
+    for backend_type in BackendType::all() {
+        group.bench_with_input(
+            BenchmarkId::new(backend_type.name(), "100bytes"),
+            &backend_type,
+            |b, &backend_type| {
+                b.iter_batched(
+                    || {
+                        // Create fresh backend with one key for each iteration
+                        let backend = BenchmarkBackend::new(backend_type).unwrap();
+                        let mut generator = DataGenerator::new(42);
+                        let key = generator.generate_key("bench:key", 0);
+                        let value = generator.generate_value(100);
+                        backend.storage.set(key.clone(), value).unwrap();
+                        (backend, key)
+                    },
+                    |(backend, key)| {
+                        backend.storage.del(key).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark string EXISTS operations
+fn bench_string_exists(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/strings/exists");
+
+    for backend_type in BackendType::all() {
+        let backend = setup_populated_backend(backend_type, 10_000, 100)
+            .expect("Failed to setup backend");
+        let generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "100bytes"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        let key_id = rand::random::<usize>() % 10_000;
+                        generator.generate_key("bench:key", key_id)
+                    },
+                    |key| {
+                        backend.storage.exists(&key).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
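+
+// NOTE: the string benchmarks above use `iter_batched` so that key/value
+// generation happens in the setup closure, outside the timed routine;
+// `BatchSize::SmallInput` tells Criterion the generated inputs are small enough
+// to batch freely in memory.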
backend"); + let mut generator = DataGenerator::new(42); + + group.bench_with_input( + BenchmarkId::new(backend.name(), "single_field"), + &backend, + |b, backend| { + b.iter_batched( + || { + let key = generator.generate_key("bench:hash", rand::random::() % 1000); + let field = format!("field{}", rand::random::() % 100); + let value = generator.generate_value(100); + (key, field, value) + }, + |(key, field, value)| { + backend.storage.hset(&key, vec![(field, value)]).unwrap(); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Benchmark hash HGET operations +fn bench_hash_hget(c: &mut Criterion) { + let mut group = c.benchmark_group("single_ops/hashes/hget"); + + for backend_type in BackendType::all() { + // Pre-populate with hashes + let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) + .expect("Failed to setup backend"); + let generator = DataGenerator::new(42); + + group.bench_with_input( + BenchmarkId::new(backend.name(), "single_field"), + &backend, + |b, backend| { + b.iter_batched( + || { + let key = generator.generate_key("bench:hash", rand::random::() % 1_000); + let field = format!("field{}", rand::random::() % 10); + (key, field) + }, + |(key, field)| { + backend.storage.hget(&key, &field).unwrap(); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Benchmark hash HGETALL operations +fn bench_hash_hgetall(c: &mut Criterion) { + let mut group = c.benchmark_group("single_ops/hashes/hgetall"); + + for backend_type in BackendType::all() { + let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) + .expect("Failed to setup backend"); + let generator = DataGenerator::new(42); + + group.bench_with_input( + BenchmarkId::new(backend.name(), "10_fields"), + &backend, + |b, backend| { + b.iter_batched( + || { + generator.generate_key("bench:hash", rand::random::() % 1_000) + }, + |key| { + backend.storage.hgetall(&key).unwrap(); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Benchmark hash HDEL operations +fn bench_hash_hdel(c: &mut Criterion) { + let mut group = c.benchmark_group("single_ops/hashes/hdel"); + + for backend_type in BackendType::all() { + group.bench_with_input( + BenchmarkId::new(backend_type.name(), "single_field"), + &backend_type, + |b, &backend_type| { + b.iter_batched( + || { + let backend = setup_populated_backend_hashes(backend_type, 1, 10, 100).unwrap(); + let generator = DataGenerator::new(42); + let key = generator.generate_key("bench:hash", 0); + let field = format!("field{}", rand::random::() % 10); + (backend, key, field) + }, + |(backend, key, field)| { + backend.storage.hdel(&key, vec![field]).unwrap(); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} + +/// Benchmark hash HEXISTS operations +fn bench_hash_hexists(c: &mut Criterion) { + let mut group = c.benchmark_group("single_ops/hashes/hexists"); + + for backend_type in BackendType::all() { + let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) + .expect("Failed to setup backend"); + let generator = DataGenerator::new(42); + + group.bench_with_input( + BenchmarkId::new(backend.name(), "single_field"), + &backend, + |b, backend| { + b.iter_batched( + || { + let key = generator.generate_key("bench:hash", rand::random::() % 1_000); + let field = format!("field{}", rand::random::() % 10); + (key, field) + }, + |(key, field)| { + backend.storage.hexists(&key, &field).unwrap(); + }, + BatchSize::SmallInput + ); + } + ); + } + + group.finish(); +} 
+
+/// Benchmark list LPUSH operations
+fn bench_list_lpush(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/lists/lpush");
+
+    for backend_type in BackendType::all() {
+        let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend");
+        let mut generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "single_element"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        let key = generator.generate_key("bench:list", rand::random::<usize>() % 1000);
+                        let element = generator.generate_value(100);
+                        (key, element)
+                    },
+                    |(key, element)| {
+                        backend.storage.lpush(&key, vec![element]).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark list RPUSH operations
+fn bench_list_rpush(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/lists/rpush");
+
+    for backend_type in BackendType::all() {
+        let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend");
+        let mut generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "single_element"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        let key = generator.generate_key("bench:list", rand::random::<usize>() % 1000);
+                        let element = generator.generate_value(100);
+                        (key, element)
+                    },
+                    |(key, element)| {
+                        backend.storage.rpush(&key, vec![element]).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark list LPOP operations
+fn bench_list_lpop(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/lists/lpop");
+
+    for backend_type in BackendType::all() {
+        group.bench_with_input(
+            BenchmarkId::new(backend_type.name(), "single_element"),
+            &backend_type,
+            |b, &backend_type| {
+                b.iter_batched(
+                    || {
+                        let backend = setup_populated_backend_lists(backend_type, 1, 100, 100).unwrap();
+                        let generator = DataGenerator::new(42);
+                        let key = generator.generate_key("bench:list", 0);
+                        (backend, key)
+                    },
+                    |(backend, key)| {
+                        backend.storage.lpop(&key, 1).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark list RPOP operations
+fn bench_list_rpop(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/lists/rpop");
+
+    for backend_type in BackendType::all() {
+        group.bench_with_input(
+            BenchmarkId::new(backend_type.name(), "single_element"),
+            &backend_type,
+            |b, &backend_type| {
+                b.iter_batched(
+                    || {
+                        let backend = setup_populated_backend_lists(backend_type, 1, 100, 100).unwrap();
+                        let generator = DataGenerator::new(42);
+                        let key = generator.generate_key("bench:list", 0);
+                        (backend, key)
+                    },
+                    |(backend, key)| {
+                        backend.storage.rpop(&key, 1).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark list LRANGE operations
+fn bench_list_lrange(c: &mut Criterion) {
+    let mut group = c.benchmark_group("single_ops/lists/lrange");
+
+    for backend_type in BackendType::all() {
+        let backend = setup_populated_backend_lists(backend_type, 1_000, 100, 100)
+            .expect("Failed to setup backend");
+        let generator = DataGenerator::new(42);
+
+        group.bench_with_input(
+            BenchmarkId::new(backend.name(), "10_elements"),
+            &backend,
+            |b, backend| {
+                b.iter_batched(
+                    || {
+                        generator.generate_key("bench:list", rand::random::<usize>() % 1_000)
+                    },
+                    |key| {
+                        backend.storage.lrange(&key, 0, 9).unwrap();
+                    },
+                    BatchSize::SmallInput
+                );
+            }
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_string_set,
+    bench_string_get,
+    bench_string_del,
+    bench_string_exists,
+    bench_hash_hset,
+    bench_hash_hget,
+    bench_hash_hgetall,
+    bench_hash_hdel,
+    bench_hash_hexists,
+    bench_list_lpush,
+    bench_list_rpush,
+    bench_list_lpop,
+    bench_list_rpop,
+    bench_list_lrange,
+);
+
+criterion_main!(benches);
\ No newline at end of file
diff --git a/docs/benchmarking.md b/docs/benchmarking.md
new file mode 100644
index 0000000..6d914ad
--- /dev/null
+++ b/docs/benchmarking.md
@@ -0,0 +1,409 @@
+# HeroDB Performance Benchmarking Guide
+
+## Overview
+
+This document describes the comprehensive benchmarking suite for HeroDB, designed to measure and compare the performance characteristics of the two storage backends: **redb** (default) and **sled**.
+
+## Benchmark Architecture
+
+### Design Principles
+
+1. **Fair Comparison**: Identical test datasets and operations across all backends
+2. **Statistical Rigor**: Using Criterion for statistically sound measurements
+3. **Real-World Scenarios**: Mix of synthetic and realistic workload patterns
+4. **Reproducibility**: Deterministic test data generation with fixed seeds
+5. **Isolation**: Each benchmark runs in a clean environment
+
+### Benchmark Categories
+
+#### 1. Single-Operation CRUD Benchmarks
+Measures the performance of individual database operations:
+
+- **String Operations**
+  - `SET` - Write a single key-value pair
+  - `GET` - Read a single key-value pair
+  - `DEL` - Delete a single key
+  - `EXISTS` - Check key existence
+
+- **Hash Operations**
+  - `HSET` - Set single field in hash
+  - `HGET` - Get single field from hash
+  - `HGETALL` - Get all fields from hash
+  - `HDEL` - Delete field from hash
+  - `HEXISTS` - Check field existence
+
+- **List Operations**
+  - `LPUSH` - Push to list head
+  - `RPUSH` - Push to list tail
+  - `LPOP` - Pop from list head
+  - `RPOP` - Pop from list tail
+  - `LRANGE` - Get range of elements
+
+#### 2. Bulk Operation Benchmarks
+Tests throughput with varying batch sizes:
+
+- **Bulk Insert**: 100, 1,000, 10,000 records
+- **Bulk Read**: Sequential and random access patterns
+- **Bulk Update**: Modify existing records
+- **Bulk Delete**: Remove multiple records
+
+#### 3. Query and Scan Benchmarks
+Evaluates iteration and filtering performance:
+
+- **SCAN**: Cursor-based key iteration
+- **HSCAN**: Hash field iteration
+- **KEYS**: Pattern matching (with various patterns)
+- **Range Queries**: List range operations
+
+#### 4. Concurrent Operation Benchmarks
+Simulates multi-client scenarios:
+
+- **10 Concurrent Clients**: Light load
+- **50 Concurrent Clients**: Medium load
+- **Mixed Workload**: 70% reads, 30% writes
+
+#### 5. Memory Profiling
+Tracks memory usage patterns:
+
+- **Allocation Tracking**: Total allocations per operation
+- **Peak Memory**: Maximum memory usage
+- **Memory Efficiency**: Bytes per record stored
+
+### Test Data Specifications
+
+#### Dataset Sizes
+- **Small**: 1,000 - 10,000 records
+- **Medium**: 10,000 records (primary focus)
+
+#### Data Characteristics
+- **Key Format**: `bench:key:{id}` (predictable, sortable)
+- **Value Sizes**:
+  - Small: 50-100 bytes
+  - Medium: 500-1000 bytes
+  - Large: 5000-10000 bytes
+- **Hash Fields**: 5-20 fields per hash
+- **List Elements**: 10-100 elements per list
+
+### Metrics Collected
+
+For each benchmark, we collect:
+
+1. **Latency Metrics**
+   - Mean execution time
+   - Median (p50)
+   - 95th percentile (p95)
+   - 99th percentile (p99)
+   - Standard deviation
+
+2. **Throughput Metrics**
+   - Operations per second
+   - Records per second (for bulk operations)
+
+3. **Memory Metrics**
+   - Total allocations
+   - Peak memory usage
+   - Average bytes per operation
+
+4. **Initialization Overhead**
+   - Database startup time
+   - First operation latency (cold cache)
+
+## Benchmark Structure
+
+### Directory Layout
+
+```
+benches/
+├── common/
+│   ├── mod.rs              # Shared utilities
+│   ├── data_generator.rs   # Test data generation
+│   ├── metrics.rs          # Custom metrics collection
+│   └── backends.rs         # Backend setup helpers
+├── single_ops.rs           # Single-operation benchmarks
+├── bulk_ops.rs             # Bulk operation benchmarks
+├── scan_ops.rs             # Scan and query benchmarks
+├── concurrent_ops.rs       # Concurrent operation benchmarks
+└── memory_profile.rs       # Memory profiling benchmarks
+```
+
+### Running Benchmarks
+
+#### Run All Benchmarks
+```bash
+cargo bench
+```
+
+#### Run Specific Benchmark Suite
+```bash
+cargo bench --bench single_ops
+cargo bench --bench bulk_ops
+cargo bench --bench concurrent_ops
+```
+
+#### Run Specific Backend
+```bash
+cargo bench -- redb
+cargo bench -- sled
+```
+
+#### Generate Reports
+```bash
+# Run benchmarks and save results
+cargo bench -- --save-baseline main
+
+# Compare against baseline
+cargo bench -- --baseline main
+
+# Export to CSV
+cargo bench -- --output-format csv > results.csv
+```
+
+### Output Formats
+
+#### 1. Terminal Output (Default)
+Real-time progress with statistical summaries:
+```
+single_ops/redb/set/small
+                        time:   [1.234 µs 1.245 µs 1.256 µs]
+                        thrpt:  [802.5K ops/s 810.2K ops/s 818.1K ops/s]
+```
+
+#### 2. CSV Export
+Structured data for analysis:
+```csv
+backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,throughput_ops_sec
+redb,set,small,1245,1240,1890,2100,810200
+sled,set,small,1567,1550,2340,2890,638000
+```
+
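+The `throughput_ops_sec` column is not measured separately; it is derived from
+the mean latency. A minimal sketch of the conversion (this mirrors the
+derivation in `scripts/parse_results.py`):
+
+```python
+def throughput_ops_sec(mean_ns: float) -> float:
+    # one second = 1_000_000_000 ns, so throughput is the reciprocal of mean latency
+    return 1_000_000_000 / mean_ns if mean_ns > 0 else 0.0
+```
+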
+#### 3. JSON Export
+Detailed metrics for programmatic processing:
+```json
+{
+  "benchmark": "single_ops/redb/set/small",
+  "metrics": {
+    "mean": 1245,
+    "median": 1240,
+    "p95": 1890,
+    "p99": 2100,
+    "std_dev": 145,
+    "throughput": 810200
+  },
+  "memory": {
+    "allocations": 3,
+    "peak_bytes": 4096
+  }
+}
+```
+
+## Benchmark Implementation Details
+
+### Backend Setup
+
+Each benchmark creates isolated database instances:
+
+```rust
+// Redb backend
+let temp_dir = TempDir::new()?;
+let db_path = temp_dir.path().join("bench.db");
+let storage = Storage::new(db_path, false, None)?;
+
+// Sled backend
+let temp_dir = TempDir::new()?;
+let db_path = temp_dir.path().join("bench.sled");
+let storage = SledStorage::new(db_path, false, None)?;
+```
+
+### Data Generation
+
+Deterministic data generation ensures reproducibility:
+
+```rust
+use rand::{SeedableRng, Rng};
+use rand::rngs::StdRng;
+
+fn generate_test_data(count: usize, seed: u64) -> Vec<(String, String)> {
+    let mut rng = StdRng::seed_from_u64(seed);
+    (0..count)
+        .map(|i| {
+            let key = format!("bench:key:{:08}", i);
+            let value = generate_value(&mut rng, 100);
+            (key, value)
+        })
+        .collect()
+}
+```
+
+### Concurrent Testing
+
+Using Tokio for async concurrent operations:
+
+```rust
+async fn concurrent_benchmark(
+    storage: Arc<Storage>,
+    num_clients: usize,
+    operations: usize
+) {
+    let tasks: Vec<_> = (0..num_clients)
+        .map(|client_id| {
+            let storage = storage.clone();
+            tokio::spawn(async move {
+                for i in 0..operations {
+                    let key = format!("client:{}:key:{}", client_id, i);
+                    storage.set(key, "value".to_string()).unwrap();
+                }
+            })
+        })
+        .collect();
+
+    futures::future::join_all(tasks).await;
+}
+```
+
+## Interpreting Results
+
+### Performance Comparison
+
+When comparing backends, consider:
+
+1. **Latency vs Throughput Trade-offs**
+   - Lower latency = better for interactive workloads
+   - Higher throughput = better for batch processing
+
+2. **Consistency**
+   - Lower standard deviation = more predictable performance
+   - Check p95/p99 for tail latency
+
+3. **Scalability**
+   - How performance changes with dataset size
+   - Concurrent operation efficiency
+
+### Backend Selection Guidelines
+
+Based on benchmark results, choose:
+
+**redb** when:
+- Need predictable latency
+- Working with structured data (separate tables)
+- Require high concurrent read performance
+- Memory efficiency is important
+
+**sled** when:
+- Need high write throughput
+- Working with uniform data types
+- Require lock-free operations
+- Crash recovery is critical
+
+## Memory Profiling
+
+### Using DHAT
+
+For detailed memory profiling:
+
+```bash
+# Install valgrind and dhat
+sudo apt-get install valgrind
+
+# Run with DHAT
+cargo bench --bench memory_profile -- --profile-time=10
+```
+
+### Custom Allocation Tracking
+
+The benchmarks include custom allocation tracking:
+
+```rust
+#[global_allocator]
+static ALLOC: dhat::Alloc = dhat::Alloc;
+
+fn track_allocations<F>(f: F) -> AllocationStats
+where
+    F: FnOnce(),
+{
+    let _profiler = dhat::Profiler::new_heap();
+    f();
+    // Extract stats from profiler
+}
+```
+
+## Continuous Benchmarking
+
+### Regression Detection
+
+Compare against baseline to detect performance regressions:
+
+```bash
+# Save current performance as baseline
+cargo bench -- --save-baseline v0.1.0
+
+# After changes, compare
+cargo bench -- --baseline v0.1.0
+
+# Criterion will highlight significant changes
+```
+
+### CI Integration
+
+Add to CI pipeline:
+
+```yaml
+- name: Run Benchmarks
+  run: |
+    cargo bench --no-fail-fast -- --output-format json > bench-results.json
+
+- name: Compare Results
+  run: |
+    python scripts/compare_benchmarks.py \
+      --baseline baseline.json \
+      --current bench-results.json \
+      --threshold 10  # Fail if >10% regression
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Inconsistent Results**
+   - Ensure system is idle during benchmarks
+   - Disable CPU frequency scaling
+   - Run multiple iterations
+
+2. **Out of Memory**
+   - Reduce dataset sizes
+   - Run benchmarks sequentially
+   - Increase system swap space
+
+3. **Slow Benchmarks**
+   - Reduce sample size in Criterion config
+   - Use `--quick` flag for faster runs
+   - Focus on specific benchmarks
+
+### Performance Tips
+
+```bash
+# Quick benchmark run (fewer samples)
+cargo bench -- --quick
+
+# Verbose output for debugging
+cargo bench -- --verbose
+
+# Profile specific operation
+cargo bench -- single_ops/redb/set
+```
+
+## Future Enhancements
+
+Potential additions to the benchmark suite:
+
+1. **Transaction Performance**: Measure MULTI/EXEC overhead
+2. **Encryption Overhead**: Compare encrypted vs non-encrypted
+3. **Persistence Testing**: Measure flush/sync performance
+4. **Recovery Time**: Database restart and recovery speed
+5. **Network Overhead**: Redis protocol parsing impact
+6. **Long-Running Stability**: Performance over extended periods
+
+## References
+
+- [Criterion.rs Documentation](https://bheisler.github.io/criterion.rs/book/)
+- [DHAT Memory Profiler](https://valgrind.org/docs/manual/dh-manual.html)
+- [Rust Performance Book](https://nnethercote.github.io/perf-book/)
\ No newline at end of file
diff --git a/scripts/compare_backends.py b/scripts/compare_backends.py
new file mode 100755
index 0000000..351ef3e
--- /dev/null
+++ b/scripts/compare_backends.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Compare performance between redb and sled backends.
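+
+Typical usage (the same invocations main() prints in its usage message):
+    python compare_backends.py results.csv
+    python compare_backends.py results.json --export comparison.csv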
+""" + +import json +import csv +import sys +from typing import Dict, List, Any +from pathlib import Path + +def load_results(input_file: str) -> List[Dict[str, Any]]: + """Load benchmark results from CSV or JSON file.""" + path = Path(input_file) + + if not path.exists(): + print(f"Error: File not found: {input_file}", file=sys.stderr) + return [] + + if path.suffix == '.json': + with open(input_file, 'r') as f: + data = json.load(f) + return data.get('benchmarks', []) + elif path.suffix == '.csv': + results = [] + with open(input_file, 'r') as f: + reader = csv.DictReader(f) + for row in reader: + # Convert numeric fields + row['mean_ns'] = float(row.get('mean_ns', 0)) + row['median_ns'] = float(row.get('median_ns', 0)) + row['throughput_ops_sec'] = float(row.get('throughput_ops_sec', 0)) + results.append(row) + return results + else: + print(f"Error: Unsupported file format: {path.suffix}", file=sys.stderr) + return [] + +def group_by_operation(results: List[Dict[str, Any]]) -> Dict[str, Dict[str, Dict]]: + """Group results by operation and backend.""" + grouped = {} + + for result in results: + operation = result.get('operation', result.get('name', '')) + backend = result.get('backend', '') + + if not operation or not backend: + continue + + if operation not in grouped: + grouped[operation] = {} + + grouped[operation][backend] = result + + return grouped + +def calculate_speedup(redb_value: float, sled_value: float) -> float: + """Calculate speedup factor (positive means redb is faster).""" + if sled_value == 0: + return 0 + return sled_value / redb_value + +def format_duration(nanos: float) -> str: + """Format duration in human-readable format.""" + if nanos < 1_000: + return f"{nanos:.0f} ns" + elif nanos < 1_000_000: + return f"{nanos / 1_000:.2f} µs" + elif nanos < 1_000_000_000: + return f"{nanos / 1_000_000:.2f} ms" + else: + return f"{nanos / 1_000_000_000:.2f} s" + +def print_comparison_table(grouped: Dict[str, Dict[str, Dict]]): + """Print a comparison table of backends.""" + print("\n" + "=" * 100) + print("BACKEND PERFORMANCE COMPARISON") + print("=" * 100) + print() + + # Header + print(f"{'Operation':<30} {'redb (mean)':<15} {'sled (mean)':<15} {'Speedup':<12} {'Winner':<10}") + print("-" * 100) + + redb_wins = 0 + sled_wins = 0 + total_comparisons = 0 + + for operation in sorted(grouped.keys()): + backends = grouped[operation] + + if 'redb' in backends and 'sled' in backends: + redb_mean = backends['redb'].get('mean_ns', 0) + sled_mean = backends['sled'].get('mean_ns', 0) + + speedup = calculate_speedup(redb_mean, sled_mean) + + if speedup > 1.0: + winner = "redb" + redb_wins += 1 + elif speedup < 1.0: + winner = "sled" + sled_wins += 1 + else: + winner = "tie" + + total_comparisons += 1 + + speedup_str = f"{speedup:.2f}x" if speedup != 0 else "N/A" + + print(f"{operation:<30} {format_duration(redb_mean):<15} {format_duration(sled_mean):<15} " + f"{speedup_str:<12} {winner:<10}") + + print("-" * 100) + print(f"\nSummary: redb wins: {redb_wins}, sled wins: {sled_wins}, total: {total_comparisons}") + + if total_comparisons > 0: + redb_pct = (redb_wins / total_comparisons) * 100 + sled_pct = (sled_wins / total_comparisons) * 100 + print(f"Win rate: redb {redb_pct:.1f}%, sled {sled_pct:.1f}%") + +def print_throughput_comparison(grouped: Dict[str, Dict[str, Dict]]): + """Print throughput comparison.""" + print("\n" + "=" * 100) + print("THROUGHPUT COMPARISON (ops/sec)") + print("=" * 100) + print() + + print(f"{'Operation':<30} {'redb':<20} {'sled':<20} {'Difference':<15}") + 
print("-" * 100) + + for operation in sorted(grouped.keys()): + backends = grouped[operation] + + if 'redb' in backends and 'sled' in backends: + redb_throughput = backends['redb'].get('throughput_ops_sec', 0) + sled_throughput = backends['sled'].get('throughput_ops_sec', 0) + + diff_pct = ((redb_throughput - sled_throughput) / sled_throughput * 100) if sled_throughput > 0 else 0 + diff_str = f"{diff_pct:+.1f}%" + + print(f"{operation:<30} {redb_throughput:>18,.0f} {sled_throughput:>18,.0f} {diff_str:>13}") + +def generate_recommendations(grouped: Dict[str, Dict[str, Dict]]): + """Generate recommendations based on benchmark results.""" + print("\n" + "=" * 100) + print("RECOMMENDATIONS") + print("=" * 100) + print() + + redb_strengths = [] + sled_strengths = [] + + for operation, backends in grouped.items(): + if 'redb' in backends and 'sled' in backends: + redb_mean = backends['redb'].get('mean_ns', 0) + sled_mean = backends['sled'].get('mean_ns', 0) + + speedup = calculate_speedup(redb_mean, sled_mean) + + if speedup > 1.2: # redb is >20% faster + redb_strengths.append((operation, speedup)) + elif speedup < 0.8: # sled is >20% faster + sled_strengths.append((operation, 1/speedup)) + + print("Use redb when:") + if redb_strengths: + for op, speedup in sorted(redb_strengths, key=lambda x: x[1], reverse=True)[:5]: + print(f" • {op}: {speedup:.2f}x faster than sled") + else: + print(" • No significant advantages found") + + print("\nUse sled when:") + if sled_strengths: + for op, speedup in sorted(sled_strengths, key=lambda x: x[1], reverse=True)[:5]: + print(f" • {op}: {speedup:.2f}x faster than redb") + else: + print(" • No significant advantages found") + + print("\nGeneral guidelines:") + print(" • redb: Better for read-heavy workloads, predictable latency") + print(" • sled: Better for write-heavy workloads, memory efficiency") + +def export_comparison(grouped: Dict[str, Dict[str, Dict]], output_file: str): + """Export comparison to CSV.""" + with open(output_file, 'w', newline='') as f: + fieldnames = ['operation', 'redb_mean_ns', 'sled_mean_ns', 'speedup', + 'redb_throughput', 'sled_throughput', 'winner'] + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + + for operation, backends in sorted(grouped.items()): + if 'redb' in backends and 'sled' in backends: + redb_mean = backends['redb'].get('mean_ns', 0) + sled_mean = backends['sled'].get('mean_ns', 0) + redb_throughput = backends['redb'].get('throughput_ops_sec', 0) + sled_throughput = backends['sled'].get('throughput_ops_sec', 0) + + speedup = calculate_speedup(redb_mean, sled_mean) + winner = "redb" if speedup > 1.0 else "sled" if speedup < 1.0 else "tie" + + writer.writerow({ + 'operation': operation, + 'redb_mean_ns': int(redb_mean), + 'sled_mean_ns': int(sled_mean), + 'speedup': f"{speedup:.2f}", + 'redb_throughput': f"{redb_throughput:.0f}", + 'sled_throughput': f"{sled_throughput:.0f}", + 'winner': winner + }) + + print(f"\nComparison exported to {output_file}") + +def main(): + if len(sys.argv) < 2: + print("Usage: python compare_backends.py [--export comparison.csv]") + print("\nExample:") + print(" python compare_backends.py results.csv") + print(" python compare_backends.py results.json --export comparison.csv") + sys.exit(1) + + input_file = sys.argv[1] + export_file = None + + # Parse command line arguments + if '--export' in sys.argv: + idx = sys.argv.index('--export') + if idx + 1 < len(sys.argv): + export_file = sys.argv[idx + 1] + + # Load results + print(f"Loading results from 
{input_file}...") + results = load_results(input_file) + + if not results: + print("No results found!") + sys.exit(1) + + print(f"Loaded {len(results)} benchmark results") + + # Group by operation + grouped = group_by_operation(results) + + if not grouped: + print("No comparable results found!") + sys.exit(1) + + # Print comparisons + print_comparison_table(grouped) + print_throughput_comparison(grouped) + generate_recommendations(grouped) + + # Export if requested + if export_file: + export_comparison(grouped, export_file) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/parse_results.py b/scripts/parse_results.py new file mode 100755 index 0000000..3b00558 --- /dev/null +++ b/scripts/parse_results.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +""" +Parse Criterion benchmark results and export to CSV/JSON formats. +""" + +import json +import csv +import sys +import os +from pathlib import Path +from typing import Dict, List, Any + +def parse_criterion_json(criterion_dir: str) -> List[Dict[str, Any]]: + """Parse Criterion benchmark results from the target directory.""" + results = [] + criterion_path = Path(criterion_dir) + + if not criterion_path.exists(): + print(f"Error: Criterion directory not found: {criterion_dir}", file=sys.stderr) + return results + + # Find all benchmark.json files + for benchmark_file in criterion_path.rglob("new/benchmark.json"): + try: + with open(benchmark_file, 'r') as f: + data = json.load(f) + + # Extract benchmark name from path + bench_name = str(benchmark_file.parent.parent.name) + + # Extract metrics + result = { + 'name': bench_name, + 'mean_ns': data.get('mean', {}).get('point_estimate', 0), + 'median_ns': data.get('median', {}).get('point_estimate', 0), + 'std_dev_ns': data.get('std_dev', {}).get('point_estimate', 0), + } + + # Calculate throughput + if result['mean_ns'] > 0: + result['throughput_ops_sec'] = 1_000_000_000 / result['mean_ns'] + else: + result['throughput_ops_sec'] = 0 + + results.append(result) + except Exception as e: + print(f"Warning: Failed to parse {benchmark_file}: {e}", file=sys.stderr) + + return results + +def parse_benchmark_name(name: str) -> Dict[str, str]: + """Parse benchmark name into components.""" + parts = name.split('/') + + result = { + 'suite': parts[0] if len(parts) > 0 else '', + 'category': parts[1] if len(parts) > 1 else '', + 'operation': parts[2] if len(parts) > 2 else '', + 'backend': '', + 'parameter': '' + } + + # Try to extract backend name + for part in parts: + if 'redb' in part.lower(): + result['backend'] = 'redb' + break + elif 'sled' in part.lower(): + result['backend'] = 'sled' + break + + # Extract parameter (size, clients, etc.) 
+    if len(parts) > 3:
+        result['parameter'] = parts[3]
+
+    return result
+
+def export_to_csv(results: List[Dict[str, Any]], output_file: str):
+    """Export results to CSV format."""
+    if not results:
+        print("No results to export", file=sys.stderr)
+        return
+
+    fieldnames = ['name', 'backend', 'operation', 'mean_ns', 'median_ns',
+                  'std_dev_ns', 'throughput_ops_sec']
+
+    with open(output_file, 'w', newline='') as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+
+        for result in results:
+            parsed = parse_benchmark_name(result['name'])
+            row = {
+                'name': result['name'],
+                'backend': parsed['backend'],
+                'operation': parsed['operation'],
+                'mean_ns': int(result['mean_ns']),
+                'median_ns': int(result['median_ns']),
+                'std_dev_ns': int(result['std_dev_ns']),
+                'throughput_ops_sec': f"{result['throughput_ops_sec']:.2f}"
+            }
+            writer.writerow(row)
+
+    print(f"Exported {len(results)} results to {output_file}")
+
+def export_to_json(results: List[Dict[str, Any]], output_file: str):
+    """Export results to JSON format."""
+    if not results:
+        print("No results to export", file=sys.stderr)
+        return
+
+    # Enhance results with parsed information
+    enhanced_results = []
+    for result in results:
+        parsed = parse_benchmark_name(result['name'])
+        enhanced = {**result, **parsed}
+        enhanced_results.append(enhanced)
+
+    output = {
+        'benchmarks': enhanced_results,
+        'summary': {
+            'total_benchmarks': len(results),
+            'backends': list(set(r.get('backend', '') for r in enhanced_results if r.get('backend')))
+        }
+    }
+
+    with open(output_file, 'w') as f:
+        json.dump(output, f, indent=2)
+
+    print(f"Exported {len(results)} results to {output_file}")
+
+def print_summary(results: List[Dict[str, Any]]):
+    """Print a summary of benchmark results."""
+    if not results:
+        print("No results to summarize")
+        return
+
+    print("\n=== Benchmark Summary ===\n")
+    print(f"Total benchmarks: {len(results)}")
+
+    # Group by backend
+    backends = {}
+    for result in results:
+        parsed = parse_benchmark_name(result['name'])
+        backend = parsed['backend']
+        if backend:
+            if backend not in backends:
+                backends[backend] = []
+            backends[backend].append(result)
+
+    for backend, bench_results in backends.items():
+        print(f"\n{backend.upper()}:")
+        print(f"  Benchmarks: {len(bench_results)}")
+
+        if bench_results:
+            mean_throughput = sum(r['throughput_ops_sec'] for r in bench_results) / len(bench_results)
+            print(f"  Avg throughput: {mean_throughput:.2f} ops/sec")
+
+            fastest = max(bench_results, key=lambda x: x['throughput_ops_sec'])
+            print(f"  Fastest: {fastest['name']} ({fastest['throughput_ops_sec']:.2f} ops/sec)")
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python parse_results.py <criterion_dir> [--csv output.csv] [--json output.json]")
+        print("\nExample:")
+        print("  python parse_results.py target/criterion --csv results.csv --json results.json")
+        sys.exit(1)
+
+    criterion_dir = sys.argv[1]
+
+    # Parse command line arguments
+    csv_output = None
+    json_output = None
+
+    i = 2
+    while i < len(sys.argv):
+        if sys.argv[i] == '--csv' and i + 1 < len(sys.argv):
+            csv_output = sys.argv[i + 1]
+            i += 2
+        elif sys.argv[i] == '--json' and i + 1 < len(sys.argv):
+            json_output = sys.argv[i + 1]
+            i += 2
+        else:
+            i += 1
+
+    # Parse results
+    print(f"Parsing benchmark results from {criterion_dir}...")
+    results = parse_criterion_json(criterion_dir)
+
+    if not results:
+        print("No benchmark results found!")
+        sys.exit(1)
+
+    # Export results
+    if csv_output:
+        export_to_csv(results, csv_output)
+
+    if json_output:
+        export_to_json(results, json_output)
+
+    # Print summary
+    print_summary(results)
+
+    # If no output specified, print to stdout
+    if not csv_output and not json_output:
+        print("\n=== CSV Output ===\n")
+        import io
+        output = io.StringIO()
+        fieldnames = ['name', 'mean_ns', 'median_ns', 'throughput_ops_sec']
+        writer = csv.DictWriter(output, fieldnames=fieldnames)
+        writer.writeheader()
+        for result in results:
+            writer.writerow({
+                'name': result['name'],
+                'mean_ns': int(result['mean_ns']),
+                'median_ns': int(result['median_ns']),
+                'throughput_ops_sec': f"{result['throughput_ops_sec']:.2f}"
+            })
+        print(output.getvalue())
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/src/error.rs b/src/error.rs
index 3037c70..1120134 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -9,6 +9,14 @@ use bincode;
 #[derive(Debug)]
 pub struct DBError(pub String);
 
+impl std::fmt::Display for DBError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl std::error::Error for DBError {}
+
 impl From<std::io::Error> for DBError {
     fn from(item: std::io::Error) -> Self {
         DBError(item.to_string().clone())