benchmarking

commit 9136e5f3c0 (parent 592b6c1ea9)
Author: Maxime Van Hees
Date: 2025-10-30 11:17:26 +01:00
16 changed files with 3611 additions and 0 deletions

benches/common/backends.rs (new file, 197 lines)

@@ -0,0 +1,197 @@
// benches/common/backends.rs
use herodb::storage::Storage;
use herodb::storage_sled::SledStorage;
use herodb::storage_trait::StorageBackend;
use std::sync::Arc;
use tempfile::TempDir;
/// Backend type identifier
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendType {
Redb,
Sled,
}
impl BackendType {
pub fn name(&self) -> &'static str {
match self {
BackendType::Redb => "redb",
BackendType::Sled => "sled",
}
}
pub fn all() -> Vec<BackendType> {
vec![BackendType::Redb, BackendType::Sled]
}
}
/// Wrapper for benchmark backends with automatic cleanup
pub struct BenchmarkBackend {
pub storage: Arc<dyn StorageBackend>,
pub backend_type: BackendType,
_temp_dir: TempDir, // Kept for automatic cleanup
}
impl BenchmarkBackend {
/// Create a new redb backend for benchmarking
pub fn new_redb() -> Result<Self, Box<dyn std::error::Error>> {
let temp_dir = TempDir::new()?;
let db_path = temp_dir.path().join("bench.db");
let storage = Storage::new(db_path, false, None)?;
Ok(Self {
storage: Arc::new(storage),
backend_type: BackendType::Redb,
_temp_dir: temp_dir,
})
}
/// Create a new sled backend for benchmarking
pub fn new_sled() -> Result<Self, Box<dyn std::error::Error>> {
let temp_dir = TempDir::new()?;
let db_path = temp_dir.path().join("bench.sled");
let storage = SledStorage::new(db_path, false, None)?;
Ok(Self {
storage: Arc::new(storage),
backend_type: BackendType::Sled,
_temp_dir: temp_dir,
})
}
/// Create a backend of the specified type
pub fn new(backend_type: BackendType) -> Result<Self, Box<dyn std::error::Error>> {
match backend_type {
BackendType::Redb => Self::new_redb(),
BackendType::Sled => Self::new_sled(),
}
}
/// Get the backend name for display
pub fn name(&self) -> &'static str {
self.backend_type.name()
}
/// Pre-populate the backend with test data
pub fn populate_strings(&self, data: &[(String, String)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, value) in data {
self.storage.set(key.clone(), value.clone())?;
}
Ok(())
}
/// Pre-populate with hash data
pub fn populate_hashes(&self, data: &[(String, Vec<(String, String)>)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, fields) in data {
self.storage.hset(key, fields.clone())?;
}
Ok(())
}
/// Pre-populate with list data
pub fn populate_lists(&self, data: &[(String, Vec<String>)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, elements) in data {
self.storage.rpush(key, elements.clone())?;
}
Ok(())
}
/// Clear all data from the backend
pub fn clear(&self) -> Result<(), Box<dyn std::error::Error>> {
self.storage.flushdb()?;
Ok(())
}
/// Get the number of keys in the database
pub fn dbsize(&self) -> Result<i64, Box<dyn std::error::Error>> {
Ok(self.storage.dbsize()?)
}
}
/// Helper function to create and populate a backend for read benchmarks
pub fn setup_populated_backend(
backend_type: BackendType,
num_keys: usize,
value_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_string_pairs(num_keys, value_size);
backend.populate_strings(&data)?;
Ok(backend)
}
/// Helper function to create and populate a backend with hash data
pub fn setup_populated_backend_hashes(
backend_type: BackendType,
num_hashes: usize,
fields_per_hash: usize,
value_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_hash_data(num_hashes, fields_per_hash, value_size);
backend.populate_hashes(&data)?;
Ok(backend)
}
/// Helper function to create and populate a backend with list data
pub fn setup_populated_backend_lists(
backend_type: BackendType,
num_lists: usize,
elements_per_list: usize,
element_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_list_data(num_lists, elements_per_list, element_size);
backend.populate_lists(&data)?;
Ok(backend)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_backend_creation() {
let redb = BenchmarkBackend::new_redb();
assert!(redb.is_ok());
let sled = BenchmarkBackend::new_sled();
assert!(sled.is_ok());
}
#[test]
fn test_backend_populate() {
let backend = BenchmarkBackend::new_redb().unwrap();
let data = vec![
("key1".to_string(), "value1".to_string()),
("key2".to_string(), "value2".to_string()),
];
backend.populate_strings(&data).unwrap();
assert_eq!(backend.dbsize().unwrap(), 2);
}
#[test]
fn test_backend_clear() {
let backend = BenchmarkBackend::new_redb().unwrap();
let data = vec![("key1".to_string(), "value1".to_string())];
backend.populate_strings(&data).unwrap();
assert_eq!(backend.dbsize().unwrap(), 1);
backend.clear().unwrap();
assert_eq!(backend.dbsize().unwrap(), 0);
}
}
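For orientation, here is a minimal sketch of how these helpers might be wired into a Criterion bench target. The Criterion dev-dependency, the `[[bench]]` entry with `harness = false`, and the `bench_set` / file names are assumptions, not part of this commit; the only storage call used is `set(String, String)`, which the populate helpers above already rely on.

// benches/example_bench.rs (hypothetical bench target)
mod common;

use common::{BackendType, BenchmarkBackend};
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_set(c: &mut Criterion) {
    for backend_type in BackendType::all() {
        // Fresh temp-dir backed store per backend; files are removed when `backend` drops.
        let backend = BenchmarkBackend::new(backend_type).unwrap();
        let value = "x".repeat(100);
        let mut i: u64 = 0;
        c.bench_function(&format!("set/{}", backend.name()), |b| {
            b.iter(|| {
                i += 1;
                backend
                    .storage
                    .set(format!("bench:key:{:08}", i), value.clone())
                    .unwrap();
            })
        });
    }
}

criterion_group!(benches, bench_set);
criterion_main!(benches);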

benches/common/data_generator.rs (new file, 131 lines)

@@ -0,0 +1,131 @@
// benches/common/data_generator.rs
use rand::{Rng, SeedableRng};
use rand::rngs::StdRng;
/// Deterministic data generator for benchmarks
pub struct DataGenerator {
rng: StdRng,
}
impl DataGenerator {
/// Create a new data generator with a fixed seed for reproducibility
pub fn new(seed: u64) -> Self {
Self {
rng: StdRng::seed_from_u64(seed),
}
}
/// Generate a single key with the given prefix and ID
pub fn generate_key(&self, prefix: &str, id: usize) -> String {
format!("{}:{:08}", prefix, id)
}
/// Generate a random string value of the specified size
pub fn generate_value(&mut self, size: usize) -> String {
const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
(0..size)
.map(|_| {
let idx = self.rng.gen_range(0..CHARSET.len());
CHARSET[idx] as char
})
.collect()
}
/// Generate a batch of key-value pairs
pub fn generate_string_pairs(&mut self, count: usize, value_size: usize) -> Vec<(String, String)> {
(0..count)
.map(|i| {
let key = self.generate_key("bench:key", i);
let value = self.generate_value(value_size);
(key, value)
})
.collect()
}
/// Generate hash data (key -> field-value pairs)
pub fn generate_hash_data(&mut self, num_hashes: usize, fields_per_hash: usize, value_size: usize)
-> Vec<(String, Vec<(String, String)>)> {
(0..num_hashes)
.map(|i| {
let hash_key = self.generate_key("bench:hash", i);
let fields: Vec<(String, String)> = (0..fields_per_hash)
.map(|j| {
let field = format!("field{}", j);
let value = self.generate_value(value_size);
(field, value)
})
.collect();
(hash_key, fields)
})
.collect()
}
/// Generate list data (key -> list of elements)
pub fn generate_list_data(&mut self, num_lists: usize, elements_per_list: usize, element_size: usize)
-> Vec<(String, Vec<String>)> {
(0..num_lists)
.map(|i| {
let list_key = self.generate_key("bench:list", i);
let elements: Vec<String> = (0..elements_per_list)
.map(|_| self.generate_value(element_size))
.collect();
(list_key, elements)
})
.collect()
}
/// Generate roughly `count` keys, split evenly across user/session/cache patterns, for pattern-matching tests
pub fn generate_pattern_keys(&mut self, count: usize) -> Vec<String> {
let mut keys = Vec::new();
// Generate keys with different patterns
for i in 0..count / 3 {
keys.push(format!("user:{}:profile", i));
}
for i in 0..count / 3 {
keys.push(format!("session:{}:data", i));
}
for i in 0..count / 3 {
keys.push(format!("cache:{}:value", i));
}
keys
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_deterministic_generation() {
let mut generator1 = DataGenerator::new(42);
let mut generator2 = DataGenerator::new(42);
let pairs1 = generator1.generate_string_pairs(10, 50);
let pairs2 = generator2.generate_string_pairs(10, 50);
assert_eq!(pairs1, pairs2, "Same seed should produce same data");
}
#[test]
fn test_value_size() {
let mut generator = DataGenerator::new(42);
let value = generator.generate_value(100);
assert_eq!(value.len(), 100);
}
#[test]
fn test_hash_generation() {
let mut generator = DataGenerator::new(42);
let hashes = generator.generate_hash_data(5, 10, 50);
assert_eq!(hashes.len(), 5);
for (_, fields) in hashes {
assert_eq!(fields.len(), 10);
for (_, value) in fields {
assert_eq!(value.len(), 50);
}
}
}
}
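The fixed seed is what keeps cross-backend comparisons apples-to-apples: every backend is populated from an identical stream of keys and values, and reruns reproduce the same data set. A small sketch (the function name and the assumption that `DataGenerator` is in scope via the `common` module are illustrative):

fn determinism_example() {
    // Same seed, same call sequence: byte-for-byte identical workloads.
    let mut gen_a = DataGenerator::new(42);
    let mut gen_b = DataGenerator::new(42);
    assert_eq!(
        gen_a.generate_string_pairs(1_000, 100),
        gen_b.generate_string_pairs(1_000, 100)
    );
    // Keys follow the "<prefix>:<zero-padded id>" shape produced by generate_key().
    assert_eq!(gen_a.generate_key("bench:key", 7), "bench:key:00000007");
}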

benches/common/metrics.rs (new file, 289 lines)

@@ -0,0 +1,289 @@
// benches/common/metrics.rs
use serde::{Deserialize, Serialize};
/// Custom metrics for benchmark results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkMetrics {
pub operation: String,
pub backend: String,
pub dataset_size: usize,
pub mean_ns: u64,
pub median_ns: u64,
pub p95_ns: u64,
pub p99_ns: u64,
pub std_dev_ns: u64,
pub throughput_ops_sec: f64,
}
impl BenchmarkMetrics {
pub fn new(
operation: String,
backend: String,
dataset_size: usize,
) -> Self {
Self {
operation,
backend,
dataset_size,
mean_ns: 0,
median_ns: 0,
p95_ns: 0,
p99_ns: 0,
std_dev_ns: 0,
throughput_ops_sec: 0.0,
}
}
/// Convert to CSV row format
pub fn to_csv_row(&self) -> String {
format!(
"{},{},{},{},{},{},{},{},{:.2}",
self.backend,
self.operation,
self.dataset_size,
self.mean_ns,
self.median_ns,
self.p95_ns,
self.p99_ns,
self.std_dev_ns,
self.throughput_ops_sec
)
}
/// Get CSV header
pub fn csv_header() -> String {
"backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,std_dev_ns,throughput_ops_sec".to_string()
}
/// Convert to JSON
pub fn to_json(&self) -> serde_json::Value {
serde_json::json!({
"backend": self.backend,
"operation": self.operation,
"dataset_size": self.dataset_size,
"metrics": {
"mean_ns": self.mean_ns,
"median_ns": self.median_ns,
"p95_ns": self.p95_ns,
"p99_ns": self.p99_ns,
"std_dev_ns": self.std_dev_ns,
"throughput_ops_sec": self.throughput_ops_sec
}
})
}
/// Calculate throughput from mean latency
pub fn calculate_throughput(&mut self) {
if self.mean_ns > 0 {
self.throughput_ops_sec = 1_000_000_000.0 / self.mean_ns as f64;
}
}
/// Format duration for display
pub fn format_duration(nanos: u64) -> String {
if nanos < 1_000 {
format!("{} ns", nanos)
} else if nanos < 1_000_000 {
format!("{:.2} µs", nanos as f64 / 1_000.0)
} else if nanos < 1_000_000_000 {
format!("{:.2} ms", nanos as f64 / 1_000_000.0)
} else {
format!("{:.2} s", nanos as f64 / 1_000_000_000.0)
}
}
/// Pretty print the metrics
pub fn display(&self) -> String {
format!(
"{}/{} (n={}): mean={}, median={}, p95={}, p99={}, throughput={:.0} ops/sec",
self.backend,
self.operation,
self.dataset_size,
Self::format_duration(self.mean_ns),
Self::format_duration(self.median_ns),
Self::format_duration(self.p95_ns),
Self::format_duration(self.p99_ns),
self.throughput_ops_sec
)
}
}
/// Memory metrics for profiling
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetrics {
pub operation: String,
pub backend: String,
pub allocations: usize,
pub peak_bytes: usize,
pub avg_bytes_per_op: f64,
}
impl MemoryMetrics {
pub fn new(operation: String, backend: String) -> Self {
Self {
operation,
backend,
allocations: 0,
peak_bytes: 0,
avg_bytes_per_op: 0.0,
}
}
/// Convert to CSV row format
pub fn to_csv_row(&self) -> String {
format!(
"{},{},{},{},{:.2}",
self.backend,
self.operation,
self.allocations,
self.peak_bytes,
self.avg_bytes_per_op
)
}
/// Get CSV header
pub fn csv_header() -> String {
"backend,operation,allocations,peak_bytes,avg_bytes_per_op".to_string()
}
/// Format bytes for display
pub fn format_bytes(bytes: usize) -> String {
if bytes < 1024 {
format!("{} B", bytes)
} else if bytes < 1024 * 1024 {
format!("{:.2} KB", bytes as f64 / 1024.0)
} else if bytes < 1024 * 1024 * 1024 {
format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
} else {
format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
}
}
/// Pretty print the metrics
pub fn display(&self) -> String {
format!(
"{}/{}: {} allocations, peak={}, avg={}",
self.backend,
self.operation,
self.allocations,
Self::format_bytes(self.peak_bytes),
Self::format_bytes(self.avg_bytes_per_op as usize)
)
}
}
/// Collection of benchmark results for comparison
#[derive(Debug, Default)]
pub struct BenchmarkResults {
pub metrics: Vec<BenchmarkMetrics>,
pub memory_metrics: Vec<MemoryMetrics>,
}
impl BenchmarkResults {
pub fn new() -> Self {
Self::default()
}
pub fn add_metric(&mut self, metric: BenchmarkMetrics) {
self.metrics.push(metric);
}
pub fn add_memory_metric(&mut self, metric: MemoryMetrics) {
self.memory_metrics.push(metric);
}
/// Export all metrics to CSV format
pub fn to_csv(&self) -> String {
let mut output = String::new();
if !self.metrics.is_empty() {
output.push_str(&BenchmarkMetrics::csv_header());
output.push('\n');
for metric in &self.metrics {
output.push_str(&metric.to_csv_row());
output.push('\n');
}
}
if !self.memory_metrics.is_empty() {
output.push('\n');
output.push_str(&MemoryMetrics::csv_header());
output.push('\n');
for metric in &self.memory_metrics {
output.push_str(&metric.to_csv_row());
output.push('\n');
}
}
output
}
/// Export all metrics to JSON format
pub fn to_json(&self) -> serde_json::Value {
serde_json::json!({
"benchmarks": self.metrics.iter().map(|m| m.to_json()).collect::<Vec<_>>(),
"memory": self.memory_metrics
})
}
/// Save results to a file
pub fn save_csv(&self, path: &str) -> std::io::Result<()> {
std::fs::write(path, self.to_csv())
}
pub fn save_json(&self, path: &str) -> std::io::Result<()> {
let json = serde_json::to_string_pretty(&self.to_json())?;
std::fs::write(path, json)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_metrics_creation() {
let mut metric = BenchmarkMetrics::new(
"set".to_string(),
"redb".to_string(),
1000,
);
metric.mean_ns = 1_245;
metric.calculate_throughput();
assert!(metric.throughput_ops_sec > 0.0);
}
#[test]
fn test_csv_export() {
let mut results = BenchmarkResults::new();
let mut metric = BenchmarkMetrics::new(
"set".to_string(),
"redb".to_string(),
1000,
);
metric.mean_ns = 1_245;
metric.calculate_throughput();
results.add_metric(metric);
let csv = results.to_csv();
assert!(csv.contains("backend,operation"));
assert!(csv.contains("redb,set"));
}
#[test]
fn test_duration_formatting() {
assert_eq!(BenchmarkMetrics::format_duration(500), "500 ns");
assert_eq!(BenchmarkMetrics::format_duration(1_500), "1.50 µs");
assert_eq!(BenchmarkMetrics::format_duration(1_500_000), "1.50 ms");
}
#[test]
fn test_bytes_formatting() {
assert_eq!(MemoryMetrics::format_bytes(512), "512 B");
assert_eq!(MemoryMetrics::format_bytes(2048), "2.00 KB");
assert_eq!(MemoryMetrics::format_bytes(2_097_152), "2.00 MB");
}
}
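A sketch of the intended flow: fill in the percentile fields, derive throughput from the mean, then write both export formats. It assumes the `common` module is in scope in a bench entry point; the nanosecond figures are placeholders, not measurements.

use common::{BenchmarkMetrics, BenchmarkResults};

fn export_example() -> std::io::Result<()> {
    let mut results = BenchmarkResults::new();

    let mut m = BenchmarkMetrics::new("hset".to_string(), "sled".to_string(), 10_000);
    m.mean_ns = 2_400;
    m.median_ns = 2_100;
    m.p95_ns = 4_800;
    m.p99_ns = 7_500;
    m.std_dev_ns = 900;
    m.calculate_throughput(); // 1e9 / 2_400 ns ≈ 416_667 ops/sec
    println!("{}", m.display());
    results.add_metric(m);

    results.save_csv("target/bench_results.csv")?;
    results.save_json("target/bench_results.json")
}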

benches/common/mod.rs (new file, 8 lines)

@@ -0,0 +1,8 @@
// benches/common/mod.rs
pub mod data_generator;
pub mod backends;
pub mod metrics;
pub use data_generator::*;
pub use backends::*;
pub use metrics::*;
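Because each submodule is re-exported with `pub use`, a bench target only needs the module declaration plus one import; a sketch with hypothetical file and target names:

// benches/string_ops.rs (hypothetical bench target)
// Cargo.toml would also need a matching target, roughly:
//   [[bench]]
//   name = "string_ops"
//   harness = false   # required when Criterion provides the main function
mod common;
use common::*; // DataGenerator, BenchmarkBackend, BackendType, metrics types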