benchmarking

commit 9136e5f3c0 (parent 592b6c1ea9)
Author: Maxime Van Hees
Date: 2025-10-30 11:17:26 +01:00
16 changed files with 3611 additions and 0 deletions

benches/common/backends.rs (new file, 197 lines)

@@ -0,0 +1,197 @@
// benches/common/backends.rs
use herodb::storage::Storage;
use herodb::storage_sled::SledStorage;
use herodb::storage_trait::StorageBackend;
use std::sync::Arc;
use tempfile::TempDir;
/// Backend type identifier
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendType {
Redb,
Sled,
}
impl BackendType {
pub fn name(&self) -> &'static str {
match self {
BackendType::Redb => "redb",
BackendType::Sled => "sled",
}
}
pub fn all() -> Vec<BackendType> {
vec![BackendType::Redb, BackendType::Sled]
}
}
/// Wrapper for benchmark backends with automatic cleanup
pub struct BenchmarkBackend {
pub storage: Arc<dyn StorageBackend>,
pub backend_type: BackendType,
_temp_dir: TempDir, // Kept for automatic cleanup
}
impl BenchmarkBackend {
/// Create a new redb backend for benchmarking
pub fn new_redb() -> Result<Self, Box<dyn std::error::Error>> {
let temp_dir = TempDir::new()?;
let db_path = temp_dir.path().join("bench.db");
let storage = Storage::new(db_path, false, None)?;
Ok(Self {
storage: Arc::new(storage),
backend_type: BackendType::Redb,
_temp_dir: temp_dir,
})
}
/// Create a new sled backend for benchmarking
pub fn new_sled() -> Result<Self, Box<dyn std::error::Error>> {
let temp_dir = TempDir::new()?;
let db_path = temp_dir.path().join("bench.sled");
let storage = SledStorage::new(db_path, false, None)?;
Ok(Self {
storage: Arc::new(storage),
backend_type: BackendType::Sled,
_temp_dir: temp_dir,
})
}
/// Create a backend of the specified type
pub fn new(backend_type: BackendType) -> Result<Self, Box<dyn std::error::Error>> {
match backend_type {
BackendType::Redb => Self::new_redb(),
BackendType::Sled => Self::new_sled(),
}
}
/// Get the backend name for display
pub fn name(&self) -> &'static str {
self.backend_type.name()
}
/// Pre-populate the backend with test data
pub fn populate_strings(&self, data: &[(String, String)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, value) in data {
self.storage.set(key.clone(), value.clone())?;
}
Ok(())
}
/// Pre-populate with hash data
pub fn populate_hashes(&self, data: &[(String, Vec<(String, String)>)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, fields) in data {
self.storage.hset(key, fields.clone())?;
}
Ok(())
}
/// Pre-populate with list data
pub fn populate_lists(&self, data: &[(String, Vec<String>)]) -> Result<(), Box<dyn std::error::Error>> {
for (key, elements) in data {
self.storage.rpush(key, elements.clone())?;
}
Ok(())
}
/// Clear all data from the backend
pub fn clear(&self) -> Result<(), Box<dyn std::error::Error>> {
self.storage.flushdb()?;
Ok(())
}
/// Get the number of keys in the database
pub fn dbsize(&self) -> Result<i64, Box<dyn std::error::Error>> {
Ok(self.storage.dbsize()?)
}
}
/// Helper function to create and populate a backend for read benchmarks
pub fn setup_populated_backend(
backend_type: BackendType,
num_keys: usize,
value_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_string_pairs(num_keys, value_size);
backend.populate_strings(&data)?;
Ok(backend)
}
/// Helper function to create and populate a backend with hash data
pub fn setup_populated_backend_hashes(
backend_type: BackendType,
num_hashes: usize,
fields_per_hash: usize,
value_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_hash_data(num_hashes, fields_per_hash, value_size);
backend.populate_hashes(&data)?;
Ok(backend)
}
/// Helper function to create and populate a backend with list data
pub fn setup_populated_backend_lists(
backend_type: BackendType,
num_lists: usize,
elements_per_list: usize,
element_size: usize,
) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> {
use super::DataGenerator;
let backend = BenchmarkBackend::new(backend_type)?;
let mut generator = DataGenerator::new(42);
let data = generator.generate_list_data(num_lists, elements_per_list, element_size);
backend.populate_lists(&data)?;
Ok(backend)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_backend_creation() {
let redb = BenchmarkBackend::new_redb();
assert!(redb.is_ok());
let sled = BenchmarkBackend::new_sled();
assert!(sled.is_ok());
}
#[test]
fn test_backend_populate() {
let backend = BenchmarkBackend::new_redb().unwrap();
let data = vec![
("key1".to_string(), "value1".to_string()),
("key2".to_string(), "value2".to_string()),
];
backend.populate_strings(&data).unwrap();
assert_eq!(backend.dbsize().unwrap(), 2);
}
#[test]
fn test_backend_clear() {
let backend = BenchmarkBackend::new_redb().unwrap();
let data = vec![("key1".to_string(), "value1".to_string())];
backend.populate_strings(&data).unwrap();
assert_eq!(backend.dbsize().unwrap(), 1);
backend.clear().unwrap();
assert_eq!(backend.dbsize().unwrap(), 0);
}
}
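For orientation, here is a minimal sketch of how these helpers might be wired into a Criterion bench target. The Criterion dev-dependency, the `[[bench]]` entry with `harness = false`, and the `bench_set` / file names are assumptions, not part of this commit; the only storage call used is `set(String, String)`, which the populate helpers above already rely on.

// benches/example_bench.rs (hypothetical bench target)
mod common;

use common::{BackendType, BenchmarkBackend};
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_set(c: &mut Criterion) {
    for backend_type in BackendType::all() {
        // Fresh temp-dir backed store per backend; files are removed when `backend` drops.
        let backend = BenchmarkBackend::new(backend_type).unwrap();
        let value = "x".repeat(100);
        let mut i: u64 = 0;
        c.bench_function(&format!("set/{}", backend.name()), |b| {
            b.iter(|| {
                i += 1;
                backend
                    .storage
                    .set(format!("bench:key:{:08}", i), value.clone())
                    .unwrap();
            })
        });
    }
}

criterion_group!(benches, bench_set);
criterion_main!(benches);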

benches/common/data_generator.rs (new file, 131 lines)

@@ -0,0 +1,131 @@
// benches/common/data_generator.rs
use rand::{Rng, SeedableRng};
use rand::rngs::StdRng;
/// Deterministic data generator for benchmarks
pub struct DataGenerator {
rng: StdRng,
}
impl DataGenerator {
/// Create a new data generator with a fixed seed for reproducibility
pub fn new(seed: u64) -> Self {
Self {
rng: StdRng::seed_from_u64(seed),
}
}
/// Generate a single key with the given prefix and ID
pub fn generate_key(&self, prefix: &str, id: usize) -> String {
format!("{}:{:08}", prefix, id)
}
/// Generate a random string value of the specified size
pub fn generate_value(&mut self, size: usize) -> String {
const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
(0..size)
.map(|_| {
let idx = self.rng.gen_range(0..CHARSET.len());
CHARSET[idx] as char
})
.collect()
}
/// Generate a batch of key-value pairs
pub fn generate_string_pairs(&mut self, count: usize, value_size: usize) -> Vec<(String, String)> {
(0..count)
.map(|i| {
let key = self.generate_key("bench:key", i);
let value = self.generate_value(value_size);
(key, value)
})
.collect()
}
/// Generate hash data (key -> field-value pairs)
pub fn generate_hash_data(&mut self, num_hashes: usize, fields_per_hash: usize, value_size: usize)
-> Vec<(String, Vec<(String, String)>)> {
(0..num_hashes)
.map(|i| {
let hash_key = self.generate_key("bench:hash", i);
let fields: Vec<(String, String)> = (0..fields_per_hash)
.map(|j| {
let field = format!("field{}", j);
let value = self.generate_value(value_size);
(field, value)
})
.collect();
(hash_key, fields)
})
.collect()
}
/// Generate list data (key -> list of elements)
pub fn generate_list_data(&mut self, num_lists: usize, elements_per_list: usize, element_size: usize)
-> Vec<(String, Vec<String>)> {
(0..num_lists)
.map(|i| {
let list_key = self.generate_key("bench:list", i);
let elements: Vec<String> = (0..elements_per_list)
.map(|_| self.generate_value(element_size))
.collect();
(list_key, elements)
})
.collect()
}
/// Generate roughly `count` keys, split evenly across user/session/cache patterns, for pattern-matching tests
pub fn generate_pattern_keys(&mut self, count: usize) -> Vec<String> {
let mut keys = Vec::new();
// Generate keys with different patterns
for i in 0..count / 3 {
keys.push(format!("user:{}:profile", i));
}
for i in 0..count / 3 {
keys.push(format!("session:{}:data", i));
}
for i in 0..count / 3 {
keys.push(format!("cache:{}:value", i));
}
keys
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_deterministic_generation() {
let mut generator1 = DataGenerator::new(42);
let mut generator2 = DataGenerator::new(42);
let pairs1 = generator1.generate_string_pairs(10, 50);
let pairs2 = generator2.generate_string_pairs(10, 50);
assert_eq!(pairs1, pairs2, "Same seed should produce same data");
}
#[test]
fn test_value_size() {
let mut generator = DataGenerator::new(42);
let value = generator.generate_value(100);
assert_eq!(value.len(), 100);
}
#[test]
fn test_hash_generation() {
let mut generator = DataGenerator::new(42);
let hashes = generator.generate_hash_data(5, 10, 50);
assert_eq!(hashes.len(), 5);
for (_, fields) in hashes {
assert_eq!(fields.len(), 10);
for (_, value) in fields {
assert_eq!(value.len(), 50);
}
}
}
}
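The fixed seed is what keeps cross-backend comparisons apples-to-apples: every backend is populated from an identical stream of keys and values, and reruns reproduce the same data set. A small sketch (the function name and the assumption that `DataGenerator` is in scope via the `common` module are illustrative):

fn determinism_example() {
    // Same seed, same call sequence: byte-for-byte identical workloads.
    let mut gen_a = DataGenerator::new(42);
    let mut gen_b = DataGenerator::new(42);
    assert_eq!(
        gen_a.generate_string_pairs(1_000, 100),
        gen_b.generate_string_pairs(1_000, 100)
    );
    // Keys follow the "<prefix>:<zero-padded id>" shape produced by generate_key().
    assert_eq!(gen_a.generate_key("bench:key", 7), "bench:key:00000007");
}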

benches/common/metrics.rs (new file, 289 lines)

@@ -0,0 +1,289 @@
// benches/common/metrics.rs
use serde::{Deserialize, Serialize};
/// Custom metrics for benchmark results
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkMetrics {
pub operation: String,
pub backend: String,
pub dataset_size: usize,
pub mean_ns: u64,
pub median_ns: u64,
pub p95_ns: u64,
pub p99_ns: u64,
pub std_dev_ns: u64,
pub throughput_ops_sec: f64,
}
impl BenchmarkMetrics {
pub fn new(
operation: String,
backend: String,
dataset_size: usize,
) -> Self {
Self {
operation,
backend,
dataset_size,
mean_ns: 0,
median_ns: 0,
p95_ns: 0,
p99_ns: 0,
std_dev_ns: 0,
throughput_ops_sec: 0.0,
}
}
/// Convert to CSV row format
pub fn to_csv_row(&self) -> String {
format!(
"{},{},{},{},{},{},{},{},{:.2}",
self.backend,
self.operation,
self.dataset_size,
self.mean_ns,
self.median_ns,
self.p95_ns,
self.p99_ns,
self.std_dev_ns,
self.throughput_ops_sec
)
}
/// Get CSV header
pub fn csv_header() -> String {
"backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,std_dev_ns,throughput_ops_sec".to_string()
}
/// Convert to JSON
pub fn to_json(&self) -> serde_json::Value {
serde_json::json!({
"backend": self.backend,
"operation": self.operation,
"dataset_size": self.dataset_size,
"metrics": {
"mean_ns": self.mean_ns,
"median_ns": self.median_ns,
"p95_ns": self.p95_ns,
"p99_ns": self.p99_ns,
"std_dev_ns": self.std_dev_ns,
"throughput_ops_sec": self.throughput_ops_sec
}
})
}
/// Calculate throughput from mean latency
pub fn calculate_throughput(&mut self) {
if self.mean_ns > 0 {
self.throughput_ops_sec = 1_000_000_000.0 / self.mean_ns as f64;
}
}
/// Format duration for display
pub fn format_duration(nanos: u64) -> String {
if nanos < 1_000 {
format!("{} ns", nanos)
} else if nanos < 1_000_000 {
format!("{:.2} µs", nanos as f64 / 1_000.0)
} else if nanos < 1_000_000_000 {
format!("{:.2} ms", nanos as f64 / 1_000_000.0)
} else {
format!("{:.2} s", nanos as f64 / 1_000_000_000.0)
}
}
/// Pretty print the metrics
pub fn display(&self) -> String {
format!(
"{}/{} (n={}): mean={}, median={}, p95={}, p99={}, throughput={:.0} ops/sec",
self.backend,
self.operation,
self.dataset_size,
Self::format_duration(self.mean_ns),
Self::format_duration(self.median_ns),
Self::format_duration(self.p95_ns),
Self::format_duration(self.p99_ns),
self.throughput_ops_sec
)
}
}
/// Memory metrics for profiling
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetrics {
pub operation: String,
pub backend: String,
pub allocations: usize,
pub peak_bytes: usize,
pub avg_bytes_per_op: f64,
}
impl MemoryMetrics {
pub fn new(operation: String, backend: String) -> Self {
Self {
operation,
backend,
allocations: 0,
peak_bytes: 0,
avg_bytes_per_op: 0.0,
}
}
/// Convert to CSV row format
pub fn to_csv_row(&self) -> String {
format!(
"{},{},{},{},{:.2}",
self.backend,
self.operation,
self.allocations,
self.peak_bytes,
self.avg_bytes_per_op
)
}
/// Get CSV header
pub fn csv_header() -> String {
"backend,operation,allocations,peak_bytes,avg_bytes_per_op".to_string()
}
/// Format bytes for display
pub fn format_bytes(bytes: usize) -> String {
if bytes < 1024 {
format!("{} B", bytes)
} else if bytes < 1024 * 1024 {
format!("{:.2} KB", bytes as f64 / 1024.0)
} else if bytes < 1024 * 1024 * 1024 {
format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
} else {
format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
}
}
/// Pretty print the metrics
pub fn display(&self) -> String {
format!(
"{}/{}: {} allocations, peak={}, avg={}",
self.backend,
self.operation,
self.allocations,
Self::format_bytes(self.peak_bytes),
Self::format_bytes(self.avg_bytes_per_op as usize)
)
}
}
/// Collection of benchmark results for comparison
#[derive(Debug, Default)]
pub struct BenchmarkResults {
pub metrics: Vec<BenchmarkMetrics>,
pub memory_metrics: Vec<MemoryMetrics>,
}
impl BenchmarkResults {
pub fn new() -> Self {
Self::default()
}
pub fn add_metric(&mut self, metric: BenchmarkMetrics) {
self.metrics.push(metric);
}
pub fn add_memory_metric(&mut self, metric: MemoryMetrics) {
self.memory_metrics.push(metric);
}
/// Export all metrics to CSV format
pub fn to_csv(&self) -> String {
let mut output = String::new();
if !self.metrics.is_empty() {
output.push_str(&BenchmarkMetrics::csv_header());
output.push('\n');
for metric in &self.metrics {
output.push_str(&metric.to_csv_row());
output.push('\n');
}
}
if !self.memory_metrics.is_empty() {
output.push('\n');
output.push_str(&MemoryMetrics::csv_header());
output.push('\n');
for metric in &self.memory_metrics {
output.push_str(&metric.to_csv_row());
output.push('\n');
}
}
output
}
/// Export all metrics to JSON format
pub fn to_json(&self) -> serde_json::Value {
serde_json::json!({
"benchmarks": self.metrics.iter().map(|m| m.to_json()).collect::<Vec<_>>(),
"memory": self.memory_metrics
})
}
/// Save results to a file
pub fn save_csv(&self, path: &str) -> std::io::Result<()> {
std::fs::write(path, self.to_csv())
}
pub fn save_json(&self, path: &str) -> std::io::Result<()> {
let json = serde_json::to_string_pretty(&self.to_json())?;
std::fs::write(path, json)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_metrics_creation() {
let mut metric = BenchmarkMetrics::new(
"set".to_string(),
"redb".to_string(),
1000,
);
metric.mean_ns = 1_245;
metric.calculate_throughput();
assert!(metric.throughput_ops_sec > 0.0);
}
#[test]
fn test_csv_export() {
let mut results = BenchmarkResults::new();
let mut metric = BenchmarkMetrics::new(
"set".to_string(),
"redb".to_string(),
1000,
);
metric.mean_ns = 1_245;
metric.calculate_throughput();
results.add_metric(metric);
let csv = results.to_csv();
assert!(csv.contains("backend,operation"));
assert!(csv.contains("redb,set"));
}
#[test]
fn test_duration_formatting() {
assert_eq!(BenchmarkMetrics::format_duration(500), "500 ns");
assert_eq!(BenchmarkMetrics::format_duration(1_500), "1.50 µs");
assert_eq!(BenchmarkMetrics::format_duration(1_500_000), "1.50 ms");
}
#[test]
fn test_bytes_formatting() {
assert_eq!(MemoryMetrics::format_bytes(512), "512 B");
assert_eq!(MemoryMetrics::format_bytes(2048), "2.00 KB");
assert_eq!(MemoryMetrics::format_bytes(2_097_152), "2.00 MB");
}
}
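A sketch of the intended flow: fill in the percentile fields, derive throughput from the mean, then write both export formats. It assumes the `common` module is in scope in a bench entry point; the nanosecond figures are placeholders, not measurements.

use common::{BenchmarkMetrics, BenchmarkResults};

fn export_example() -> std::io::Result<()> {
    let mut results = BenchmarkResults::new();

    let mut m = BenchmarkMetrics::new("hset".to_string(), "sled".to_string(), 10_000);
    m.mean_ns = 2_400;
    m.median_ns = 2_100;
    m.p95_ns = 4_800;
    m.p99_ns = 7_500;
    m.std_dev_ns = 900;
    m.calculate_throughput(); // 1e9 / 2_400 ns ≈ 416_667 ops/sec
    println!("{}", m.display());
    results.add_metric(m);

    results.save_csv("target/bench_results.csv")?;
    results.save_json("target/bench_results.json")
}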

benches/common/mod.rs (new file, 8 lines)

@@ -0,0 +1,8 @@
// benches/common/mod.rs
pub mod data_generator;
pub mod backends;
pub mod metrics;
pub use data_generator::*;
pub use backends::*;
pub use metrics::*;
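Because each submodule is re-exported with `pub use`, a bench target only needs the module declaration plus one import; a sketch with hypothetical file and target names:

// benches/string_ops.rs (hypothetical bench target)
// Cargo.toml would also need a matching target, roughly:
//   [[bench]]
//   name = "string_ops"
//   harness = false   # required when Criterion provides the main function
mod common;
use common::*; // DataGenerator, BenchmarkBackend, BackendType, metrics types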