#!/usr/bin/env python3
"""
Compare benchmark performance between the redb and sled backends.

Reads benchmark results from a CSV or JSON file, prints latency and
throughput comparisons plus recommendations, and can optionally export
the comparison to CSV (--export).
"""
import json
import csv
import sys
from typing import Dict, List, Any
from pathlib import Path
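
# Expected input schema (illustrative; the field names are exactly what the
# loaders below read, the values are made up). The operation label may come
# from either an 'operation' or a 'name' field:
#
#   JSON: {"benchmarks": [{"operation": "...", "backend": "redb",
#                          "mean_ns": 1200.0, "median_ns": 1150.0,
#                          "throughput_ops_sec": 830000.0}, ...]}
#
#   CSV header: operation,backend,mean_ns,median_ns,throughput_ops_sec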


def load_results(input_file: str) -> List[Dict[str, Any]]:
    """Load benchmark results from a CSV or JSON file."""
    path = Path(input_file)
    if not path.exists():
        print(f"Error: File not found: {input_file}", file=sys.stderr)
        return []
    if path.suffix == '.json':
        with open(input_file, 'r') as f:
            data = json.load(f)
        return data.get('benchmarks', [])
    elif path.suffix == '.csv':
        results = []
        with open(input_file, 'r') as f:
            reader = csv.DictReader(f)
            for row in reader:
                # Convert numeric fields; missing or empty cells become 0.
                row['mean_ns'] = float(row.get('mean_ns') or 0)
                row['median_ns'] = float(row.get('median_ns') or 0)
                row['throughput_ops_sec'] = float(row.get('throughput_ops_sec') or 0)
                results.append(row)
        return results
    else:
        print(f"Error: Unsupported file format: {path.suffix}", file=sys.stderr)
        return []


def group_by_operation(results: List[Dict[str, Any]]) -> Dict[str, Dict[str, Dict]]:
    """Group results by operation and backend."""
    grouped = {}
    for result in results:
        operation = result.get('operation', result.get('name', ''))
        backend = result.get('backend', '')
        if not operation or not backend:
            continue
        if operation not in grouped:
            grouped[operation] = {}
        grouped[operation][backend] = result
    return grouped
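
# Resulting shape (illustrative):
#   {"<operation>": {"redb": {...result...}, "sled": {...result...}}, ...}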


def calculate_speedup(redb_value: float, sled_value: float) -> float:
    """Calculate the speedup factor (values above 1.0 mean redb is faster)."""
    if redb_value == 0:
        # Guard against division by zero; callers render 0 as "N/A".
        return 0
    return sled_value / redb_value
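
# For example (hypothetical numbers): calculate_speedup(1_000, 2_000) == 2.0,
# i.e. redb finished the operation in half the time sled needed.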


def format_duration(nanos: float) -> str:
    """Format a duration in nanoseconds as a human-readable string."""
    if nanos < 1_000:
        return f"{nanos:.0f} ns"
    elif nanos < 1_000_000:
        return f"{nanos / 1_000:.2f} µs"
    elif nanos < 1_000_000_000:
        return f"{nanos / 1_000_000:.2f} ms"
    else:
        return f"{nanos / 1_000_000_000:.2f} s"


def print_comparison_table(grouped: Dict[str, Dict[str, Dict]]):
    """Print a side-by-side latency comparison of the two backends."""
    print("\n" + "=" * 100)
    print("BACKEND PERFORMANCE COMPARISON")
    print("=" * 100)
    print()
    # Header
    print(f"{'Operation':<30} {'redb (mean)':<15} {'sled (mean)':<15} {'Speedup':<12} {'Winner':<10}")
    print("-" * 100)
    redb_wins = 0
    sled_wins = 0
    total_comparisons = 0
    for operation in sorted(grouped.keys()):
        backends = grouped[operation]
        if 'redb' in backends and 'sled' in backends:
            redb_mean = backends['redb'].get('mean_ns', 0)
            sled_mean = backends['sled'].get('mean_ns', 0)
            speedup = calculate_speedup(redb_mean, sled_mean)
            if speedup > 1.0:
                winner = "redb"
                redb_wins += 1
            elif 0 < speedup < 1.0:
                winner = "sled"
                sled_wins += 1
            else:
                # speedup == 1.0 is a tie; speedup == 0 means no valid data.
                winner = "tie" if speedup == 1.0 else "N/A"
            total_comparisons += 1
            speedup_str = f"{speedup:.2f}x" if speedup != 0 else "N/A"
            print(f"{operation:<30} {format_duration(redb_mean):<15} {format_duration(sled_mean):<15} "
                  f"{speedup_str:<12} {winner:<10}")
    print("-" * 100)
    print(f"\nSummary: redb wins: {redb_wins}, sled wins: {sled_wins}, total: {total_comparisons}")
    if total_comparisons > 0:
        redb_pct = (redb_wins / total_comparisons) * 100
        sled_pct = (sled_wins / total_comparisons) * 100
        print(f"Win rate: redb {redb_pct:.1f}%, sled {sled_pct:.1f}%")


def print_throughput_comparison(grouped: Dict[str, Dict[str, Dict]]):
    """Print a throughput comparison of the two backends."""
    print("\n" + "=" * 100)
    print("THROUGHPUT COMPARISON (ops/sec)")
    print("=" * 100)
    print()
    print(f"{'Operation':<30} {'redb':<20} {'sled':<20} {'Difference':<15}")
    print("-" * 100)
    for operation in sorted(grouped.keys()):
        backends = grouped[operation]
        if 'redb' in backends and 'sled' in backends:
            redb_throughput = backends['redb'].get('throughput_ops_sec', 0)
            sled_throughput = backends['sled'].get('throughput_ops_sec', 0)
            # Positive percentages mean redb has higher throughput.
            diff_pct = ((redb_throughput - sled_throughput) / sled_throughput * 100) if sled_throughput > 0 else 0
            diff_str = f"{diff_pct:+.1f}%"
            print(f"{operation:<30} {redb_throughput:>18,.0f} {sled_throughput:>18,.0f} {diff_str:>13}")


def generate_recommendations(grouped: Dict[str, Dict[str, Dict]]):
    """Generate recommendations based on benchmark results."""
    print("\n" + "=" * 100)
    print("RECOMMENDATIONS")
    print("=" * 100)
    print()
    redb_strengths = []
    sled_strengths = []
    for operation, backends in grouped.items():
        if 'redb' in backends and 'sled' in backends:
            redb_mean = backends['redb'].get('mean_ns', 0)
            sled_mean = backends['sled'].get('mean_ns', 0)
            speedup = calculate_speedup(redb_mean, sled_mean)
            if speedup > 1.2:  # redb is >20% faster
                redb_strengths.append((operation, speedup))
            elif 0 < speedup < 0.8:  # sled is >25% faster (0 means missing data)
                sled_strengths.append((operation, 1 / speedup))
    print("Use redb when:")
    if redb_strengths:
        for op, speedup in sorted(redb_strengths, key=lambda x: x[1], reverse=True)[:5]:
            print(f" • {op}: {speedup:.2f}x faster than sled")
    else:
        print(" • No significant advantages found")
    print("\nUse sled when:")
    if sled_strengths:
        for op, speedup in sorted(sled_strengths, key=lambda x: x[1], reverse=True)[:5]:
            print(f" • {op}: {speedup:.2f}x faster than redb")
    else:
        print(" • No significant advantages found")
    print("\nGeneral guidelines:")
    print(" • redb: Better for read-heavy workloads, predictable latency")
    print(" • sled: Better for write-heavy workloads, memory efficiency")


def export_comparison(grouped: Dict[str, Dict[str, Dict]], output_file: str):
    """Export the comparison to CSV."""
    with open(output_file, 'w', newline='') as f:
        fieldnames = ['operation', 'redb_mean_ns', 'sled_mean_ns', 'speedup',
                      'redb_throughput', 'sled_throughput', 'winner']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for operation, backends in sorted(grouped.items()):
            if 'redb' in backends and 'sled' in backends:
                redb_mean = backends['redb'].get('mean_ns', 0)
                sled_mean = backends['sled'].get('mean_ns', 0)
                redb_throughput = backends['redb'].get('throughput_ops_sec', 0)
                sled_throughput = backends['sled'].get('throughput_ops_sec', 0)
                speedup = calculate_speedup(redb_mean, sled_mean)
                winner = "redb" if speedup > 1.0 else "sled" if 0 < speedup < 1.0 else "tie"
                writer.writerow({
                    'operation': operation,
                    'redb_mean_ns': int(redb_mean),
                    'sled_mean_ns': int(sled_mean),
                    'speedup': f"{speedup:.2f}",
                    'redb_throughput': f"{redb_throughput:.0f}",
                    'sled_throughput': f"{sled_throughput:.0f}",
                    'winner': winner
                })
    print(f"\nComparison exported to {output_file}")


def main():
    if len(sys.argv) < 2:
        print("Usage: python compare_backends.py <results_file> [--export comparison.csv]")
        print("\nExample:")
        print("  python compare_backends.py results.csv")
        print("  python compare_backends.py results.json --export comparison.csv")
        sys.exit(1)
    input_file = sys.argv[1]
    export_file = None
    # Parse command line arguments
    if '--export' in sys.argv:
        idx = sys.argv.index('--export')
        if idx + 1 < len(sys.argv):
            export_file = sys.argv[idx + 1]
    # Load results
    print(f"Loading results from {input_file}...")
    results = load_results(input_file)
    if not results:
        print("No results found!")
        sys.exit(1)
    print(f"Loaded {len(results)} benchmark results")
    # Group by operation
    grouped = group_by_operation(results)
    if not grouped:
        print("No comparable results found!")
        sys.exit(1)
    # Print comparisons
    print_comparison_table(grouped)
    print_throughput_comparison(grouped)
    generate_recommendations(grouped)
    # Export if requested
    if export_file:
        export_comparison(grouped, export_file)


if __name__ == '__main__':
    main()
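
# Typical invocations (assuming a results file produced by the herodb
# benchmark suite; see the usage text in main() above):
#   python3 compare_backends.py results.csv
#   python3 compare_backends.py results.json --export comparison.csv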