benchmarking

2025-10-30 11:17:26 +01:00
parent 592b6c1ea9
commit 9136e5f3c0
16 changed files with 3611 additions and 0 deletions
--- a/scripts/parse_results.py
+++ b/scripts/parse_results.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+"""
+Parse Criterion benchmark results and export to CSV/JSON formats.
+"""
+
+import json
+import csv
+import sys
+import os
+from pathlib import Path
+from typing import Dict, List, Any
+
+def parse_criterion_json(criterion_dir: str) -> List[Dict[str, Any]]:
+    """Parse Criterion benchmark results from the target directory.
+
+    Recursively searches ``criterion_dir`` for ``new/benchmark.json`` files.
+    For each one found:
+
+    * The statistics (``mean``, ``median``, ``std_dev`` point estimates) are
+      read from ``benchmark.json`` itself when it carries a ``mean`` entry,
+      otherwise from the sibling ``new/estimates.json`` when that file exists.
+    * The benchmark name is the ``full_id`` field of ``benchmark.json`` when
+      present (a ``/``-separated id, which is the form parse_benchmark_name
+      splits on); otherwise it falls back to the name of the directory that
+      contains ``new/``.
+
+    Args:
+        criterion_dir: Path to the Criterion output directory.
+
+    Returns:
+        One dict per successfully parsed benchmark with the keys ``name``,
+        ``mean_ns``, ``median_ns``, ``std_dev_ns`` and ``throughput_ops_sec``.
+        The list is empty when the directory does not exist or nothing could
+        be parsed. Files that fail to parse are reported on stderr and skipped.
+    """
+    results: List[Dict[str, Any]] = []
+    criterion_path = Path(criterion_dir)
+
+    if not criterion_path.exists():
+        print(f"Error: Criterion directory not found: {criterion_dir}", file=sys.stderr)
+        return results
+
+    # Sorted so that the order of the results (and of every export derived
+    # from them) does not depend on filesystem traversal order.
+    for benchmark_file in sorted(criterion_path.rglob("new/benchmark.json")):
+        try:
+            with open(benchmark_file, 'r', encoding='utf-8') as f:
+                metadata = json.load(f)
+
+            # Statistics normally live next to benchmark.json, not inside it.
+            stats = metadata
+            if 'mean' not in metadata:
+                estimates_file = benchmark_file.with_name('estimates.json')
+                if estimates_file.is_file():
+                    with open(estimates_file, 'r', encoding='utf-8') as f:
+                        stats = json.load(f)
+
+            # Prefer the full id; a bare directory name never contains '/'.
+            bench_name = metadata.get('full_id') or str(benchmark_file.parent.parent.name)
+
+            result = {
+                'name': bench_name,
+                'mean_ns': stats.get('mean', {}).get('point_estimate', 0),
+                'median_ns': stats.get('median', {}).get('point_estimate', 0),
+                'std_dev_ns': stats.get('std_dev', {}).get('point_estimate', 0),
+            }
+
+            # Throughput is the number of iterations per second implied by the
+            # mean time (mean is in nanoseconds); 0 when no mean is available.
+            if result['mean_ns'] > 0:
+                result['throughput_ops_sec'] = 1_000_000_000 / result['mean_ns']
+            else:
+                result['throughput_ops_sec'] = 0
+
+            results.append(result)
+        except (OSError, ValueError, AttributeError, TypeError) as e:
+            # Best effort: unreadable, malformed or unexpectedly shaped files
+            # are reported and skipped so one bad file does not abort the run.
+            print(f"Warning: Failed to parse {benchmark_file}: {e}", file=sys.stderr)
+
+    return results
+
+def parse_benchmark_name(name: str) -> Dict[str, str]:
+    """Split a '/'-separated benchmark name into labelled components.
+
+    The first four segments map, in order, to ``suite``, ``category``,
+    ``operation`` and ``parameter``; segments that are missing become empty
+    strings. ``backend`` is 'redb' or 'sled', taken from the first segment
+    whose lower-cased text contains one of those words ('redb' is checked
+    first within a segment), or '' when no segment matches.
+    """
+    segments = name.split('/')
+
+    # Pad so positional lookups never fall off the end of a short name.
+    padded = segments + [''] * 4
+
+    backend = ''
+    for segment in segments:
+        lowered = segment.lower()
+        hit = next((known for known in ('redb', 'sled') if known in lowered), None)
+        if hit is not None:
+            backend = hit
+            break
+
+    return {
+        'suite': padded[0],
+        'category': padded[1],
+        'operation': padded[2],
+        'backend': backend,
+        'parameter': padded[3],
+    }
+
+def export_to_csv(results: List[Dict[str, Any]], output_file: str):
+    """Write the benchmark results to ``output_file`` as CSV.
+
+    Each row holds the benchmark name, the backend and operation derived from
+    that name, the mean/median/std-dev timings truncated to whole nanoseconds,
+    and the throughput formatted with two decimals. When ``results`` is empty
+    a message is printed to stderr and no file is written.
+    """
+    if not results:
+        print("No results to export", file=sys.stderr)
+        return
+
+    columns = ['name', 'backend', 'operation', 'mean_ns', 'median_ns',
+               'std_dev_ns', 'throughput_ops_sec']
+
+    def _as_row(entry: Dict[str, Any]) -> Dict[str, Any]:
+        # Backend and operation are not stored on the result; derive them.
+        name_parts = parse_benchmark_name(entry['name'])
+        return {
+            'name': entry['name'],
+            'backend': name_parts['backend'],
+            'operation': name_parts['operation'],
+            'mean_ns': int(entry['mean_ns']),
+            'median_ns': int(entry['median_ns']),
+            'std_dev_ns': int(entry['std_dev_ns']),
+            'throughput_ops_sec': f"{entry['throughput_ops_sec']:.2f}"
+        }
+
+    with open(output_file, 'w', newline='') as handle:
+        table = csv.DictWriter(handle, fieldnames=columns)
+        table.writeheader()
+        table.writerows(_as_row(entry) for entry in results)
+
+    print(f"Exported {len(results)} results to {output_file}")
+
+def export_to_json(results: List[Dict[str, Any]], output_file: str):
+    """Export results to JSON format.
+
+    The document written to ``output_file`` has two top-level keys:
+
+    * ``benchmarks``: every result merged with the components that
+      parse_benchmark_name derives from its name.
+    * ``summary``: the number of benchmarks and the sorted list of distinct,
+      non-empty backend names.
+
+    When ``results`` is empty a message is printed to stderr and no file is
+    written.
+
+    Args:
+        results: Benchmark result dicts; each must contain a ``name`` key.
+        output_file: Path of the JSON file to create or overwrite.
+    """
+    if not results:
+        print("No results to export", file=sys.stderr)
+        return
+
+    # Enhance results with parsed information
+    enhanced_results = [
+        {**result, **parse_benchmark_name(result['name'])}
+        for result in results
+    ]
+
+    # Sorted rather than list(set(...)): set iteration order for strings can
+    # differ between interpreter runs, which made the output unstable.
+    backends = sorted({r['backend'] for r in enhanced_results if r['backend']})
+
+    output = {
+        'benchmarks': enhanced_results,
+        'summary': {
+            'total_benchmarks': len(results),
+            'backends': backends
+        }
+    }
+
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(output, f, indent=2)
+
+    print(f"Exported {len(results)} results to {output_file}")
+
+def print_summary(results: List[Dict[str, Any]]):
+    """Print a per-backend overview of the benchmark results to stdout.
+
+    Results whose name yields no backend are counted in the total but left
+    out of the per-backend sections. For each backend the number of
+    benchmarks, the arithmetic mean of their throughputs and the benchmark
+    with the highest throughput are printed.
+    """
+    if not results:
+        print("No results to summarize")
+        return
+
+    print("\n=== Benchmark Summary ===\n")
+    print(f"Total benchmarks: {len(results)}")
+
+    # Bucket the results by backend, keeping first-seen backend order.
+    grouped = {}
+    for entry in results:
+        backend_name = parse_benchmark_name(entry['name'])['backend']
+        if not backend_name:
+            continue
+        grouped.setdefault(backend_name, []).append(entry)
+
+    # Every bucket holds at least one entry, so the average is well defined.
+    for backend_name, members in grouped.items():
+        print(f"\n{backend_name.upper()}:")
+        print(f"  Benchmarks: {len(members)}")
+
+        total_throughput = sum(member['throughput_ops_sec'] for member in members)
+        mean_throughput = total_throughput / len(members)
+        print(f"  Avg throughput: {mean_throughput:.2f} ops/sec")
+
+        fastest = max(members, key=lambda member: member['throughput_ops_sec'])
+        print(f"  Fastest: {fastest['name']} ({fastest['throughput_ops_sec']:.2f} ops/sec)")
+
+def main():
+    """Command-line entry point.
+
+    Usage: ``parse_results.py <criterion_dir> [--csv FILE] [--json FILE]``
+
+    Parses the results under ``criterion_dir``, writes the requested exports,
+    prints a summary, and, when neither ``--csv`` nor ``--json`` was given,
+    prints a reduced CSV table to stdout.
+
+    Exits with status 1 when no directory argument is given, when ``--csv``
+    or ``--json`` is the last argument and therefore has no file path, or
+    when no benchmark results are found. Unrecognized arguments are reported
+    on stderr and otherwise ignored.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: python parse_results.py <criterion_dir> [--csv output.csv] [--json output.json]")
+        print("\nExample:")
+        print("  python parse_results.py target/criterion --csv results.csv --json results.json")
+        sys.exit(1)
+
+    criterion_dir = sys.argv[1]
+
+    # Parse command line arguments
+    outputs = {'--csv': None, '--json': None}
+    extra_args = sys.argv[2:]
+
+    i = 0
+    while i < len(extra_args):
+        arg = extra_args[i]
+        if arg in outputs:
+            # A flag without a value used to be dropped silently, which made
+            # the run fall through to the stdout dump instead of exporting.
+            if i + 1 >= len(extra_args):
+                print(f"Error: {arg} requires an output file path", file=sys.stderr)
+                sys.exit(1)
+            outputs[arg] = extra_args[i + 1]
+            i += 2
+        else:
+            print(f"Warning: Ignoring unrecognized argument: {arg}", file=sys.stderr)
+            i += 1
+
+    csv_output = outputs['--csv']
+    json_output = outputs['--json']
+
+    # Parse results
+    print(f"Parsing benchmark results from {criterion_dir}...")
+    results = parse_criterion_json(criterion_dir)
+
+    if not results:
+        print("No benchmark results found!")
+        sys.exit(1)
+
+    # Export results
+    if csv_output:
+        export_to_csv(results, csv_output)
+
+    if json_output:
+        export_to_json(results, json_output)
+
+    # Print summary
+    print_summary(results)
+
+    # If no output specified, print to stdout
+    if not csv_output and not json_output:
+        print("\n=== CSV Output ===\n")
+        import io
+        output = io.StringIO()
+        fieldnames = ['name', 'mean_ns', 'median_ns', 'throughput_ops_sec']
+        writer = csv.DictWriter(output, fieldnames=fieldnames)
+        writer.writeheader()
+        for result in results:
+            writer.writerow({
+                'name': result['name'],
+                'mean_ns': int(result['mean_ns']),
+                'median_ns': int(result['median_ns']),
+                'throughput_ops_sec': f"{result['throughput_ops_sec']:.2f}"
+            })
+        print(output.getvalue())
+
+# Run the command-line interface only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == '__main__':
+    main()