Compare commits

49 commits: dd90a49615...perf_mem_t
| Author | SHA1 | Date |
|---|---|---|
|  | 9136e5f3c0 |  |
|  | 592b6c1ea9 |  |
|  | 6ae5d6f4f9 |  |
|  | 219e612eca |  |
|  | c4ae52b6ff |  |
|  | 45d8e306fb |  |
|  | 483ccb2ba8 |  |
|  | df780e20a2 |  |
|  | a8720c06db |  |
|  | 2139deb85d |  |
|  | 7d07b57d32 |  |
|  | 4aa49e0d5c |  |
|  | 644946f1ca |  |
|  | cf66f4c304 |  |
|  | 6a4e2819bf |  |
|  | 77a53bae86 |  |
|  | 7f689ae29b |  |
|  | 7f92001b89 |  |
|  | e7248b84e8 |  |
|  | 22ac4c9ed6 |  |
|  | c470772a13 |  |
|  | bd34fd092a |  |
|  | 8e044a64b7 |  |
|  | 87177f4a07 |  |
|  | 151a6ffbfa |  |
|  | 8ab841f68c |  |
|  | 8808c0e9d9 |  |
|  | c6b277cc9c |  |
|  | 8331ed032b |  |
|  | b8ca73397d |  |
|  | 1b15806a85 |  |
|  | da325a9659 |  |
|  | bdf363016a |  |
|  | 8798bc202e |  |
|  | 9fa9832605 |  |
|  | 4bb24b38dd |  |
|  | f3da14b957 |  |
|  | 5ea34b4445 |  |
|  | d9a3b711d1 |  |
|  | d931770e90 |  |
|  | a87ec4dbb5 |  |
|  | a1127b72da |  |
|  | 3850df89be |  |
|  | 45195d403e |  |
|  | f17b441ca1 |  |
|  | ff4ea1d844 |  |
|  | c9e1dcdb6c |  |
|  | 56699b9abb |  |
|  | 58cb1e8d5e |  |

.gitignore (vendored): 1 line changed

							| @@ -3,6 +3,7 @@ | ||||
| debug/ | ||||
| target/ | ||||
| .vscode/ | ||||
| test_images/ | ||||
|  | ||||
| # These are backup files generated by rustfmt | ||||
| **/*.rs.bk | ||||

Cargo.lock (generated): 6286 lines changed; file diff suppressed because it is too large

Cargo.toml: 71 lines changed

							| @@ -1,9 +1,64 @@ | ||||
| [workspace] | ||||
| members = ["herodb"] | ||||
| resolver = "2" | ||||
| [package] | ||||
| name = "herodb" | ||||
| version = "0.0.1" | ||||
| authors = ["ThreeFold Tech NV"] | ||||
| edition = "2024" | ||||
|  | ||||
| # You can define shared profiles for all workspace members here | ||||
| [profile.release] | ||||
| lto = true | ||||
| codegen-units = 1 | ||||
| strip = true | ||||
| [dependencies] | ||||
| anyhow = "1.0.59" | ||||
| bytes = "1.3.0" | ||||
| thiserror = "1.0.32" | ||||
| tokio = { version = "1.23.0", features = ["full"] } | ||||
| clap = { version = "4.5.20", features = ["derive"] } | ||||
| byteorder = "1.4.3" | ||||
| futures = "0.3" | ||||
| sled = "0.34" | ||||
| redb = "2.1.3" | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
| serde_json = "1.0" | ||||
| bincode = "1.3" | ||||
| chacha20poly1305 = "0.10.1" | ||||
| rand = "0.8" | ||||
| sha2 = "0.10" | ||||
| age = "0.10" | ||||
| secrecy = "0.8" | ||||
| ed25519-dalek = "2" | ||||
| x25519-dalek = "2" | ||||
| base64 = "0.22" | ||||
| jsonrpsee = { version = "0.25.1", features = ["http-client", "ws-client", "server", "macros"] } | ||||
| tantivy = "0.25.0" | ||||
| arrow-schema = "55.2.0" | ||||
| arrow-array = "55.2.0" | ||||
| lance = "0.37.0" | ||||
| lance-index = "0.37.0" | ||||
| arrow = "55.2.0" | ||||
| lancedb = "0.22.1" | ||||
| uuid = "1.18.1" | ||||
| ureq = { version = "2.10.0", features = ["json", "tls"] } | ||||
| reth-ipc = { git = "https://github.com/paradigmxyz/reth", package = "reth-ipc", rev = "d8451e54e7267f9f1634118d6d279b2216f7e2bb" } | ||||
| criterion = { version = "0.7.0", features = ["async", "async_tokio", "csv_output"] } | ||||
|  | ||||
| [dev-dependencies] | ||||
| redis = { version = "0.24", features = ["aio", "tokio-comp"] } | ||||
| tempfile = "3.8" | ||||
| csv = "1.3" | ||||
|  | ||||
| [[bench]] | ||||
| name = "single_ops" | ||||
| harness = false | ||||
|  | ||||
| [[bench]] | ||||
| name = "bulk_ops" | ||||
| harness = false | ||||
|  | ||||
| [[bench]] | ||||
| name = "scan_ops" | ||||
| harness = false | ||||
|  | ||||
| [[bench]] | ||||
| name = "concurrent_ops" | ||||
| harness = false | ||||
|  | ||||
| [[bench]] | ||||
| name = "memory_profile" | ||||
| harness = false | ||||
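All five `[[bench]]` targets above set `harness = false`, which Criterion requires: the `criterion_group!`/`criterion_main!` macros supply their own `main`. A minimal sketch of the skeleton such a target expects (illustrative only, not taken from this changeset) looks like:

```rust
// Minimal Criterion bench skeleton; illustrative, not part of this changeset.
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_noop(c: &mut Criterion) {
    // Criterion repeatedly times the closure passed to `iter` and reports
    // mean/median/percentile latency for it.
    c.bench_function("noop", |b| b.iter(|| std::hint::black_box(1 + 1)));
}

// These macros generate the `main` that `harness = false` tells Cargo not to add.
criterion_group!(benches, bench_noop);
criterion_main!(benches);
```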

README.md (new file): 113 lines

							| @@ -0,0 +1,113 @@ | ||||
| # HeroDB | ||||
|  | ||||
| HeroDB is a Redis-compatible database built with Rust, offering a flexible and secure storage solution. It supports two primary storage backends: `redb` (default) and `sled`, both with full encryption capabilities. HeroDB aims to provide a robust and performant key-value store with advanced features like data-at-rest encryption, hash operations, list operations, and cursor-based scanning. | ||||
|  | ||||
| ## Purpose | ||||
|  | ||||
| The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-compatible database that prioritizes data security through transparent encryption. It's designed for applications that require fast, reliable data storage with the option for strong cryptographic protection, without the overhead of a full-fledged Redis server. | ||||
|  | ||||
| ## Features | ||||
|  | ||||
| - **Redis Compatibility**: Supports a subset of Redis commands over RESP (Redis Serialization Protocol) via TCP. | ||||
| - **Dual Backend Support**: | ||||
|     - `redb` (default): Optimized for concurrent access and high-throughput scenarios. | ||||
|     - `sled`: A lock-free, log-structured database, excellent for specific workloads. | ||||
| - **Data-at-Rest Encryption**: Transparent encryption for both backends using the `age` encryption library. | ||||
| - **Key-Value Operations**: Full support for basic string, hash, and list operations. | ||||
| - **Expiration**: Time-to-live (TTL) functionality for keys. | ||||
| - **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`). | ||||
| - **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations. | ||||
| - **Symmetric Encryption**: Stateless symmetric encryption using XChaCha20-Poly1305. | ||||
| - **Admin Database 0**: Centralized control for database management, access control, and per-database encryption. | ||||
|  | ||||
| ## Quick Start | ||||
|  | ||||
| ### Building HeroDB | ||||
|  | ||||
| To build HeroDB, navigate to the project root and run: | ||||
|  | ||||
| ```bash | ||||
| cargo build --release | ||||
| ``` | ||||
|  | ||||
| ### Running HeroDB | ||||
|  | ||||
| Launch HeroDB with the required `--admin-secret` flag, which encrypts the admin database (DB 0) and authorizes admin access. Optional flags include `--dir` for the database directory, `--port` for the TCP port (default 6379), `--sled` for the sled backend, `--enable-rpc` to start the HTTP JSON-RPC server on a TCP port, `--enable-rpc-ipc` to start JSON-RPC over a Unix Domain Socket (non-HTTP), and `--rpc-ipc-path <path>` to specify the socket path (default: `/tmp/herodb.ipc`). | ||||
|  | ||||
| Example: | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb --admin-secret myadminsecret --port 6379 --enable-rpc | ||||
| ``` | ||||
|  | ||||
| To enable JSON-RPC over a Unix Domain Socket at `/tmp/herodb.sock`: | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb --admin-secret myadminsecret --enable-rpc-ipc --rpc-ipc-path /tmp/herodb.sock | ||||
| ``` | ||||
|  | ||||
| Test the IPC endpoint interactively with socat: | ||||
| ```bash | ||||
| sudo socat -d -d -t 5 - UNIX-CONNECT:/tmp/herodb.sock | ||||
| ``` | ||||
| Then paste a framed JSON-RPC request (Content-Length header, blank line, then JSON body). Example: | ||||
| ``` | ||||
| Content-Length: 73 | ||||
|  | ||||
| {"jsonrpc":"2.0","method":"hero_listDatabases","params":[],"id":3} | ||||
| ``` | ||||
|  | ||||
| For a one-liner that auto-computes Content-Length and pretty-prints the JSON response, see docs/rpc_examples.md. | ||||
|  | ||||
| For detailed launch options, see [Basics](docs/basics.md). | ||||
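The framed request above can also be sent programmatically. A minimal Rust sketch (assumptions: the `/tmp/herodb.sock` path from the example, `std`'s `UnixStream`, and CRLF separators in the framing header; not taken from this changeset):

```rust
// Send one framed JSON-RPC request over the HeroDB IPC socket.
// Assumes the framing shown above (Content-Length header, blank line, body);
// whether the server wants CRLF or LF line endings is an assumption here.
use std::io::{Read, Write};
use std::os::unix::net::UnixStream;

fn main() -> std::io::Result<()> {
    let body = r#"{"jsonrpc":"2.0","method":"hero_listDatabases","params":[],"id":3}"#;
    // Compute Content-Length from the body so the header never goes stale.
    let request = format!("Content-Length: {}\r\n\r\n{}", body.len(), body);

    let mut stream = UnixStream::connect("/tmp/herodb.sock")?;
    stream.write_all(request.as_bytes())?;

    // Read one chunk of the response; a real client would parse the response's
    // own Content-Length header and read exactly that many bytes.
    let mut buf = [0u8; 4096];
    let n = stream.read(&mut buf)?;
    println!("{}", String::from_utf8_lossy(&buf[..n]));
    Ok(())
}
```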
|  | ||||
| ## Usage with Redis Clients | ||||
|  | ||||
| HeroDB can be interacted with using any standard Redis client, such as `redis-cli`, `redis-py` (Python), or `ioredis` (Node.js). | ||||
|  | ||||
| ### Example with `redis-cli` | ||||
|  | ||||
| Connections start with no database selected. You must SELECT a database first. | ||||
|  | ||||
| - To work in the admin database (DB 0), authenticate with the admin secret: | ||||
| ```bash | ||||
| redis-cli -p 6379 SELECT 0 KEY myadminsecret | ||||
| redis-cli -p 6379 SET mykey "Hello from HeroDB!" | ||||
| redis-cli -p 6379 GET mykey | ||||
| # → "Hello from HeroDB!" | ||||
| ``` | ||||
|  | ||||
| - To use a user database, first create one via the JSON-RPC API (see docs/rpc_examples.md), then select it: | ||||
| ```bash | ||||
| # Suppose RPC created database id 1 | ||||
| redis-cli -p 6379 SELECT 1 | ||||
| redis-cli -p 6379 HSET user:1 name "Alice" age "30" | ||||
| redis-cli -p 6379 HGET user:1 name | ||||
| # → "Alice" | ||||
| redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10 | ||||
| ``` | ||||
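The same flow works from Rust with the `redis` crate that this changeset adds as a dev-dependency. A rough sketch (address, secret, and key names mirror the redis-cli examples above and are placeholders; the `SELECT 0 KEY <secret>` form is the HeroDB extension documented above):

```rust
// Rough sketch using the synchronous redis-rs API (a dev-dependency in this changeset).
// Assumes HeroDB listens on 127.0.0.1:6379 and the admin secret is "myadminsecret".
use redis::Commands;

fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/")?;
    let mut con = client.get_connection()?;

    // HeroDB-specific SELECT form: authenticate into the admin database (DB 0).
    redis::cmd("SELECT").arg(0).arg("KEY").arg("myadminsecret").query::<()>(&mut con)?;

    let _: () = con.set("mykey", "Hello from HeroDB!")?;
    let value: String = con.get("mykey")?;
    println!("{value}");
    Ok(())
}
```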
|  | ||||
| ## Cryptography | ||||
|  | ||||
| HeroDB supports asymmetric encryption/signatures via AGE commands (X25519 for encryption, Ed25519 for signatures) in stateless or key-managed modes, and symmetric encryption via SYM commands. Keys are persisted in the admin database (DB 0) for managed modes. | ||||
|  | ||||
| For details, see [AGE Cryptography](docs/age.md) and [Basics](docs/basics.md). | ||||
|  | ||||
| ## Database Management | ||||
|  | ||||
| Databases are managed via JSON-RPC API, with metadata stored in the encrypted admin database (DB 0). Databases are public by default upon creation; use RPC to set them private, requiring access keys for SELECT operations (read or readwrite based on permissions). This includes per-database encryption keys, access control, and lifecycle management. | ||||
|  | ||||
| For examples, see [JSON-RPC Examples](docs/rpc_examples.md) and [Admin DB 0 Model](docs/admin.md). | ||||
|  | ||||
| ## Documentation | ||||
|  | ||||
| For more detailed information on commands, features, and advanced usage, please refer to the documentation: | ||||
|  | ||||
| - [Basics](docs/basics.md) - Launch options, symmetric encryption, and basic usage | ||||
| - [Supported Commands](docs/cmds.md) - Complete Redis command reference and backend comparison | ||||
| - [AGE Cryptography](docs/age.md) - Asymmetric encryption and digital signatures | ||||
| - [Admin DB 0 Model](docs/admin.md) - Database management, access control, and per-database encryption | ||||
| - [JSON-RPC Examples](docs/rpc_examples.md) - Management API examples | ||||
| - [Full-Text Search](docs/search.md) - Tantivy-powered search capabilities | ||||
| - [Tantivy Backend](docs/tantivy.md) - Tantivy as a dedicated database backend | ||||
| - [Lance Vector Store](docs/lance.md) - Vector embeddings and semantic search | ||||
| - [Lance Text and Images Example](docs/lancedb_text_and_images_example.md) - End-to-end vector search examples | ||||
| - [Local Embedder Tutorial](docs/local_embedder_full_example.md) - Complete embedding models tutorial | ||||

benches/README.md (new file): 172 lines

							| @@ -0,0 +1,172 @@ | ||||
| # HeroDB Benchmarks | ||||
|  | ||||
| This directory contains comprehensive performance benchmarks for HeroDB's storage backends (redb and sled). | ||||
|  | ||||
| ## Quick Start | ||||
|  | ||||
| ```bash | ||||
| # Run all benchmarks | ||||
| cargo bench | ||||
|  | ||||
| # Run specific suite | ||||
| cargo bench --bench single_ops | ||||
|  | ||||
| # Quick run (fewer samples) | ||||
| cargo bench -- --quick | ||||
| ``` | ||||
|  | ||||
| ## Benchmark Suites | ||||
|  | ||||
| ### 1. Single Operations (`single_ops.rs`) | ||||
| Measures individual operation latency: | ||||
| - **String operations**: SET, GET, DEL, EXISTS | ||||
| - **Hash operations**: HSET, HGET, HGETALL, HDEL, HEXISTS | ||||
| - **List operations**: LPUSH, RPUSH, LPOP, RPOP, LRANGE | ||||
|  | ||||
| ### 2. Bulk Operations (`bulk_ops.rs`) | ||||
| Tests throughput with varying batch sizes: | ||||
| - Bulk insert (100, 1K, 10K records) | ||||
| - Bulk read (sequential and random) | ||||
| - Bulk update and delete | ||||
| - Mixed workload (70% reads, 30% writes) | ||||
|  | ||||
| ### 3. Scan Operations (`scan_ops.rs`) | ||||
| Evaluates iteration and filtering: | ||||
| - SCAN with pattern matching | ||||
| - HSCAN for hash fields | ||||
| - KEYS operation | ||||
| - DBSIZE, HKEYS, HVALS | ||||
|  | ||||
| ### 4. Concurrent Operations (`concurrent_ops.rs`) | ||||
| Simulates multi-client scenarios: | ||||
| - Concurrent writes (10, 50 clients) | ||||
| - Concurrent reads (10, 50 clients) | ||||
| - Mixed concurrent workload | ||||
| - Concurrent hash and list operations | ||||
|  | ||||
| ### 5. Memory Profiling (`memory_profile.rs`) | ||||
| Tracks memory usage patterns: | ||||
| - Per-operation memory allocation | ||||
| - Peak memory usage | ||||
| - Memory efficiency (bytes per record) | ||||
| - Allocation count tracking | ||||
|  | ||||
| ## Common Infrastructure | ||||
|  | ||||
| The `common/` directory provides shared utilities: | ||||
|  | ||||
| - **`data_generator.rs`**: Deterministic test data generation | ||||
| - **`backends.rs`**: Backend setup and management | ||||
| - **`metrics.rs`**: Custom metrics collection and export | ||||
|  | ||||
| ## Results Analysis | ||||
|  | ||||
| ### Parse Results | ||||
|  | ||||
| ```bash | ||||
| python3 scripts/parse_results.py target/criterion --csv results.csv --json results.json | ||||
| ``` | ||||
|  | ||||
| ### Compare Backends | ||||
|  | ||||
| ```bash | ||||
| python3 scripts/compare_backends.py results.csv --export comparison.csv | ||||
| ``` | ||||
|  | ||||
| ### View HTML Reports | ||||
|  | ||||
| Open `target/criterion/report/index.html` in a browser for interactive charts. | ||||
|  | ||||
| ## Documentation | ||||
|  | ||||
| - **[Running Benchmarks](../docs/running_benchmarks.md)** - Quick start guide | ||||
| - **[Benchmarking Guide](../docs/benchmarking.md)** - Complete user guide | ||||
| - **[Architecture](../docs/benchmark_architecture.md)** - System design | ||||
| - **[Implementation Plan](../docs/benchmark_implementation_plan.md)** - Development details | ||||
| - **[Sample Results](../docs/benchmark_results_sample.md)** - Example analysis | ||||
|  | ||||
| ## Key Features | ||||
|  | ||||
| ✅ **Statistical Rigor**: Uses Criterion for statistically sound measurements | ||||
| ✅ **Fair Comparison**: Identical test datasets across all backends | ||||
| ✅ **Reproducibility**: Fixed random seeds for deterministic results | ||||
| ✅ **Comprehensive Coverage**: Single ops, bulk ops, scans, concurrency | ||||
| ✅ **Memory Profiling**: Custom allocator tracking | ||||
| ✅ **Multiple Formats**: Terminal, CSV, JSON, HTML outputs | ||||
|  | ||||
| ## Performance Tips | ||||
|  | ||||
| ### For Accurate Results | ||||
|  | ||||
| 1. **System Preparation** | ||||
|    - Close unnecessary applications | ||||
|    - Disable CPU frequency scaling | ||||
|    - Ensure stable power supply | ||||
|  | ||||
| 2. **Benchmark Configuration** | ||||
|    - Use sufficient sample size (100+) | ||||
|    - Allow proper warm-up time | ||||
|    - Run multiple iterations | ||||
|  | ||||
| 3. **Environment Isolation** | ||||
|    - Use temporary directories | ||||
|    - Clean state between benchmarks | ||||
|    - Avoid shared resources | ||||
|  | ||||
| ### For Faster Iteration | ||||
|  | ||||
| ```bash | ||||
| # Quick mode (fewer samples) | ||||
| cargo bench -- --quick | ||||
|  | ||||
| # Specific operation only | ||||
| cargo bench -- single_ops/strings/set | ||||
|  | ||||
| # Specific backend only | ||||
| cargo bench -- redb | ||||
| ``` | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| ### High Variance | ||||
| - Close background applications | ||||
| - Disable CPU frequency scaling | ||||
| - Increase sample size | ||||
|  | ||||
| ### Out of Memory | ||||
| - Run suites separately | ||||
| - Reduce dataset sizes | ||||
| - Increase system swap | ||||
|  | ||||
| ### Slow Benchmarks | ||||
| - Use `--quick` flag | ||||
| - Run specific benchmarks | ||||
| - Reduce measurement time | ||||
|  | ||||
| See [Running Benchmarks](../docs/running_benchmarks.md) for detailed troubleshooting. | ||||
|  | ||||
| ## Contributing | ||||
|  | ||||
| When adding new benchmarks: | ||||
|  | ||||
| 1. Follow existing patterns in benchmark files | ||||
| 2. Use common infrastructure (data_generator, backends) | ||||
| 3. Ensure fair comparison between backends | ||||
| 4. Add documentation for new metrics | ||||
| 5. Test with both `--quick` and full runs | ||||
|  | ||||
| ## Example Output | ||||
|  | ||||
| ``` | ||||
| single_ops/strings/set/redb/100bytes | ||||
|                         time:   [1.234 µs 1.245 µs 1.256 µs] | ||||
|                         thrpt:  [802.5K ops/s 810.2K ops/s 818.1K ops/s] | ||||
|  | ||||
| single_ops/strings/set/sled/100bytes | ||||
|                         time:   [1.567 µs 1.578 µs 1.589 µs] | ||||
|                         thrpt:  [629.5K ops/s 633.7K ops/s 638.1K ops/s] | ||||
| ``` | ||||
|  | ||||
| ## License | ||||
|  | ||||
| Same as HeroDB project. | ||||
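For orientation, a single-operation GET benchmark built on the shared infrastructure that follows (`BackendType`, `setup_populated_backend`, `DataGenerator`) might look roughly like the sketch below; it is illustrative and not copied from the actual `single_ops.rs` suite.

```rust
// Illustrative single-operation GET benchmark, following the same patterns as
// bulk_ops.rs below; names and sizes are examples, not the real suite contents.
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

mod common;
use common::*;

fn bench_get(c: &mut Criterion) {
    let mut group = c.benchmark_group("single_ops/strings/get");
    for backend_type in BackendType::all() {
        // Pre-populate 1,000 keys with 100-byte values, as the bulk suites do.
        let backend = setup_populated_backend(backend_type, 1_000, 100)
            .expect("Failed to setup backend");
        let generator = DataGenerator::new(42);

        group.bench_with_input(
            BenchmarkId::new(backend.name(), "100bytes"),
            &backend,
            |b, backend| {
                b.iter(|| {
                    let key = generator.generate_key("bench:key", 0);
                    backend.storage.get(&key).unwrap();
                });
            },
        );
    }
    group.finish();
}

criterion_group!(benches, bench_get);
criterion_main!(benches);
```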

benches/bulk_ops.rs (new file): 336 lines

							| @@ -0,0 +1,336 @@ | ||||
| // benches/bulk_ops.rs | ||||
| use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize}; | ||||
|  | ||||
| mod common; | ||||
| use common::*; | ||||
|  | ||||
| /// Benchmark bulk insert operations with varying batch sizes | ||||
| fn bench_bulk_insert(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/insert"); | ||||
|      | ||||
|     for size in [100, 1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(42); | ||||
|                             let data = generator.generate_string_pairs(size, 100); | ||||
|                             (backend, data) | ||||
|                         }, | ||||
|                         |(backend, data)| { | ||||
|                             for (key, value) in data { | ||||
|                                 backend.storage.set(key, value).unwrap(); | ||||
|                             } | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk sequential read operations | ||||
| fn bench_bulk_read_sequential(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/read_sequential"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &(backend, size), | ||||
|                 |b, (backend, size)| { | ||||
|                     b.iter(|| { | ||||
|                         for i in 0..*size { | ||||
|                             let key = generator.generate_key("bench:key", i); | ||||
|                             backend.storage.get(&key).unwrap(); | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk random read operations | ||||
| fn bench_bulk_read_random(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/read_random"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|              | ||||
|             // Pre-generate random indices for fair comparison | ||||
|             let indices: Vec<usize> = (0..size) | ||||
|                 .map(|_| rand::random::<usize>() % size) | ||||
|                 .collect(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &(backend, indices), | ||||
|                 |b, (backend, indices)| { | ||||
|                     b.iter(|| { | ||||
|                         for &idx in indices { | ||||
|                             let key = generator.generate_key("bench:key", idx); | ||||
|                             backend.storage.get(&key).unwrap(); | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk update operations | ||||
| fn bench_bulk_update(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/update"); | ||||
|      | ||||
|     for size in [100, 1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             let backend = setup_populated_backend(backend_type, size, 100).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(43); // Different seed for updates | ||||
|                             let updates = generator.generate_string_pairs(size, 100); | ||||
|                             (backend, updates) | ||||
|                         }, | ||||
|                         |(backend, updates)| { | ||||
|                             for (key, value) in updates { | ||||
|                                 backend.storage.set(key, value).unwrap(); | ||||
|                             } | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk delete operations | ||||
| fn bench_bulk_delete(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/delete"); | ||||
|      | ||||
|     for size in [100, 1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             let backend = setup_populated_backend(backend_type, size, 100).unwrap(); | ||||
|                             let generator = DataGenerator::new(42); | ||||
|                             let keys: Vec<String> = (0..size) | ||||
|                                 .map(|i| generator.generate_key("bench:key", i)) | ||||
|                                 .collect(); | ||||
|                             (backend, keys) | ||||
|                         }, | ||||
|                         |(backend, keys)| { | ||||
|                             for key in keys { | ||||
|                                 backend.storage.del(key).unwrap(); | ||||
|                             } | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk hash insert operations | ||||
| fn bench_bulk_hash_insert(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/hash_insert"); | ||||
|      | ||||
|     for size in [100, 1_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(42); | ||||
|                             let data = generator.generate_hash_data(size, 10, 100); | ||||
|                             (backend, data) | ||||
|                         }, | ||||
|                         |(backend, data)| { | ||||
|                             for (key, fields) in data { | ||||
|                                 backend.storage.hset(&key, fields).unwrap(); | ||||
|                             } | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk hash read operations (HGETALL) | ||||
| fn bench_bulk_hash_read(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/hash_read"); | ||||
|      | ||||
|     for size in [100, 1_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_hashes(backend_type, size, 10, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &(backend, size), | ||||
|                 |b, (backend, size)| { | ||||
|                     b.iter(|| { | ||||
|                         for i in 0..*size { | ||||
|                             let key = generator.generate_key("bench:hash", i); | ||||
|                             backend.storage.hgetall(&key).unwrap(); | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk list insert operations | ||||
| fn bench_bulk_list_insert(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/list_insert"); | ||||
|      | ||||
|     for size in [100, 1_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(42); | ||||
|                             let data = generator.generate_list_data(size, 10, 100); | ||||
|                             (backend, data) | ||||
|                         }, | ||||
|                         |(backend, data)| { | ||||
|                             for (key, elements) in data { | ||||
|                                 backend.storage.rpush(&key, elements).unwrap(); | ||||
|                             } | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark bulk list read operations (LRANGE) | ||||
| fn bench_bulk_list_read(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/list_read"); | ||||
|      | ||||
|     for size in [100, 1_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_lists(backend_type, size, 10, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &(backend, size), | ||||
|                 |b, (backend, size)| { | ||||
|                     b.iter(|| { | ||||
|                         for i in 0..*size { | ||||
|                             let key = generator.generate_key("bench:list", i); | ||||
|                             backend.storage.lrange(&key, 0, -1).unwrap(); | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark mixed workload (70% reads, 30% writes) | ||||
| fn bench_mixed_workload(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("bulk_ops/mixed_workload"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let mut generator = DataGenerator::new(42); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &(backend, size), | ||||
|                 |b, (backend, size)| { | ||||
|                     b.iter(|| { | ||||
|                         for i in 0..*size { | ||||
|                             if i % 10 < 7 { | ||||
|                                 // 70% reads | ||||
|                                 let key = generator.generate_key("bench:key", i % size); | ||||
|                                 backend.storage.get(&key).unwrap(); | ||||
|                             } else { | ||||
|                                 // 30% writes | ||||
|                                 let key = generator.generate_key("bench:key", i); | ||||
|                                 let value = generator.generate_value(100); | ||||
|                                 backend.storage.set(key, value).unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     bench_bulk_insert, | ||||
|     bench_bulk_read_sequential, | ||||
|     bench_bulk_read_random, | ||||
|     bench_bulk_update, | ||||
|     bench_bulk_delete, | ||||
|     bench_bulk_hash_insert, | ||||
|     bench_bulk_hash_read, | ||||
|     bench_bulk_list_insert, | ||||
|     bench_bulk_list_read, | ||||
|     bench_mixed_workload, | ||||
| ); | ||||
|  | ||||
| criterion_main!(benches); | ||||

benches/common/backends.rs (new file): 197 lines

							| @@ -0,0 +1,197 @@ | ||||
| // benches/common/backends.rs | ||||
| use herodb::storage::Storage; | ||||
| use herodb::storage_sled::SledStorage; | ||||
| use herodb::storage_trait::StorageBackend; | ||||
| use std::sync::Arc; | ||||
| use tempfile::TempDir; | ||||
|  | ||||
| /// Backend type identifier | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||
| pub enum BackendType { | ||||
|     Redb, | ||||
|     Sled, | ||||
| } | ||||
|  | ||||
| impl BackendType { | ||||
|     pub fn name(&self) -> &'static str { | ||||
|         match self { | ||||
|             BackendType::Redb => "redb", | ||||
|             BackendType::Sled => "sled", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn all() -> Vec<BackendType> { | ||||
|         vec![BackendType::Redb, BackendType::Sled] | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Wrapper for benchmark backends with automatic cleanup | ||||
| pub struct BenchmarkBackend { | ||||
|     pub storage: Arc<dyn StorageBackend>, | ||||
|     pub backend_type: BackendType, | ||||
|     _temp_dir: TempDir, // Kept for automatic cleanup | ||||
| } | ||||
|  | ||||
| impl BenchmarkBackend { | ||||
|     /// Create a new redb backend for benchmarking | ||||
|     pub fn new_redb() -> Result<Self, Box<dyn std::error::Error>> { | ||||
|         let temp_dir = TempDir::new()?; | ||||
|         let db_path = temp_dir.path().join("bench.db"); | ||||
|         let storage = Storage::new(db_path, false, None)?; | ||||
|          | ||||
|         Ok(Self { | ||||
|             storage: Arc::new(storage), | ||||
|             backend_type: BackendType::Redb, | ||||
|             _temp_dir: temp_dir, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Create a new sled backend for benchmarking | ||||
|     pub fn new_sled() -> Result<Self, Box<dyn std::error::Error>> { | ||||
|         let temp_dir = TempDir::new()?; | ||||
|         let db_path = temp_dir.path().join("bench.sled"); | ||||
|         let storage = SledStorage::new(db_path, false, None)?; | ||||
|          | ||||
|         Ok(Self { | ||||
|             storage: Arc::new(storage), | ||||
|             backend_type: BackendType::Sled, | ||||
|             _temp_dir: temp_dir, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Create a backend of the specified type | ||||
|     pub fn new(backend_type: BackendType) -> Result<Self, Box<dyn std::error::Error>> { | ||||
|         match backend_type { | ||||
|             BackendType::Redb => Self::new_redb(), | ||||
|             BackendType::Sled => Self::new_sled(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Get the backend name for display | ||||
|     pub fn name(&self) -> &'static str { | ||||
|         self.backend_type.name() | ||||
|     } | ||||
|  | ||||
|     /// Pre-populate the backend with test data | ||||
|     pub fn populate_strings(&self, data: &[(String, String)]) -> Result<(), Box<dyn std::error::Error>> { | ||||
|         for (key, value) in data { | ||||
|             self.storage.set(key.clone(), value.clone())?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Pre-populate with hash data | ||||
|     pub fn populate_hashes(&self, data: &[(String, Vec<(String, String)>)]) -> Result<(), Box<dyn std::error::Error>> { | ||||
|         for (key, fields) in data { | ||||
|             self.storage.hset(key, fields.clone())?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Pre-populate with list data | ||||
|     pub fn populate_lists(&self, data: &[(String, Vec<String>)]) -> Result<(), Box<dyn std::error::Error>> { | ||||
|         for (key, elements) in data { | ||||
|             self.storage.rpush(key, elements.clone())?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Clear all data from the backend | ||||
|     pub fn clear(&self) -> Result<(), Box<dyn std::error::Error>> { | ||||
|         self.storage.flushdb()?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Get the number of keys in the database | ||||
|     pub fn dbsize(&self) -> Result<i64, Box<dyn std::error::Error>> { | ||||
|         Ok(self.storage.dbsize()?) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Helper function to create and populate a backend for read benchmarks | ||||
| pub fn setup_populated_backend( | ||||
|     backend_type: BackendType, | ||||
|     num_keys: usize, | ||||
|     value_size: usize, | ||||
| ) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> { | ||||
|     use super::DataGenerator; | ||||
|      | ||||
|     let backend = BenchmarkBackend::new(backend_type)?; | ||||
|     let mut generator = DataGenerator::new(42); | ||||
|     let data = generator.generate_string_pairs(num_keys, value_size); | ||||
|     backend.populate_strings(&data)?; | ||||
|      | ||||
|     Ok(backend) | ||||
| } | ||||
|  | ||||
| /// Helper function to create and populate a backend with hash data | ||||
| pub fn setup_populated_backend_hashes( | ||||
|     backend_type: BackendType, | ||||
|     num_hashes: usize, | ||||
|     fields_per_hash: usize, | ||||
|     value_size: usize, | ||||
| ) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> { | ||||
|     use super::DataGenerator; | ||||
|      | ||||
|     let backend = BenchmarkBackend::new(backend_type)?; | ||||
|     let mut generator = DataGenerator::new(42); | ||||
|     let data = generator.generate_hash_data(num_hashes, fields_per_hash, value_size); | ||||
|     backend.populate_hashes(&data)?; | ||||
|      | ||||
|     Ok(backend) | ||||
| } | ||||
|  | ||||
| /// Helper function to create and populate a backend with list data | ||||
| pub fn setup_populated_backend_lists( | ||||
|     backend_type: BackendType, | ||||
|     num_lists: usize, | ||||
|     elements_per_list: usize, | ||||
|     element_size: usize, | ||||
| ) -> Result<BenchmarkBackend, Box<dyn std::error::Error>> { | ||||
|     use super::DataGenerator; | ||||
|      | ||||
|     let backend = BenchmarkBackend::new(backend_type)?; | ||||
|     let mut generator = DataGenerator::new(42); | ||||
|     let data = generator.generate_list_data(num_lists, elements_per_list, element_size); | ||||
|     backend.populate_lists(&data)?; | ||||
|      | ||||
|     Ok(backend) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_backend_creation() { | ||||
|         let redb = BenchmarkBackend::new_redb(); | ||||
|         assert!(redb.is_ok()); | ||||
|  | ||||
|         let sled = BenchmarkBackend::new_sled(); | ||||
|         assert!(sled.is_ok()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_backend_populate() { | ||||
|         let backend = BenchmarkBackend::new_redb().unwrap(); | ||||
|         let data = vec![ | ||||
|             ("key1".to_string(), "value1".to_string()), | ||||
|             ("key2".to_string(), "value2".to_string()), | ||||
|         ]; | ||||
|          | ||||
|         backend.populate_strings(&data).unwrap(); | ||||
|         assert_eq!(backend.dbsize().unwrap(), 2); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_backend_clear() { | ||||
|         let backend = BenchmarkBackend::new_redb().unwrap(); | ||||
|         let data = vec![("key1".to_string(), "value1".to_string())]; | ||||
|          | ||||
|         backend.populate_strings(&data).unwrap(); | ||||
|         assert_eq!(backend.dbsize().unwrap(), 1); | ||||
|          | ||||
|         backend.clear().unwrap(); | ||||
|         assert_eq!(backend.dbsize().unwrap(), 0); | ||||
|     } | ||||
| } | ||||

benches/common/data_generator.rs (new file): 131 lines

							| @@ -0,0 +1,131 @@ | ||||
| // benches/common/data_generator.rs | ||||
| use rand::{Rng, SeedableRng}; | ||||
| use rand::rngs::StdRng; | ||||
|  | ||||
| /// Deterministic data generator for benchmarks | ||||
| pub struct DataGenerator { | ||||
|     rng: StdRng, | ||||
| } | ||||
|  | ||||
| impl DataGenerator { | ||||
|     /// Create a new data generator with a fixed seed for reproducibility | ||||
|     pub fn new(seed: u64) -> Self { | ||||
|         Self { | ||||
|             rng: StdRng::seed_from_u64(seed), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Generate a single key with the given prefix and ID | ||||
|     pub fn generate_key(&self, prefix: &str, id: usize) -> String { | ||||
|         format!("{}:{:08}", prefix, id) | ||||
|     } | ||||
|  | ||||
|     /// Generate a random string value of the specified size | ||||
|     pub fn generate_value(&mut self, size: usize) -> String { | ||||
|         const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; | ||||
|         (0..size) | ||||
|             .map(|_| { | ||||
|                 let idx = self.rng.gen_range(0..CHARSET.len()); | ||||
|                 CHARSET[idx] as char | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     /// Generate a batch of key-value pairs | ||||
|     pub fn generate_string_pairs(&mut self, count: usize, value_size: usize) -> Vec<(String, String)> { | ||||
|         (0..count) | ||||
|             .map(|i| { | ||||
|                 let key = self.generate_key("bench:key", i); | ||||
|                 let value = self.generate_value(value_size); | ||||
|                 (key, value) | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     /// Generate hash data (key -> field-value pairs) | ||||
|     pub fn generate_hash_data(&mut self, num_hashes: usize, fields_per_hash: usize, value_size: usize)  | ||||
|         -> Vec<(String, Vec<(String, String)>)> { | ||||
|         (0..num_hashes) | ||||
|             .map(|i| { | ||||
|                 let hash_key = self.generate_key("bench:hash", i); | ||||
|                 let fields: Vec<(String, String)> = (0..fields_per_hash) | ||||
|                     .map(|j| { | ||||
|                         let field = format!("field{}", j); | ||||
|                         let value = self.generate_value(value_size); | ||||
|                         (field, value) | ||||
|                     }) | ||||
|                     .collect(); | ||||
|                 (hash_key, fields) | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     /// Generate list data (key -> list of elements) | ||||
|     pub fn generate_list_data(&mut self, num_lists: usize, elements_per_list: usize, element_size: usize)  | ||||
|         -> Vec<(String, Vec<String>)> { | ||||
|         (0..num_lists) | ||||
|             .map(|i| { | ||||
|                 let list_key = self.generate_key("bench:list", i); | ||||
|                 let elements: Vec<String> = (0..elements_per_list) | ||||
|                     .map(|_| self.generate_value(element_size)) | ||||
|                     .collect(); | ||||
|                 (list_key, elements) | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     /// Generate keys for pattern matching tests | ||||
|     pub fn generate_pattern_keys(&mut self, count: usize) -> Vec<String> { | ||||
|         let mut keys = Vec::new(); | ||||
|          | ||||
|         // Generate keys with different patterns | ||||
|         for i in 0..count / 3 { | ||||
|             keys.push(format!("user:{}:profile", i)); | ||||
|         } | ||||
|         for i in 0..count / 3 { | ||||
|             keys.push(format!("session:{}:data", i)); | ||||
|         } | ||||
|         for i in 0..count / 3 { | ||||
|             keys.push(format!("cache:{}:value", i)); | ||||
|         } | ||||
|          | ||||
|         keys | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_deterministic_generation() { | ||||
|         let mut generator1 = DataGenerator::new(42); | ||||
|         let mut generator2 = DataGenerator::new(42); | ||||
|  | ||||
|         let pairs1 = generator1.generate_string_pairs(10, 50); | ||||
|         let pairs2 = generator2.generate_string_pairs(10, 50); | ||||
|  | ||||
|         assert_eq!(pairs1, pairs2, "Same seed should produce same data"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_value_size() { | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|         let value = generator.generate_value(100); | ||||
|         assert_eq!(value.len(), 100); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_hash_generation() { | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|         let hashes = generator.generate_hash_data(5, 10, 50); | ||||
|          | ||||
|         assert_eq!(hashes.len(), 5); | ||||
|         for (_, fields) in hashes { | ||||
|             assert_eq!(fields.len(), 10); | ||||
|             for (_, value) in fields { | ||||
|                 assert_eq!(value.len(), 50); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||

benches/common/metrics.rs (new file): 289 lines

							| @@ -0,0 +1,289 @@ | ||||
| // benches/common/metrics.rs | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use std::time::Duration; | ||||
|  | ||||
| /// Custom metrics for benchmark results | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub struct BenchmarkMetrics { | ||||
|     pub operation: String, | ||||
|     pub backend: String, | ||||
|     pub dataset_size: usize, | ||||
|     pub mean_ns: u64, | ||||
|     pub median_ns: u64, | ||||
|     pub p95_ns: u64, | ||||
|     pub p99_ns: u64, | ||||
|     pub std_dev_ns: u64, | ||||
|     pub throughput_ops_sec: f64, | ||||
| } | ||||
|  | ||||
| impl BenchmarkMetrics { | ||||
|     pub fn new( | ||||
|         operation: String, | ||||
|         backend: String, | ||||
|         dataset_size: usize, | ||||
|     ) -> Self { | ||||
|         Self { | ||||
|             operation, | ||||
|             backend, | ||||
|             dataset_size, | ||||
|             mean_ns: 0, | ||||
|             median_ns: 0, | ||||
|             p95_ns: 0, | ||||
|             p99_ns: 0, | ||||
|             std_dev_ns: 0, | ||||
|             throughput_ops_sec: 0.0, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Convert to CSV row format | ||||
|     pub fn to_csv_row(&self) -> String { | ||||
|         format!( | ||||
|             "{},{},{},{},{},{},{},{},{:.2}", | ||||
|             self.backend, | ||||
|             self.operation, | ||||
|             self.dataset_size, | ||||
|             self.mean_ns, | ||||
|             self.median_ns, | ||||
|             self.p95_ns, | ||||
|             self.p99_ns, | ||||
|             self.std_dev_ns, | ||||
|             self.throughput_ops_sec | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Get CSV header | ||||
|     pub fn csv_header() -> String { | ||||
|         "backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,std_dev_ns,throughput_ops_sec".to_string() | ||||
|     } | ||||
|  | ||||
|     /// Convert to JSON | ||||
|     pub fn to_json(&self) -> serde_json::Value { | ||||
|         serde_json::json!({ | ||||
|             "backend": self.backend, | ||||
|             "operation": self.operation, | ||||
|             "dataset_size": self.dataset_size, | ||||
|             "metrics": { | ||||
|                 "mean_ns": self.mean_ns, | ||||
|                 "median_ns": self.median_ns, | ||||
|                 "p95_ns": self.p95_ns, | ||||
|                 "p99_ns": self.p99_ns, | ||||
|                 "std_dev_ns": self.std_dev_ns, | ||||
|                 "throughput_ops_sec": self.throughput_ops_sec | ||||
|             } | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Calculate throughput from mean latency | ||||
|     pub fn calculate_throughput(&mut self) { | ||||
|         if self.mean_ns > 0 { | ||||
|             self.throughput_ops_sec = 1_000_000_000.0 / self.mean_ns as f64; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Format duration for display | ||||
|     pub fn format_duration(nanos: u64) -> String { | ||||
|         if nanos < 1_000 { | ||||
|             format!("{} ns", nanos) | ||||
|         } else if nanos < 1_000_000 { | ||||
|             format!("{:.2} µs", nanos as f64 / 1_000.0) | ||||
|         } else if nanos < 1_000_000_000 { | ||||
|             format!("{:.2} ms", nanos as f64 / 1_000_000.0) | ||||
|         } else { | ||||
|             format!("{:.2} s", nanos as f64 / 1_000_000_000.0) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Pretty print the metrics | ||||
|     pub fn display(&self) -> String { | ||||
|         format!( | ||||
|             "{}/{} (n={}): mean={}, median={}, p95={}, p99={}, throughput={:.0} ops/sec", | ||||
|             self.backend, | ||||
|             self.operation, | ||||
|             self.dataset_size, | ||||
|             Self::format_duration(self.mean_ns), | ||||
|             Self::format_duration(self.median_ns), | ||||
|             Self::format_duration(self.p95_ns), | ||||
|             Self::format_duration(self.p99_ns), | ||||
|             self.throughput_ops_sec | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Memory metrics for profiling | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub struct MemoryMetrics { | ||||
|     pub operation: String, | ||||
|     pub backend: String, | ||||
|     pub allocations: usize, | ||||
|     pub peak_bytes: usize, | ||||
|     pub avg_bytes_per_op: f64, | ||||
| } | ||||
|  | ||||
| impl MemoryMetrics { | ||||
|     pub fn new(operation: String, backend: String) -> Self { | ||||
|         Self { | ||||
|             operation, | ||||
|             backend, | ||||
|             allocations: 0, | ||||
|             peak_bytes: 0, | ||||
|             avg_bytes_per_op: 0.0, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Convert to CSV row format | ||||
|     pub fn to_csv_row(&self) -> String { | ||||
|         format!( | ||||
|             "{},{},{},{},{:.2}", | ||||
|             self.backend, | ||||
|             self.operation, | ||||
|             self.allocations, | ||||
|             self.peak_bytes, | ||||
|             self.avg_bytes_per_op | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Get CSV header | ||||
|     pub fn csv_header() -> String { | ||||
|         "backend,operation,allocations,peak_bytes,avg_bytes_per_op".to_string() | ||||
|     } | ||||
|  | ||||
|     /// Format bytes for display | ||||
|     pub fn format_bytes(bytes: usize) -> String { | ||||
|         if bytes < 1024 { | ||||
|             format!("{} B", bytes) | ||||
|         } else if bytes < 1024 * 1024 { | ||||
|             format!("{:.2} KB", bytes as f64 / 1024.0) | ||||
|         } else if bytes < 1024 * 1024 * 1024 { | ||||
|             format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0)) | ||||
|         } else { | ||||
|             format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0)) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Pretty print the metrics | ||||
|     pub fn display(&self) -> String { | ||||
|         format!( | ||||
|             "{}/{}: {} allocations, peak={}, avg={}", | ||||
|             self.backend, | ||||
|             self.operation, | ||||
|             self.allocations, | ||||
|             Self::format_bytes(self.peak_bytes), | ||||
|             Self::format_bytes(self.avg_bytes_per_op as usize) | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Collection of benchmark results for comparison | ||||
| #[derive(Debug, Default)] | ||||
| pub struct BenchmarkResults { | ||||
|     pub metrics: Vec<BenchmarkMetrics>, | ||||
|     pub memory_metrics: Vec<MemoryMetrics>, | ||||
| } | ||||
|  | ||||
| impl BenchmarkResults { | ||||
|     pub fn new() -> Self { | ||||
|         Self::default() | ||||
|     } | ||||
|  | ||||
|     pub fn add_metric(&mut self, metric: BenchmarkMetrics) { | ||||
|         self.metrics.push(metric); | ||||
|     } | ||||
|  | ||||
|     pub fn add_memory_metric(&mut self, metric: MemoryMetrics) { | ||||
|         self.memory_metrics.push(metric); | ||||
|     } | ||||
|  | ||||
|     /// Export all metrics to CSV format | ||||
|     pub fn to_csv(&self) -> String { | ||||
|         let mut output = String::new(); | ||||
|          | ||||
|         if !self.metrics.is_empty() { | ||||
|             output.push_str(&BenchmarkMetrics::csv_header()); | ||||
|             output.push('\n'); | ||||
|             for metric in &self.metrics { | ||||
|                 output.push_str(&metric.to_csv_row()); | ||||
|                 output.push('\n'); | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         if !self.memory_metrics.is_empty() { | ||||
|             output.push('\n'); | ||||
|             output.push_str(&MemoryMetrics::csv_header()); | ||||
|             output.push('\n'); | ||||
|             for metric in &self.memory_metrics { | ||||
|                 output.push_str(&metric.to_csv_row()); | ||||
|                 output.push('\n'); | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         output | ||||
|     } | ||||
|  | ||||
|     /// Export all metrics to JSON format | ||||
|     pub fn to_json(&self) -> serde_json::Value { | ||||
|         serde_json::json!({ | ||||
|             "benchmarks": self.metrics.iter().map(|m| m.to_json()).collect::<Vec<_>>(), | ||||
|             "memory": self.memory_metrics | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Save results to a file | ||||
|     pub fn save_csv(&self, path: &str) -> std::io::Result<()> { | ||||
|         std::fs::write(path, self.to_csv()) | ||||
|     } | ||||
|  | ||||
|     pub fn save_json(&self, path: &str) -> std::io::Result<()> { | ||||
|         let json = serde_json::to_string_pretty(&self.to_json())?; | ||||
|         std::fs::write(path, json) | ||||
|     } | ||||
| } | ||||
|  | ||||
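| // Illustrative usage sketch (editorial addition, not part of the original change): | ||||
| // collect one timing metric and write both export formats. The `BenchmarkMetrics::new` | ||||
| // arguments mirror the tests below; the output paths are hypothetical. | ||||
| #[allow(dead_code)] | ||||
| fn example_export() -> std::io::Result<()> { | ||||
|     let mut results = BenchmarkResults::new(); | ||||
|     let mut metric = BenchmarkMetrics::new("set".to_string(), "redb".to_string(), 1_000); | ||||
|     metric.mean_ns = 1_245; | ||||
|     metric.calculate_throughput(); | ||||
|     results.add_metric(metric); | ||||
|     results.save_csv("target/bench_results.csv")?; | ||||
|     results.save_json("target/bench_results.json") | ||||
| } | ||||
|  | ||||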
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_metrics_creation() { | ||||
|         let mut metric = BenchmarkMetrics::new( | ||||
|             "set".to_string(), | ||||
|             "redb".to_string(), | ||||
|             1000, | ||||
|         ); | ||||
|         metric.mean_ns = 1_245; | ||||
|         metric.calculate_throughput(); | ||||
|          | ||||
|         assert!(metric.throughput_ops_sec > 0.0); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_csv_export() { | ||||
|         let mut results = BenchmarkResults::new(); | ||||
|         let mut metric = BenchmarkMetrics::new( | ||||
|             "set".to_string(), | ||||
|             "redb".to_string(), | ||||
|             1000, | ||||
|         ); | ||||
|         metric.mean_ns = 1_245; | ||||
|         metric.calculate_throughput(); | ||||
|          | ||||
|         results.add_metric(metric); | ||||
|         let csv = results.to_csv(); | ||||
|          | ||||
|         assert!(csv.contains("backend,operation")); | ||||
|         assert!(csv.contains("redb,set")); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_duration_formatting() { | ||||
|         assert_eq!(BenchmarkMetrics::format_duration(500), "500 ns"); | ||||
|         assert_eq!(BenchmarkMetrics::format_duration(1_500), "1.50 µs"); | ||||
|         assert_eq!(BenchmarkMetrics::format_duration(1_500_000), "1.50 ms"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_bytes_formatting() { | ||||
|         assert_eq!(MemoryMetrics::format_bytes(512), "512 B"); | ||||
|         assert_eq!(MemoryMetrics::format_bytes(2048), "2.00 KB"); | ||||
|         assert_eq!(MemoryMetrics::format_bytes(2_097_152), "2.00 MB"); | ||||
|     } | ||||
| } | ||||
							
								
								
									
8  benches/common/mod.rs  Normal file
							| @@ -0,0 +1,8 @@ | ||||
| // benches/common/mod.rs | ||||
| pub mod data_generator; | ||||
| pub mod backends; | ||||
| pub mod metrics; | ||||
|  | ||||
| pub use data_generator::*; | ||||
| pub use backends::*; | ||||
| pub use metrics::*; | ||||
							
								
								
									
317  benches/concurrent_ops.rs  Normal file
							| @@ -0,0 +1,317 @@ | ||||
| // benches/concurrent_ops.rs | ||||
| use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; | ||||
| use tokio::runtime::Runtime; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| mod common; | ||||
| use common::*; | ||||
|  | ||||
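| // Each iteration spawns `num_clients` Tokio tasks and awaits them all, so the | ||||
| // measured time includes task spawn/join overhead on top of the storage work. | ||||
| // Every benchmark group in this file follows the same pattern, which keeps the | ||||
| // backend-to-backend comparison consistent. | ||||
|  | ||||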
| /// Benchmark concurrent write operations | ||||
| fn bench_concurrent_writes(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/writes"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let mut generator = DataGenerator::new(42 + client_id as u64); | ||||
|                                     for i in 0..100 { | ||||
|                                         let key = format!("client:{}:key:{}", client_id, i); | ||||
|                                         let value = generator.generate_value(100); | ||||
|                                         storage.set(key, value).unwrap(); | ||||
|                                     } | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark concurrent read operations | ||||
| fn bench_concurrent_reads(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/reads"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             // Pre-populate with data | ||||
|             let backend = setup_populated_backend(backend_type, 10_000, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let generator = DataGenerator::new(42); | ||||
|                                     for i in 0..100 { | ||||
|                                         let key_id = (client_id * 100 + i) % 10_000; | ||||
|                                         let key = generator.generate_key("bench:key", key_id); | ||||
|                                         storage.get(&key).unwrap(); | ||||
|                                     } | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark mixed concurrent workload (70% reads, 30% writes) | ||||
| fn bench_concurrent_mixed(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/mixed"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             // Pre-populate with data | ||||
|             let backend = setup_populated_backend(backend_type, 10_000, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let mut generator = DataGenerator::new(42 + client_id as u64); | ||||
|                                     for i in 0..100 { | ||||
|                                         if i % 10 < 7 { | ||||
|                                             // 70% reads | ||||
|                                             let key_id = (client_id * 100 + i) % 10_000; | ||||
|                                             let key = generator.generate_key("bench:key", key_id); | ||||
|                                             storage.get(&key).unwrap(); | ||||
|                                         } else { | ||||
|                                             // 30% writes | ||||
|                                             let key = format!("client:{}:key:{}", client_id, i); | ||||
|                                             let value = generator.generate_value(100); | ||||
|                                             storage.set(key, value).unwrap(); | ||||
|                                         } | ||||
|                                     } | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark concurrent hash operations | ||||
| fn bench_concurrent_hash_ops(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/hash_ops"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let mut generator = DataGenerator::new(42 + client_id as u64); | ||||
|                                     for i in 0..50 { | ||||
|                                         let key = format!("client:{}:hash:{}", client_id, i); | ||||
|                                         let field = format!("field{}", i % 10); | ||||
|                                         let value = generator.generate_value(100); | ||||
|                                         storage.hset(&key, vec![(field, value)]).unwrap(); | ||||
|                                     } | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark concurrent list operations | ||||
| fn bench_concurrent_list_ops(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/list_ops"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let mut generator = DataGenerator::new(42 + client_id as u64); | ||||
|                                     for i in 0..50 { | ||||
|                                         let key = format!("client:{}:list:{}", client_id, i); | ||||
|                                         let element = generator.generate_value(100); | ||||
|                                         storage.rpush(&key, vec![element]).unwrap(); | ||||
|                                     } | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark concurrent scan operations | ||||
| fn bench_concurrent_scans(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("concurrent_ops/scans"); | ||||
|      | ||||
|     for num_clients in [10, 50] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             // Pre-populate with data | ||||
|             let backend = setup_populated_backend(backend_type, 10_000, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let storage = backend.storage.clone(); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/clients", backend.name()), num_clients), | ||||
|                 &(storage, num_clients), | ||||
|                 |b, (storage, num_clients)| { | ||||
|                     let rt = Runtime::new().unwrap(); | ||||
|                     b.to_async(&rt).iter(|| { | ||||
|                         let storage = storage.clone(); | ||||
|                         let num_clients = *num_clients; | ||||
|                         async move { | ||||
|                             let mut tasks = Vec::new(); | ||||
|                              | ||||
|                             for _client_id in 0..num_clients { | ||||
|                                 let storage = storage.clone(); | ||||
|                                 let task = tokio::spawn(async move { | ||||
|                                     let mut cursor = 0u64; | ||||
|                                     let mut total = 0; | ||||
|                                     loop { | ||||
|                                         let (next_cursor, items) = storage | ||||
|                                             .scan(cursor, None, Some(100)) | ||||
|                                             .unwrap(); | ||||
|                                         total += items.len(); | ||||
|                                         if next_cursor == 0 { | ||||
|                                             break; | ||||
|                                         } | ||||
|                                         cursor = next_cursor; | ||||
|                                     } | ||||
|                                     total | ||||
|                                 }); | ||||
|                                 tasks.push(task); | ||||
|                             } | ||||
|                              | ||||
|                             for task in tasks { | ||||
|                                 task.await.unwrap(); | ||||
|                             } | ||||
|                         } | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     bench_concurrent_writes, | ||||
|     bench_concurrent_reads, | ||||
|     bench_concurrent_mixed, | ||||
|     bench_concurrent_hash_ops, | ||||
|     bench_concurrent_list_ops, | ||||
|     bench_concurrent_scans, | ||||
| ); | ||||
|  | ||||
| criterion_main!(benches); | ||||
							
								
								
									
337  benches/memory_profile.rs  Normal file
							| @@ -0,0 +1,337 @@ | ||||
| // benches/memory_profile.rs | ||||
| use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize}; | ||||
| use std::alloc::{GlobalAlloc, Layout, System}; | ||||
| use std::sync::atomic::{AtomicUsize, Ordering}; | ||||
|  | ||||
| mod common; | ||||
| use common::*; | ||||
|  | ||||
| // Simple memory tracking allocator | ||||
| struct TrackingAllocator; | ||||
|  | ||||
| static ALLOCATED: AtomicUsize = AtomicUsize::new(0); | ||||
| static DEALLOCATED: AtomicUsize = AtomicUsize::new(0); | ||||
| static PEAK: AtomicUsize = AtomicUsize::new(0); | ||||
| static ALLOC_COUNT: AtomicUsize = AtomicUsize::new(0); | ||||
|  | ||||
| unsafe impl GlobalAlloc for TrackingAllocator { | ||||
|     unsafe fn alloc(&self, layout: Layout) -> *mut u8 { | ||||
|         let ret = System.alloc(layout); | ||||
|         if !ret.is_null() { | ||||
|             let size = layout.size(); | ||||
|             ALLOCATED.fetch_add(size, Ordering::SeqCst); | ||||
|             ALLOC_COUNT.fetch_add(1, Ordering::SeqCst); | ||||
|              | ||||
|             // Update peak if necessary (saturating: memory allocated before a | ||||
|             // counter reset may be freed afterwards, so DEALLOCATED can exceed ALLOCATED) | ||||
|             let current = ALLOCATED.load(Ordering::SeqCst) | ||||
|                 .saturating_sub(DEALLOCATED.load(Ordering::SeqCst)); | ||||
|             let mut peak = PEAK.load(Ordering::SeqCst); | ||||
|             while current > peak { | ||||
|                 match PEAK.compare_exchange_weak(peak, current, Ordering::SeqCst, Ordering::SeqCst) { | ||||
|                     Ok(_) => break, | ||||
|                     Err(x) => peak = x, | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         ret | ||||
|     } | ||||
|  | ||||
|     unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { | ||||
|         System.dealloc(ptr, layout); | ||||
|         DEALLOCATED.fetch_add(layout.size(), Ordering::SeqCst); | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[global_allocator] | ||||
| static GLOBAL: TrackingAllocator = TrackingAllocator; | ||||
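| // Note: as the process-wide #[global_allocator], these counters also capture | ||||
| // allocations made by Criterion and the benchmark harness itself, so the figures | ||||
| // below are best read as relative comparisons between backends rather than | ||||
| // exact per-operation costs. | ||||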
|  | ||||
| /// Reset memory tracking counters | ||||
| fn reset_memory_tracking() { | ||||
|     ALLOCATED.store(0, Ordering::SeqCst); | ||||
|     DEALLOCATED.store(0, Ordering::SeqCst); | ||||
|     PEAK.store(0, Ordering::SeqCst); | ||||
|     ALLOC_COUNT.store(0, Ordering::SeqCst); | ||||
| } | ||||
|  | ||||
| /// Get current memory stats | ||||
| fn get_memory_stats() -> (usize, usize, usize) { | ||||
|     let allocated = ALLOCATED.load(Ordering::SeqCst); | ||||
|     let deallocated = DEALLOCATED.load(Ordering::SeqCst); | ||||
|     let peak = PEAK.load(Ordering::SeqCst); | ||||
|     let alloc_count = ALLOC_COUNT.load(Ordering::SeqCst); | ||||
|      | ||||
|     let current = allocated.saturating_sub(deallocated); | ||||
|     (current, peak, alloc_count) | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for single SET operations | ||||
| fn profile_memory_set(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/set"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "100bytes"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         reset_memory_tracking(); | ||||
|                         let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                         let mut generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:key", 0); | ||||
|                         let value = generator.generate_value(100); | ||||
|                         (backend, key, value) | ||||
|                     }, | ||||
|                     |(backend, key, value)| { | ||||
|                         backend.storage.set(key, value).unwrap(); | ||||
|                         let (current, peak, allocs) = get_memory_stats(); | ||||
|                         println!("{}: current={}, peak={}, allocs={}",  | ||||
|                                  backend.name(), current, peak, allocs); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for single GET operations | ||||
| fn profile_memory_get(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/get"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = setup_populated_backend(backend_type, 1_000, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "100bytes"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         reset_memory_tracking(); | ||||
|                         generator.generate_key("bench:key", 0) | ||||
|                     }, | ||||
|                     |key| { | ||||
|                         backend.storage.get(&key).unwrap(); | ||||
|                         let (current, peak, allocs) = get_memory_stats(); | ||||
|                         println!("{}: current={}, peak={}, allocs={}",  | ||||
|                                  backend.name(), current, peak, allocs); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for bulk insert operations | ||||
| fn profile_memory_bulk_insert(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/bulk_insert"); | ||||
|      | ||||
|     for size in [100, 1_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             reset_memory_tracking(); | ||||
|                             let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(42); | ||||
|                             let data = generator.generate_string_pairs(size, 100); | ||||
|                             (backend, data) | ||||
|                         }, | ||||
|                         |(backend, data)| { | ||||
|                             for (key, value) in data { | ||||
|                                 backend.storage.set(key, value).unwrap(); | ||||
|                             } | ||||
|                             let (current, peak, allocs) = get_memory_stats(); | ||||
|                             println!("{} (n={}): current={}, peak={}, allocs={}, bytes_per_record={}",  | ||||
|                                      backend.name(), size, current, peak, allocs, peak / size); | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for hash operations | ||||
| fn profile_memory_hash_ops(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/hash_ops"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "hset"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         reset_memory_tracking(); | ||||
|                         let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                         let mut generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:hash", 0); | ||||
|                         let fields = vec![ | ||||
|                             ("field1".to_string(), generator.generate_value(100)), | ||||
|                             ("field2".to_string(), generator.generate_value(100)), | ||||
|                             ("field3".to_string(), generator.generate_value(100)), | ||||
|                         ]; | ||||
|                         (backend, key, fields) | ||||
|                     }, | ||||
|                     |(backend, key, fields)| { | ||||
|                         backend.storage.hset(&key, fields).unwrap(); | ||||
|                         let (current, peak, allocs) = get_memory_stats(); | ||||
|                         println!("{}: current={}, peak={}, allocs={}",  | ||||
|                                  backend.name(), current, peak, allocs); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for list operations | ||||
| fn profile_memory_list_ops(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/list_ops"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "rpush"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         reset_memory_tracking(); | ||||
|                         let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                         let mut generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:list", 0); | ||||
|                         let elements = vec![ | ||||
|                             generator.generate_value(100), | ||||
|                             generator.generate_value(100), | ||||
|                             generator.generate_value(100), | ||||
|                         ]; | ||||
|                         (backend, key, elements) | ||||
|                     }, | ||||
|                     |(backend, key, elements)| { | ||||
|                         backend.storage.rpush(&key, elements).unwrap(); | ||||
|                         let (current, peak, allocs) = get_memory_stats(); | ||||
|                         println!("{}: current={}, peak={}, allocs={}",  | ||||
|                                  backend.name(), current, peak, allocs); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory usage for scan operations | ||||
| fn profile_memory_scan(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/scan"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &backend, | ||||
|                 |b, backend| { | ||||
|                     b.iter(|| { | ||||
|                         reset_memory_tracking(); | ||||
|                         let mut cursor = 0u64; | ||||
|                         let mut total = 0; | ||||
|                         loop { | ||||
|                             let (next_cursor, items) = backend.storage | ||||
|                                 .scan(cursor, None, Some(100)) | ||||
|                                 .unwrap(); | ||||
|                             total += items.len(); | ||||
|                             if next_cursor == 0 { | ||||
|                                 break; | ||||
|                             } | ||||
|                             cursor = next_cursor; | ||||
|                         } | ||||
|                         let (current, peak, allocs) = get_memory_stats(); | ||||
|                         println!("{} (n={}): scanned={}, current={}, peak={}, allocs={}",  | ||||
|                                  backend.name(), size, total, current, peak, allocs); | ||||
|                         total | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Profile memory efficiency (bytes per record stored) | ||||
| fn profile_memory_efficiency(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("memory_profile/efficiency"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend_type.name()), size), | ||||
|                 &(backend_type, size), | ||||
|                 |b, &(backend_type, size)| { | ||||
|                     b.iter_batched( | ||||
|                         || { | ||||
|                             reset_memory_tracking(); | ||||
|                             let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                             let mut generator = DataGenerator::new(42); | ||||
|                             let data = generator.generate_string_pairs(size, 100); | ||||
|                             (backend, data) | ||||
|                         }, | ||||
|                         |(backend, data)| { | ||||
|                             let data_size: usize = data.iter() | ||||
|                                 .map(|(k, v)| k.len() + v.len()) | ||||
|                                 .sum(); | ||||
|                              | ||||
|                             for (key, value) in data { | ||||
|                                 backend.storage.set(key, value).unwrap(); | ||||
|                             } | ||||
|                              | ||||
|                             let (current, peak, allocs) = get_memory_stats(); | ||||
|                             let overhead_pct = ((peak as f64 - data_size as f64) / data_size as f64) * 100.0; | ||||
|                              | ||||
|                             println!("{} (n={}): data_size={}, peak={}, overhead={:.1}%, bytes_per_record={}, allocs={}",  | ||||
|                                      backend.name(), size, data_size, peak, overhead_pct,  | ||||
|                                      peak / size, allocs); | ||||
|                         }, | ||||
|                         BatchSize::SmallInput | ||||
|                     ); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     profile_memory_set, | ||||
|     profile_memory_get, | ||||
|     profile_memory_bulk_insert, | ||||
|     profile_memory_hash_ops, | ||||
|     profile_memory_list_ops, | ||||
|     profile_memory_scan, | ||||
|     profile_memory_efficiency, | ||||
| ); | ||||
|  | ||||
| criterion_main!(benches); | ||||
							
								
								
									
339  benches/scan_ops.rs  Normal file
							| @@ -0,0 +1,339 @@ | ||||
| // benches/scan_ops.rs | ||||
| use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId}; | ||||
|  | ||||
| mod common; | ||||
| use common::*; | ||||
|  | ||||
| /// Benchmark SCAN operation - full database scan | ||||
| fn bench_scan_full(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/scan_full"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &backend, | ||||
|                 |b, backend| { | ||||
|                     b.iter(|| { | ||||
|                         let mut cursor = 0u64; | ||||
|                         let mut total = 0; | ||||
|                         loop { | ||||
|                             let (next_cursor, items) = backend.storage | ||||
|                                 .scan(cursor, None, Some(100)) | ||||
|                                 .unwrap(); | ||||
|                             total += items.len(); | ||||
|                             if next_cursor == 0 { | ||||
|                                 break; | ||||
|                             } | ||||
|                             cursor = next_cursor; | ||||
|                         } | ||||
|                         total | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark SCAN operation with pattern matching | ||||
| fn bench_scan_pattern(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/scan_pattern"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         // Create backend with mixed key patterns | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         // Insert keys with different patterns | ||||
|         for i in 0..3_000 { | ||||
|             let key = if i < 1_000 { | ||||
|                 format!("user:{}:profile", i) | ||||
|             } else if i < 2_000 { | ||||
|                 format!("session:{}:data", i - 1_000) | ||||
|             } else { | ||||
|                 format!("cache:{}:value", i - 2_000) | ||||
|             }; | ||||
|             let value = generator.generate_value(100); | ||||
|             backend.storage.set(key, value).unwrap(); | ||||
|         } | ||||
|          | ||||
|         // Benchmark pattern matching | ||||
|         for pattern in ["user:*", "session:*", "cache:*"] { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/pattern", backend.name()), pattern), | ||||
|                 &(backend.storage.clone(), pattern), | ||||
|                 |b, (storage, pattern)| { | ||||
|                     b.iter(|| { | ||||
|                         let mut cursor = 0u64; | ||||
|                         let mut total = 0; | ||||
|                         loop { | ||||
|                             let (next_cursor, items) = storage | ||||
|                                 .scan(cursor, Some(pattern), Some(100)) | ||||
|                                 .unwrap(); | ||||
|                             total += items.len(); | ||||
|                             if next_cursor == 0 { | ||||
|                                 break; | ||||
|                             } | ||||
|                             cursor = next_cursor; | ||||
|                         } | ||||
|                         total | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark HSCAN operation - scan hash fields | ||||
| fn bench_hscan(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/hscan"); | ||||
|      | ||||
|     for fields_count in [10, 100] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|             let key = generator.generate_key("bench:hash", 0); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/fields", backend.name()), fields_count), | ||||
|                 &(backend, key), | ||||
|                 |b, (backend, key)| { | ||||
|                     b.iter(|| { | ||||
|                         let mut cursor = 0u64; | ||||
|                         let mut total = 0; | ||||
|                         loop { | ||||
|                             let (next_cursor, items) = backend.storage | ||||
|                                 .hscan(key, cursor, None, Some(10)) | ||||
|                                 .unwrap(); | ||||
|                             total += items.len(); | ||||
|                             if next_cursor == 0 { | ||||
|                                 break; | ||||
|                             } | ||||
|                             cursor = next_cursor; | ||||
|                         } | ||||
|                         total | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark HSCAN with pattern matching | ||||
| fn bench_hscan_pattern(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/hscan_pattern"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         // Create a hash with mixed field patterns | ||||
|         let key = "bench:hash:0".to_string(); | ||||
|         let mut fields = Vec::new(); | ||||
|         for i in 0..100 { | ||||
|             let field = if i < 33 { | ||||
|                 format!("user_{}", i) | ||||
|             } else if i < 66 { | ||||
|                 format!("session_{}", i - 33) | ||||
|             } else { | ||||
|                 format!("cache_{}", i - 66) | ||||
|             }; | ||||
|             let value = generator.generate_value(100); | ||||
|             fields.push((field, value)); | ||||
|         } | ||||
|         backend.storage.hset(&key, fields).unwrap(); | ||||
|          | ||||
|         // Benchmark pattern matching | ||||
|         for pattern in ["user_*", "session_*", "cache_*"] { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/pattern", backend.name()), pattern), | ||||
|                 &(backend.storage.clone(), key.clone(), pattern), | ||||
|                 |b, (storage, key, pattern)| { | ||||
|                     b.iter(|| { | ||||
|                         let mut cursor = 0u64; | ||||
|                         let mut total = 0; | ||||
|                         loop { | ||||
|                             let (next_cursor, items) = storage | ||||
|                                 .hscan(key, cursor, Some(pattern), Some(10)) | ||||
|                                 .unwrap(); | ||||
|                             total += items.len(); | ||||
|                             if next_cursor == 0 { | ||||
|                                 break; | ||||
|                             } | ||||
|                             cursor = next_cursor; | ||||
|                         } | ||||
|                         total | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark KEYS operation with various patterns | ||||
| fn bench_keys_operation(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/keys"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         // Create backend with mixed key patterns | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         // Insert keys with different patterns | ||||
|         for i in 0..3_000 { | ||||
|             let key = if i < 1_000 { | ||||
|                 format!("user:{}:profile", i) | ||||
|             } else if i < 2_000 { | ||||
|                 format!("session:{}:data", i - 1_000) | ||||
|             } else { | ||||
|                 format!("cache:{}:value", i - 2_000) | ||||
|             }; | ||||
|             let value = generator.generate_value(100); | ||||
|             backend.storage.set(key, value).unwrap(); | ||||
|         } | ||||
|          | ||||
|         // Benchmark different patterns | ||||
|         for pattern in ["*", "user:*", "session:*", "*:profile", "user:*:profile"] { | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/pattern", backend.name()), pattern), | ||||
|                 &(backend.storage.clone(), pattern), | ||||
|                 |b, (storage, pattern)| { | ||||
|                     b.iter(|| { | ||||
|                         storage.keys(pattern).unwrap() | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark DBSIZE operation | ||||
| fn bench_dbsize(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/dbsize"); | ||||
|      | ||||
|     for size in [1_000, 10_000] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend(backend_type, size, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/size", backend.name()), size), | ||||
|                 &backend, | ||||
|                 |b, backend| { | ||||
|                     b.iter(|| { | ||||
|                         backend.storage.dbsize().unwrap() | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark LRANGE with different range sizes | ||||
| fn bench_lrange_sizes(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/lrange"); | ||||
|      | ||||
|     for range_size in [10, 50, 100] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_lists(backend_type, 100, 100, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|             let key = generator.generate_key("bench:list", 0); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/range", backend.name()), range_size), | ||||
|                 &(backend, key, range_size), | ||||
|                 |b, (backend, key, range_size)| { | ||||
|                     b.iter(|| { | ||||
|                         backend.storage.lrange(key, 0, (*range_size - 1) as i64).unwrap() | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark HKEYS operation | ||||
| fn bench_hkeys(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/hkeys"); | ||||
|      | ||||
|     for fields_count in [10, 50, 100] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|             let key = generator.generate_key("bench:hash", 0); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/fields", backend.name()), fields_count), | ||||
|                 &(backend, key), | ||||
|                 |b, (backend, key)| { | ||||
|                     b.iter(|| { | ||||
|                         backend.storage.hkeys(key).unwrap() | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark HVALS operation | ||||
| fn bench_hvals(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("scan_ops/hvals"); | ||||
|      | ||||
|     for fields_count in [10, 50, 100] { | ||||
|         for backend_type in BackendType::all() { | ||||
|             let backend = setup_populated_backend_hashes(backend_type, 100, fields_count, 100) | ||||
|                 .expect("Failed to setup backend"); | ||||
|             let generator = DataGenerator::new(42); | ||||
|             let key = generator.generate_key("bench:hash", 0); | ||||
|              | ||||
|             group.bench_with_input( | ||||
|                 BenchmarkId::new(format!("{}/fields", backend.name()), fields_count), | ||||
|                 &(backend, key), | ||||
|                 |b, (backend, key)| { | ||||
|                     b.iter(|| { | ||||
|                         backend.storage.hvals(key).unwrap() | ||||
|                     }); | ||||
|                 } | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     bench_scan_full, | ||||
|     bench_scan_pattern, | ||||
|     bench_hscan, | ||||
|     bench_hscan_pattern, | ||||
|     bench_keys_operation, | ||||
|     bench_dbsize, | ||||
|     bench_lrange_sizes, | ||||
|     bench_hkeys, | ||||
|     bench_hvals, | ||||
| ); | ||||
|  | ||||
| criterion_main!(benches); | ||||
							
								
								
									
444  benches/single_ops.rs  Normal file
							| @@ -0,0 +1,444 @@ | ||||
| // benches/single_ops.rs | ||||
| use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId, BatchSize}; | ||||
|  | ||||
| mod common; | ||||
| use common::*; | ||||
|  | ||||
| /// Benchmark string SET operations | ||||
| fn bench_string_set(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/strings/set"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "100bytes"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:key", rand::random::<usize>() % 100000); | ||||
|                         let value = generator.generate_value(100); | ||||
|                         (key, value) | ||||
|                     }, | ||||
|                     |(key, value)| { | ||||
|                         backend.storage.set(key, value).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark string GET operations | ||||
| fn bench_string_get(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/strings/get"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         // Pre-populate with 10K keys | ||||
|         let backend = setup_populated_backend(backend_type, 10_000, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "100bytes"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key_id = rand::random::<usize>() % 10_000; | ||||
|                         generator.generate_key("bench:key", key_id) | ||||
|                     }, | ||||
|                     |key| { | ||||
|                         backend.storage.get(&key).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark string DEL operations | ||||
| fn bench_string_del(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/strings/del"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "100bytes"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         // Create fresh backend with one key for each iteration | ||||
|                         let backend = BenchmarkBackend::new(backend_type).unwrap(); | ||||
|                         let mut generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:key", 0); | ||||
|                         let value = generator.generate_value(100); | ||||
|                         backend.storage.set(key.clone(), value).unwrap(); | ||||
|                         (backend, key) | ||||
|                     }, | ||||
|                     |(backend, key)| { | ||||
|                         backend.storage.del(key).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark string EXISTS operations | ||||
| fn bench_string_exists(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/strings/exists"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = setup_populated_backend(backend_type, 10_000, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "100bytes"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key_id = rand::random::<usize>() % 10_000; | ||||
|                         generator.generate_key("bench:key", key_id) | ||||
|                     }, | ||||
|                     |key| { | ||||
|                         backend.storage.exists(&key).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark hash HSET operations | ||||
| fn bench_hash_hset(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/hashes/hset"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "single_field"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:hash", rand::random::<usize>() % 1000); | ||||
|                         let field = format!("field{}", rand::random::<usize>() % 100); | ||||
|                         let value = generator.generate_value(100); | ||||
|                         (key, field, value) | ||||
|                     }, | ||||
|                     |(key, field, value)| { | ||||
|                         backend.storage.hset(&key, vec![(field, value)]).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark hash HGET operations | ||||
| fn bench_hash_hget(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/hashes/hget"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         // Pre-populate with hashes | ||||
|         let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "single_field"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:hash", rand::random::<usize>() % 1_000); | ||||
|                         let field = format!("field{}", rand::random::<usize>() % 10); | ||||
|                         (key, field) | ||||
|                     }, | ||||
|                     |(key, field)| { | ||||
|                         backend.storage.hget(&key, &field).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark hash HGETALL operations | ||||
| fn bench_hash_hgetall(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/hashes/hgetall"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "10_fields"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         generator.generate_key("bench:hash", rand::random::<usize>() % 1_000) | ||||
|                     }, | ||||
|                     |key| { | ||||
|                         backend.storage.hgetall(&key).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark hash HDEL operations | ||||
| fn bench_hash_hdel(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/hashes/hdel"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "single_field"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
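|                         // Fresh backend with a single 10-field hash per iteration so HDEL always has a field to remove | ||||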
|                         let backend = setup_populated_backend_hashes(backend_type, 1, 10, 100).unwrap(); | ||||
|                         let generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:hash", 0); | ||||
|                         let field = format!("field{}", rand::random::<usize>() % 10); | ||||
|                         (backend, key, field) | ||||
|                     }, | ||||
|                     |(backend, key, field)| { | ||||
|                         backend.storage.hdel(&key, vec![field]).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark hash HEXISTS operations | ||||
| fn bench_hash_hexists(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/hashes/hexists"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = setup_populated_backend_hashes(backend_type, 1_000, 10, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "single_field"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:hash", rand::random::<usize>() % 1_000); | ||||
|                         let field = format!("field{}", rand::random::<usize>() % 10); | ||||
|                         (key, field) | ||||
|                     }, | ||||
|                     |(key, field)| { | ||||
|                         backend.storage.hexists(&key, &field).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark list LPUSH operations | ||||
| fn bench_list_lpush(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/lists/lpush"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "single_element"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:list", rand::random::<usize>() % 1000); | ||||
|                         let element = generator.generate_value(100); | ||||
|                         (key, element) | ||||
|                     }, | ||||
|                     |(key, element)| { | ||||
|                         backend.storage.lpush(&key, vec![element]).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark list RPUSH operations | ||||
| fn bench_list_rpush(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/lists/rpush"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = BenchmarkBackend::new(backend_type).expect("Failed to create backend"); | ||||
|         let mut generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "single_element"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let key = generator.generate_key("bench:list", rand::random::<usize>() % 1000); | ||||
|                         let element = generator.generate_value(100); | ||||
|                         (key, element) | ||||
|                     }, | ||||
|                     |(key, element)| { | ||||
|                         backend.storage.rpush(&key, vec![element]).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark list LPOP operations | ||||
| fn bench_list_lpop(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/lists/lpop"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "single_element"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
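|                         // Fresh backend with a pre-populated list per iteration so LPOP always has an element to remove | ||||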
|                         let backend = setup_populated_backend_lists(backend_type, 1, 100, 100).unwrap(); | ||||
|                         let generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:list", 0); | ||||
|                         (backend, key) | ||||
|                     }, | ||||
|                     |(backend, key)| { | ||||
|                         backend.storage.lpop(&key, 1).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark list RPOP operations | ||||
| fn bench_list_rpop(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/lists/rpop"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend_type.name(), "single_element"), | ||||
|             &backend_type, | ||||
|             |b, &backend_type| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         let backend = setup_populated_backend_lists(backend_type, 1, 100, 100).unwrap(); | ||||
|                         let generator = DataGenerator::new(42); | ||||
|                         let key = generator.generate_key("bench:list", 0); | ||||
|                         (backend, key) | ||||
|                     }, | ||||
|                     |(backend, key)| { | ||||
|                         backend.storage.rpop(&key, 1).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| /// Benchmark list LRANGE operations | ||||
| fn bench_list_lrange(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("single_ops/lists/lrange"); | ||||
|      | ||||
|     for backend_type in BackendType::all() { | ||||
|         let backend = setup_populated_backend_lists(backend_type, 1_000, 100, 100) | ||||
|             .expect("Failed to setup backend"); | ||||
|         let generator = DataGenerator::new(42); | ||||
|          | ||||
|         group.bench_with_input( | ||||
|             BenchmarkId::new(backend.name(), "10_elements"), | ||||
|             &backend, | ||||
|             |b, backend| { | ||||
|                 b.iter_batched( | ||||
|                     || { | ||||
|                         generator.generate_key("bench:list", rand::random::<usize>() % 1_000) | ||||
|                     }, | ||||
|                     |key| { | ||||
|                         backend.storage.lrange(&key, 0, 9).unwrap(); | ||||
|                     }, | ||||
|                     BatchSize::SmallInput | ||||
|                 ); | ||||
|             } | ||||
|         ); | ||||
|     } | ||||
|      | ||||
|     group.finish(); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     bench_string_set, | ||||
|     bench_string_get, | ||||
|     bench_string_del, | ||||
|     bench_string_exists, | ||||
|     bench_hash_hset, | ||||
|     bench_hash_hget, | ||||
|     bench_hash_hgetall, | ||||
|     bench_hash_hdel, | ||||
|     bench_hash_hexists, | ||||
|     bench_list_lpush, | ||||
|     bench_list_rpush, | ||||
|     bench_list_lpop, | ||||
|     bench_list_rpop, | ||||
|     bench_list_lrange, | ||||
| ); | ||||
|  | ||||
| criterion_main!(benches); | ||||
							
								
								
									
docs/admin.md (new file, 182 lines)
							| @@ -0,0 +1,182 @@ | ||||
| # Admin Database 0 (`0.db`) | ||||
|  | ||||
| This page explains what the Admin Database `DB 0` is, why HeroDB uses it, and how to work with it as a developer and end-user. It’s a practical guide covering how databases are created, listed, secured with access keys, and encrypted using per-database secrets. | ||||
|  | ||||
| ## What is `DB 0`? | ||||
|  | ||||
| `DB 0` is the control-plane for a HeroDB instance. It stores metadata for all user databases (`db_id >= 1`) so the server can: | ||||
| - Know which databases exist (without scanning the filesystem) | ||||
| - Enforce access control (public/private with access keys) | ||||
| - Enforce per-database encryption (whether a given database must be opened encrypted and with which write-only key) | ||||
|  | ||||
| `DB 0` itself is always encrypted with the admin secret (the process-level secret provided at startup). | ||||
|  | ||||
| ## How `DB 0` is created and secured | ||||
|  | ||||
| - `DB 0` lives at `<base_dir>/0.db` | ||||
| - It is always encrypted using the `admin secret` provided at process startup (using the `--admin-secret <secret>` CLI flag) | ||||
| - Only clients that provide the correct admin secret can `SELECT 0` (see “`SELECT` + `KEY`” below) | ||||
|  | ||||
| At startup, the server bootstraps `DB 0` (initializes counters and structures) if it’s missing. | ||||
|  | ||||
| ## Metadata stored in `DB 0` | ||||
|  | ||||
| Keys in `DB 0` (internal layout, but useful to understand how things work): | ||||
|  | ||||
| - `admin:next_id` | ||||
|   - String counter holding the next id to allocate (initialized to `"1"`) | ||||
|  | ||||
| - `admin:dbs` | ||||
|   - A hash acting as a set of existing database ids | ||||
|   - field = id (as string), value = `"1"` | ||||
|  | ||||
| - `meta:db:<id>` | ||||
|   - A hash holding db-level metadata | ||||
|   - field `public` = `"true"` or `"false"` (defaults to `true` if missing) | ||||
|  | ||||
| - `meta:db:<id>:keys` | ||||
|   - A hash mapping access-key hashes to the string `Permission:created_at_seconds` | ||||
|   - Examples: `Read:1713456789` or `ReadWrite:1713456789` | ||||
|   - The plaintext access keys are never stored; only their `SHA-256` hashes are kept | ||||
|  | ||||
| - `meta:db:<id>:enc` | ||||
|    - A string holding the per-database encryption key used to open `<id>.db` encrypted | ||||
|    - This value is write-only from the perspective of the management APIs (it’s set at creation and never returned) | ||||
|  | ||||
| - `age:key:<name>` | ||||
|    - Base64-encoded X25519 recipient (public encryption key) for named AGE keys | ||||
| - `age:privkey:<name>` | ||||
|    - Base64-encoded X25519 identity (secret encryption key) for named AGE keys | ||||
| - `age:signpub:<name>` | ||||
|    - Base64-encoded Ed25519 verify public key for named AGE keys | ||||
| - `age:signpriv:<name>` | ||||
|    - Base64-encoded Ed25519 signing secret key for named AGE keys | ||||
|  | ||||
| > You don’t need to manipulate these keys directly; they’re listed to clarify the model. AGE keys are managed via AGE commands. | ||||
|  | ||||
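| As an illustration only (you normally manage this through the JSON-RPC API), an admin who has authenticated to `DB 0` can read this metadata with ordinary hash commands. The session below is hypothetical; actual ids and values will differ: | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p $PORT | ||||
| 127.0.0.1:6379> SELECT 0 KEY my-admin-secret | ||||
| OK | ||||
| 127.0.0.1:6379> HGETALL admin:dbs | ||||
| 1) "1" | ||||
| 2) "1" | ||||
| 127.0.0.1:6379> HGETALL meta:db:1 | ||||
| 1) "public" | ||||
| 2) "true" | ||||
| ``` | ||||
|  | ||||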
| ## Database lifecycle | ||||
|  | ||||
| 1) Create a database (via JSON-RPC) | ||||
| - The server allocates an id from `admin:next_id`, registers it in `admin:dbs`, and defaults the database to `public=true` | ||||
| - If you pass an optional `encryption_key` during creation, the server persists it in `meta:db:<id>:enc`. That database will be opened in encrypted mode from then on | ||||
|  | ||||
| 2) Open and use a database | ||||
| - Clients select a database over RESP using `SELECT` | ||||
| - Authorization and encryption state are enforced using `DB 0` metadata | ||||
|  | ||||
| 3) Delete database files | ||||
| - Removing `<id>.db` removes the physical storage | ||||
| - `DB 0` remains the source of truth for existence and may be updated by future management methods as the system evolves | ||||
|  | ||||
| ## Access control model | ||||
|  | ||||
| - Public database (default) | ||||
|   - Anyone can `SELECT <id>` with no key, and will get `ReadWrite` permission | ||||
| - Private database | ||||
|   - You must provide an access key when selecting the database | ||||
|   - The server hashes the provided key with `SHA-256` and checks membership in `meta:db:<id>:keys` | ||||
|   - Permissions are `Read` or `ReadWrite` depending on how the key was added | ||||
| - Admin `DB 0` | ||||
|   - Requires the exact admin secret as the `KEY` argument to `SELECT 0` | ||||
|   - Permission is `ReadWrite` when the secret matches | ||||
|  | ||||
| Connections start with no database selected. Any command that requires storage (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database. Admin DB 0 is never accessible without authenticating via `SELECT 0 KEY <admin_secret>`. | ||||
|  | ||||
| ### How to select databases with optional `KEY` | ||||
|  | ||||
| - Public DB (no key required) | ||||
|   - `SELECT <id>` | ||||
|  | ||||
| - Private DB (access key required) | ||||
|   - `SELECT <id> KEY <plaintext_key>` | ||||
|  | ||||
| - Admin `DB 0` (admin secret required) | ||||
|   - `SELECT 0 KEY <admin_secret>` | ||||
|  | ||||
| Examples (using `redis-cli`): | ||||
| ```bash | ||||
| # Public database | ||||
| redis-cli -p $PORT SELECT 1 | ||||
| # → OK | ||||
|  | ||||
| # Private database | ||||
| redis-cli -p $PORT SELECT 2 KEY my-db2-access-key | ||||
| # → OK | ||||
|  | ||||
| # Admin DB 0 | ||||
| redis-cli -p $PORT SELECT 0 KEY my-admin-secret | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| ## Per-database encryption | ||||
|  | ||||
| - At database creation, you can provide an optional per-db encryption key | ||||
| - If provided, the server persists that key in `DB 0` as `meta:db:<id>:enc` | ||||
| - When you later open the database, the engine checks whether `meta:db:<id>:enc` exists to decide if it must open `<id>.db` in encrypted mode | ||||
| - The per-db key is not returned by RPC—it is considered write-only configuration data | ||||
|  | ||||
| Operationally: | ||||
| - Create with encryption: pass a non-null `encryption_key` to the `createDatabase` RPC | ||||
| - Open later: simply `SELECT` the database; encryption is transparent to clients | ||||
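|  | ||||
| As a sketch (assuming the server was started with `--enable-rpc`, which exposes HTTP JSON-RPC on port 8080), a creation call with a per-db encryption key could look like the following. The method name and parameter shape mirror the `createDatabase` example in [docs/lance.md](./lance.md); the backend string and key value here are purely illustrative: | ||||
|  | ||||
| ```bash | ||||
| curl -s -X POST http://localhost:8080 \ | ||||
|   -H 'Content-Type: application/json' \ | ||||
|   -d '{"jsonrpc":"2.0","id":1,"method":"herodb_createDatabase","params":["Redb",{"name":"private-db","storage_path":null,"max_size":null,"redis_version":null},"my-per-db-encryption-key"]}' | ||||
| ``` | ||||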
|  | ||||
| ## Management via JSON-RPC | ||||
|  | ||||
| You can manage databases using the management RPC (namespaced `herodb.*`). Typical operations: | ||||
| - `createDatabase(backend, config, encryption_key?)` | ||||
|   - Allocates a new id, sets optional encryption key | ||||
| - `listDatabases()` | ||||
|   - Lists database ids and info (including whether storage is currently encrypted) | ||||
| - `getDatabaseInfo(db_id)` | ||||
|   - Returns details: backend, encrypted flag, size on disk, `key_count`, timestamps, etc. | ||||
| - `addAccessKey(db_id, key, permissions)` | ||||
|   - Adds a `Read` or `ReadWrite` access key (permissions = `"read"` | `"readwrite"`) | ||||
| - `listAccessKeys(db_id)` | ||||
|   - Returns hashes and permissions; you can use these hashes to delete keys | ||||
| - `deleteAccessKey(db_id, key_hash)` | ||||
|   - Removes a key by its hash | ||||
| - `setDatabasePublic(db_id, public)` | ||||
|   - Toggles public/private | ||||
|  | ||||
| Copyable JSON examples are provided in the [RPC examples documentation](./rpc_examples.md). | ||||
|  | ||||
| ## Typical flows | ||||
|  | ||||
| 1) Public, unencrypted database | ||||
| - Create a new database without an encryption key | ||||
| - Clients can immediately `SELECT <id>` without a key | ||||
| - You can later make it private and add keys if needed | ||||
|  | ||||
| 2) Private, encrypted database | ||||
| - Create passing an `encryption_key` | ||||
| - Mark it private (`setDatabasePublic false`) and add access keys | ||||
| - Clients must use `SELECT <id> KEY <plaintext_access_key>` | ||||
| - Storage opens in encrypted mode automatically | ||||
|  | ||||
| ## Security notes | ||||
|  | ||||
| - Only `SHA-256` hashes of access keys are stored in `DB 0`; keep plaintext keys safe on the client side | ||||
| - The per-db encryption key is never exposed via the API after it is set | ||||
| - The admin secret must be kept secure; anyone with it can `SELECT 0` and perform administrative actions | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| - `ERR invalid access key` when selecting a private db | ||||
|   - Ensure you passed the `KEY` argument: `SELECT <id> KEY <plaintext_key>` | ||||
|   - If you recently added the key, confirm the permissions and that you used the exact plaintext (hash must match) | ||||
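|  | ||||
|   To check which hash a plaintext key corresponds to, you can compute the `SHA-256` yourself and compare it with the hashes returned by `listAccessKeys` (a local sketch using coreutils): | ||||
|  | ||||
|   ```bash | ||||
|   echo -n "my-db2-access-key" | sha256sum | ||||
|   # → the hex digest should match an entry in meta:db:<id>:keys | ||||
|   ``` | ||||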
|  | ||||
| - `Database X not found` | ||||
|   - The id isn’t registered in `DB 0` (`admin:dbs`). Use the management APIs to create or list databases | ||||
|  | ||||
| - Cannot `SELECT 0` | ||||
|   - The `KEY` must be the exact admin secret passed at server startup | ||||
|  | ||||
| ## Reference | ||||
|  | ||||
| - Admin metadata lives in `DB 0` (`0.db`) and controls: | ||||
|   - Existence: `admin:dbs` | ||||
|   - Access: `meta:db:<id>` (field `public`) and `meta:db:<id>:keys` | ||||
|   - Encryption: `meta:db:<id>:enc` | ||||
|  | ||||
| For command examples and management payloads: | ||||
| - RESP command basics: [docs/basics.md](./basics.md) | ||||
| - Supported commands: [docs/cmds.md](./cmds.md) | ||||
| - JSON-RPC examples: [docs/rpc_examples.md](./rpc_examples.md) | ||||
							
								
								
									
docs/age.md (new file, 96 lines)
							| @@ -0,0 +1,96 @@ | ||||
| # HeroDB AGE Cryptography | ||||
|  | ||||
| HeroDB provides AGE-based asymmetric encryption and digital signatures over the Redis protocol using X25519 for encryption and Ed25519 for signatures. Keys can be used in stateless (ephemeral) or key-managed (persistent, named) modes. | ||||
|  | ||||
| In key-managed mode, HeroDB uses a unified keypair concept: a single Ed25519 signing key is deterministically derived into X25519 keys for encryption, allowing one keypair to handle both encryption and signatures transparently. | ||||
|  | ||||
| ## Cryptographic Algorithms | ||||
|  | ||||
| ### X25519 (Encryption) | ||||
| - Elliptic-curve Diffie-Hellman key exchange for symmetric key derivation. | ||||
| - Used for encrypting/decrypting messages. | ||||
|  | ||||
| ### Ed25519 (Signatures) | ||||
| - EdDSA digital signatures for message authentication. | ||||
| - Used for signing/verifying messages. | ||||
|  | ||||
| ### Key Derivation | ||||
| Ed25519 signing keys are deterministically converted to X25519 keys for encryption. This enables a single keypair to support both operations without additional keys. Derivation uses the Ed25519 secret scalar clamped for X25519. | ||||
|  | ||||
| In named keypairs, Ed25519 keys are stored, and X25519 keys are derived on-demand and cached. | ||||
|  | ||||
| ## Stateless Mode (Ephemeral Keys) | ||||
| No server-side storage; keys are provided with each command. | ||||
|  | ||||
| Available commands: | ||||
| - `AGE GENENC`: Generate ephemeral X25519 keypair. Returns `[recipient, identity]`. | ||||
| - `AGE GENSIGN`: Generate ephemeral Ed25519 keypair. Returns `[verify_pub, sign_secret]`. | ||||
| - `AGE ENCRYPT <recipient> <message>`: Encrypt message. Returns base64 ciphertext. | ||||
| - `AGE DECRYPT <identity> <ciphertext_b64>`: Decrypt ciphertext. Returns plaintext. | ||||
| - `AGE SIGN <sign_secret> <message>`: Sign message. Returns base64 signature. | ||||
| - `AGE VERIFY <verify_pub> <message> <signature_b64>`: Verify signature. Returns 1 (valid) or 0 (invalid). | ||||
|  | ||||
| Example: | ||||
| ```bash | ||||
| redis-cli AGE GENENC | ||||
| # → 1) "age1qz..."  # recipient (X25519 public) | ||||
| #    2) "AGE-SECRET-KEY-1..."  # identity (X25519 secret) | ||||
|  | ||||
| redis-cli AGE ENCRYPT "age1qz..." "hello" | ||||
| # → base64_ciphertext | ||||
|  | ||||
| redis-cli AGE DECRYPT "AGE-SECRET-KEY-1..." base64_ciphertext | ||||
| # → "hello" | ||||
| ``` | ||||
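|  | ||||
| The signing flow works the same way with the Ed25519 commands; the key and signature values below are placeholders: | ||||
|  | ||||
| ```bash | ||||
| redis-cli AGE GENSIGN | ||||
| # → 1) "<verify_pub_b64>"   # Ed25519 verify public key | ||||
| #    2) "<sign_secret_b64>"  # Ed25519 signing secret | ||||
|  | ||||
| redis-cli AGE SIGN "<sign_secret_b64>" "hello" | ||||
| # → base64_signature | ||||
|  | ||||
| redis-cli AGE VERIFY "<verify_pub_b64>" "hello" base64_signature | ||||
| # → 1 | ||||
| ``` | ||||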
|  | ||||
| ## Key-Managed Mode (Persistent Named Keys) | ||||
| Keys are stored server-side under names. Supports unified keypairs for both encryption and signatures. | ||||
|  | ||||
| Available commands: | ||||
| - `AGE KEYGEN <name>`: Generate and store unified keypair. Returns `[recipient, identity]` in age format. | ||||
| - `AGE SIGNKEYGEN <name>`: Generate and store Ed25519 signing keypair. Returns `[verify_pub, sign_secret]`. | ||||
| - `AGE ENCRYPTNAME <name> <message>`: Encrypt with named key. Returns base64 ciphertext. | ||||
| - `AGE DECRYPTNAME <name> <ciphertext_b64>`: Decrypt with named key. Returns plaintext. | ||||
| - `AGE SIGNNAME <name> <message>`: Sign with named key. Returns base64 signature. | ||||
| - `AGE VERIFYNAME <name> <message> <signature_b64>`: Verify with named key. Returns 1 or 0. | ||||
| - `AGE LIST`: List all stored key names. Returns sorted array of names. | ||||
|  | ||||
| ### AGE LIST Output | ||||
| Returns a flat, deduplicated, sorted array of key names (strings). Each name corresponds to a stored keypair, which may include encryption keys (X25519), signing keys (Ed25519), or both. | ||||
|  | ||||
| Output format: `["name1", "name2", ...]` | ||||
|  | ||||
| Example: | ||||
| ```bash | ||||
| redis-cli AGE LIST | ||||
| # →  1) "<named_keypair_1>" | ||||
| #    2) "<named_keypair_2>" | ||||
| ``` | ||||
|  | ||||
| For unified keypairs (from `AGE KEYGEN`), the name handles both encryption (derived X25519) and signatures (stored Ed25519) transparently. | ||||
|  | ||||
| Example with named keys: | ||||
| ```bash | ||||
| redis-cli AGE KEYGEN app1 | ||||
| # →  1) "age1..."  # recipient | ||||
| #    2) "AGE-SECRET-KEY-1..."  # identity | ||||
|  | ||||
| redis-cli AGE ENCRYPTNAME app1 "secret message" | ||||
| # → base64_ciphertext | ||||
|  | ||||
| redis-cli AGE DECRYPTNAME app1 base64_ciphertext | ||||
| # → "secret message" | ||||
|  | ||||
| redis-cli AGE SIGNNAME app1 "message" | ||||
| # → base64_signature | ||||
|  | ||||
| redis-cli AGE VERIFYNAME app1 "message" base64_signature | ||||
| # → 1 | ||||
| ``` | ||||
|  | ||||
| ## Choosing a Mode | ||||
| - **Stateless**: For ad-hoc operations without persistence; client manages keys. | ||||
| - **Key-managed**: For centralized key lifecycle; server stores keys for convenience and discoverability. | ||||
|  | ||||
| Implementation: [herodb/src/age.rs](herodb/src/age.rs) <br>  | ||||
| Tests: [herodb/tests/usage_suite.rs](herodb/tests/usage_suite.rs) | ||||
| @@ -1,4 +1,75 @@ | ||||
| Here's an expanded version of the cmds.md documentation to include the list commands: | ||||
| # HeroDB Basics | ||||
| 
 | ||||
| ## Launching HeroDB | ||||
| 
 | ||||
| To launch HeroDB, use the binary with required and optional flags. The `--admin-secret` flag is mandatory, encrypting the admin database (DB 0) and authorizing admin access. | ||||
| 
 | ||||
| ### Launch Flags | ||||
| - `--dir <path>`: Directory for database files (default: current directory). | ||||
| - `--port <port>`: TCP port for Redis protocol (default: 6379). | ||||
| - `--debug`: Enable debug logging. | ||||
| - `--sled`: Use Sled backend (default: Redb). | ||||
| - `--enable-rpc`: Start JSON-RPC management server on port 8080 (HTTP over TCP). | ||||
| - `--rpc-port <port>`: Custom RPC port (default: 8080). | ||||
| - `--enable-rpc-ipc`: Start JSON-RPC over a Unix Domain Socket (non-HTTP). | ||||
| - `--rpc-ipc-path <path>`: Path to the Unix socket for IPC (default: `/tmp/herodb.ipc`). | ||||
| - `--admin-secret <secret>`: Required secret for DB 0 encryption and admin access. | ||||
| 
 | ||||
| Example: | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc | ||||
| ``` | ||||
| 
 | ||||
| To enable JSON-RPC over a Unix Domain Socket at `/tmp/herodb.sock`: | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --enable-rpc-ipc --rpc-ipc-path /tmp/herodb.sock | ||||
| ``` | ||||
| 
 | ||||
| Test the IPC endpoint interactively with socat (non-HTTP transport): | ||||
| ```bash | ||||
| sudo socat -d -d -t 5 - UNIX-CONNECT:/tmp/herodb.sock | ||||
| ``` | ||||
| Then paste a framed JSON-RPC request. Example: | ||||
| ``` | ||||
| {"jsonrpc":"2.0","method":"hero_listDatabases","params":[],"id":3} | ||||
| ``` | ||||
| More IPC examples are in [docs/rpc_examples.md](docs/rpc_examples.md). | ||||
| 
 | ||||
| Deprecated flags (`--encrypt`, `--encryption-key`) are ignored for data DBs; per-database encryption is managed via RPC. | ||||
| 
 | ||||
| ## Admin Database (DB 0) | ||||
| 
 | ||||
| DB 0 acts as the administrative database instance, storing metadata for all user databases (IDs >= 1). It controls existence, access control, and per-database encryption. DB 0 is always encrypted with the `--admin-secret`. | ||||
| 
 | ||||
| When creating a new database, DB 0 allocates an ID, registers it, and optionally stores a per-database encryption key (write-only). Databases are public by default; use RPC to set them private, requiring access keys for SELECT (read or readwrite based on permissions). Keys are persisted in DB 0 for managed AGE operations. | ||||
| 
 | ||||
| Access DB 0 with `SELECT 0 KEY <admin-secret>`. | ||||
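|  | ||||
| For example, with the launch flags shown above (admin secret `mysecret`): | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p 6379 SELECT 0 KEY mysecret | ||||
| # → OK | ||||
| ``` | ||||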
| 
 | ||||
| ## Symmetric Encryption | ||||
| 
 | ||||
| HeroDB supports stateless symmetric encryption via SYM commands, using XChaCha20-Poly1305 AEAD. | ||||
| 
 | ||||
| Commands: | ||||
| - `SYM KEYGEN`: Generate 32-byte key. Returns base64-encoded key. | ||||
| - `SYM ENCRYPT <key_b64> <message>`: Encrypt message. Returns base64 ciphertext. | ||||
| - `SYM DECRYPT <key_b64> <ciphertext_b64>`: Decrypt. Returns plaintext. | ||||
| 
 | ||||
| Example: | ||||
| ```bash | ||||
| redis-cli SYM KEYGEN | ||||
| # → base64_key | ||||
| 
 | ||||
| redis-cli SYM ENCRYPT base64_key "secret" | ||||
| # → base64_ciphertext | ||||
| 
 | ||||
| redis-cli SYM DECRYPT base64_key base64_ciphertext | ||||
| # → "secret" | ||||
| ``` | ||||
| 
 | ||||
| ## RPC Options | ||||
| 
 | ||||
| Enable the JSON-RPC server with `--enable-rpc` for database management. Methods include creating databases, managing access keys, and setting encryption. See [JSON-RPC Examples](./rpc_examples.md) for payloads. | ||||
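|  | ||||
| For example, the same `hero_listDatabases` request shown for the IPC socket above can be sent to the HTTP listener (default port 8080, assuming the root path) with curl: | ||||
|  | ||||
| ```bash | ||||
| curl -s -X POST http://localhost:8080 \ | ||||
|   -H 'Content-Type: application/json' \ | ||||
|   -d '{"jsonrpc":"2.0","method":"hero_listDatabases","params":[],"id":3}' | ||||
| ``` | ||||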
| 
 | ||||
| # HeroDB Commands | ||||
| 
 | ||||
| HeroDB implements a subset of Redis commands over the Redis protocol. This document describes the available commands and their usage. | ||||
| @@ -575,6 +646,29 @@ redis-cli -p $PORT AGE LIST | ||||
| #    2) "keyname2" | ||||
| ``` | ||||
| 
 | ||||
| ## SYM Commands | ||||
| 
 | ||||
| ### SYM KEYGEN | ||||
| Generate a symmetric encryption key. | ||||
| ```bash | ||||
| redis-cli -p $PORT SYM KEYGEN | ||||
| # → base64_encoded_32byte_key | ||||
| ``` | ||||
| 
 | ||||
| ### SYM ENCRYPT | ||||
| Encrypt a message with a symmetric key. | ||||
| ```bash | ||||
| redis-cli -p $PORT SYM ENCRYPT <key_b64> "message" | ||||
| # → base64_encoded_ciphertext | ||||
| ``` | ||||
| 
 | ||||
| ### SYM DECRYPT | ||||
| Decrypt a ciphertext with a symmetric key. | ||||
| ```bash | ||||
| redis-cli -p $PORT SYM DECRYPT <key_b64> <ciphertext_b64> | ||||
| # → decrypted_message | ||||
| ``` | ||||
| 
 | ||||
| ## Server Information Commands | ||||
| 
 | ||||
| ### INFO | ||||
| @@ -621,3 +715,27 @@ This expanded documentation includes all the list commands that were implemented | ||||
| 10. LINDEX - get element by index | ||||
| 11. LRANGE - get range of elements | ||||
| 
 | ||||
| 
 | ||||
| ## Updated Database Selection and Access Keys | ||||
| 
 | ||||
| HeroDB uses an `Admin DB 0` to control database existence, access, and encryption. Access to data DBs can be public (no key) or private (requires a key). See detailed model in [docs/admin.md](./admin.md). | ||||
| 
 | ||||
| Examples: | ||||
| 
 | ||||
| ```bash | ||||
| # Public database (no key required) | ||||
| redis-cli -p $PORT SELECT 1 | ||||
| # → OK | ||||
| ``` | ||||
| 
 | ||||
| ```bash | ||||
| # Private database (requires access key) | ||||
| redis-cli -p $PORT SELECT 2 KEY my-db2-access-key | ||||
| # → OK | ||||
| ``` | ||||
| 
 | ||||
| ```bash | ||||
| # Admin DB 0 (requires admin secret) | ||||
| redis-cli -p $PORT SELECT 0 KEY my-admin-secret | ||||
| # → OK | ||||
| ``` | ||||
							
								
								
									
docs/benchmarking.md (new file, 409 lines)
							| @@ -0,0 +1,409 @@ | ||||
| # HeroDB Performance Benchmarking Guide | ||||
|  | ||||
| ## Overview | ||||
|  | ||||
| This document describes the comprehensive benchmarking suite for HeroDB, designed to measure and compare the performance characteristics of the two storage backends: **redb** (default) and **sled**. | ||||
|  | ||||
| ## Benchmark Architecture | ||||
|  | ||||
| ### Design Principles | ||||
|  | ||||
| 1. **Fair Comparison**: Identical test datasets and operations across all backends | ||||
| 2. **Statistical Rigor**: Using Criterion for statistically sound measurements | ||||
| 3. **Real-World Scenarios**: Mix of synthetic and realistic workload patterns | ||||
| 4. **Reproducibility**: Deterministic test data generation with fixed seeds | ||||
| 5. **Isolation**: Each benchmark runs in a clean environment | ||||
|  | ||||
| ### Benchmark Categories | ||||
|  | ||||
| #### 1. Single-Operation CRUD Benchmarks | ||||
| Measures the performance of individual database operations: | ||||
|  | ||||
| - **String Operations** | ||||
|   - `SET` - Write a single key-value pair | ||||
|   - `GET` - Read a single key-value pair | ||||
|   - `DEL` - Delete a single key | ||||
|   - `EXISTS` - Check key existence | ||||
|  | ||||
| - **Hash Operations** | ||||
|   - `HSET` - Set single field in hash | ||||
|   - `HGET` - Get single field from hash | ||||
|   - `HGETALL` - Get all fields from hash | ||||
|   - `HDEL` - Delete field from hash | ||||
|   - `HEXISTS` - Check field existence | ||||
|  | ||||
| - **List Operations** | ||||
|   - `LPUSH` - Push to list head | ||||
|   - `RPUSH` - Push to list tail | ||||
|   - `LPOP` - Pop from list head | ||||
|   - `RPOP` - Pop from list tail | ||||
|   - `LRANGE` - Get range of elements | ||||
|  | ||||
| #### 2. Bulk Operation Benchmarks | ||||
| Tests throughput with varying batch sizes: | ||||
|  | ||||
| - **Bulk Insert**: 100, 1,000, 10,000 records | ||||
| - **Bulk Read**: Sequential and random access patterns | ||||
| - **Bulk Update**: Modify existing records | ||||
| - **Bulk Delete**: Remove multiple records | ||||
|  | ||||
| #### 3. Query and Scan Benchmarks | ||||
| Evaluates iteration and filtering performance: | ||||
|  | ||||
| - **SCAN**: Cursor-based key iteration | ||||
| - **HSCAN**: Hash field iteration | ||||
| - **KEYS**: Pattern matching (with various patterns) | ||||
| - **Range Queries**: List range operations | ||||
|  | ||||
| #### 4. Concurrent Operation Benchmarks | ||||
| Simulates multi-client scenarios: | ||||
|  | ||||
| - **10 Concurrent Clients**: Light load | ||||
| - **50 Concurrent Clients**: Medium load | ||||
| - **Mixed Workload**: 70% reads, 30% writes | ||||
|  | ||||
| #### 5. Memory Profiling | ||||
| Tracks memory usage patterns: | ||||
|  | ||||
| - **Allocation Tracking**: Total allocations per operation | ||||
| - **Peak Memory**: Maximum memory usage | ||||
| - **Memory Efficiency**: Bytes per record stored | ||||
|  | ||||
| ### Test Data Specifications | ||||
|  | ||||
| #### Dataset Sizes | ||||
| - **Small**: 1,000 - 10,000 records | ||||
| - **Medium**: 10,000 records (primary focus) | ||||
|  | ||||
| #### Data Characteristics | ||||
| - **Key Format**: `bench:key:{id}` (predictable, sortable) | ||||
| - **Value Sizes**:  | ||||
|   - Small: 50-100 bytes | ||||
|   - Medium: 500-1000 bytes | ||||
|   - Large: 5000-10000 bytes | ||||
| - **Hash Fields**: 5-20 fields per hash | ||||
| - **List Elements**: 10-100 elements per list | ||||
|  | ||||
| ### Metrics Collected | ||||
|  | ||||
| For each benchmark, we collect: | ||||
|  | ||||
| 1. **Latency Metrics** | ||||
|    - Mean execution time | ||||
|    - Median (p50) | ||||
|    - 95th percentile (p95) | ||||
|    - 99th percentile (p99) | ||||
|    - Standard deviation | ||||
|  | ||||
| 2. **Throughput Metrics** | ||||
|    - Operations per second | ||||
|    - Records per second (for bulk operations) | ||||
|  | ||||
| 3. **Memory Metrics** | ||||
|    - Total allocations | ||||
|    - Peak memory usage | ||||
|    - Average bytes per operation | ||||
|  | ||||
| 4. **Initialization Overhead** | ||||
|    - Database startup time | ||||
|    - First operation latency (cold cache) | ||||
|  | ||||
| ## Benchmark Structure | ||||
|  | ||||
| ### Directory Layout | ||||
|  | ||||
| ``` | ||||
| benches/ | ||||
| ├── common/ | ||||
| │   ├── mod.rs              # Shared utilities | ||||
| │   ├── data_generator.rs   # Test data generation | ||||
| │   ├── metrics.rs          # Custom metrics collection | ||||
| │   └── backends.rs         # Backend setup helpers | ||||
| ├── single_ops.rs           # Single-operation benchmarks | ||||
| ├── bulk_ops.rs             # Bulk operation benchmarks | ||||
| ├── scan_ops.rs             # Scan and query benchmarks | ||||
| ├── concurrent_ops.rs       # Concurrent operation benchmarks | ||||
| └── memory_profile.rs       # Memory profiling benchmarks | ||||
| ``` | ||||
|  | ||||
| ### Running Benchmarks | ||||
|  | ||||
| #### Run All Benchmarks | ||||
| ```bash | ||||
| cargo bench | ||||
| ``` | ||||
|  | ||||
| #### Run Specific Benchmark Suite | ||||
| ```bash | ||||
| cargo bench --bench single_ops | ||||
| cargo bench --bench bulk_ops | ||||
| cargo bench --bench concurrent_ops | ||||
| ``` | ||||
|  | ||||
| #### Run Specific Backend | ||||
| ```bash | ||||
| cargo bench -- redb | ||||
| cargo bench -- sled | ||||
| ``` | ||||
|  | ||||
| #### Generate Reports | ||||
| ```bash | ||||
| # Run benchmarks and save results | ||||
| cargo bench -- --save-baseline main | ||||
|  | ||||
| # Compare against baseline | ||||
| cargo bench -- --baseline main | ||||
|  | ||||
| # Export to CSV | ||||
| cargo bench -- --output-format csv > results.csv | ||||
| ``` | ||||
|  | ||||
| ### Output Formats | ||||
|  | ||||
| #### 1. Terminal Output (Default) | ||||
| Real-time progress with statistical summaries: | ||||
| ``` | ||||
| single_ops/redb/set/small | ||||
|                         time:   [1.234 µs 1.245 µs 1.256 µs] | ||||
|                         thrpt:  [802.5K ops/s 810.2K ops/s 818.1K ops/s] | ||||
| ``` | ||||
|  | ||||
| #### 2. CSV Export | ||||
| Structured data for analysis: | ||||
| ```csv | ||||
| backend,operation,dataset_size,mean_ns,median_ns,p95_ns,p99_ns,throughput_ops_sec | ||||
| redb,set,small,1245,1240,1890,2100,810200 | ||||
| sled,set,small,1567,1550,2340,2890,638000 | ||||
| ``` | ||||
|  | ||||
| #### 3. JSON Export | ||||
| Detailed metrics for programmatic processing: | ||||
| ```json | ||||
| { | ||||
|   "benchmark": "single_ops/redb/set/small", | ||||
|   "metrics": { | ||||
|     "mean": 1245, | ||||
|     "median": 1240, | ||||
|     "p95": 1890, | ||||
|     "p99": 2100, | ||||
|     "std_dev": 145, | ||||
|     "throughput": 810200 | ||||
|   }, | ||||
|   "memory": { | ||||
|     "allocations": 3, | ||||
|     "peak_bytes": 4096 | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Benchmark Implementation Details | ||||
|  | ||||
| ### Backend Setup | ||||
|  | ||||
| Each benchmark creates isolated database instances: | ||||
|  | ||||
| ```rust | ||||
| // Redb backend | ||||
| let temp_dir = TempDir::new()?; | ||||
| let db_path = temp_dir.path().join("bench.db"); | ||||
| let storage = Storage::new(db_path, false, None)?; | ||||
|  | ||||
| // Sled backend | ||||
| let temp_dir = TempDir::new()?; | ||||
| let db_path = temp_dir.path().join("bench.sled"); | ||||
| let storage = SledStorage::new(db_path, false, None)?; | ||||
| ``` | ||||
|  | ||||
| ### Data Generation | ||||
|  | ||||
| Deterministic data generation ensures reproducibility: | ||||
|  | ||||
| ```rust | ||||
| use rand::{SeedableRng, Rng}; | ||||
| use rand::rngs::StdRng; | ||||
|  | ||||
| fn generate_test_data(count: usize, seed: u64) -> Vec<(String, String)> { | ||||
|     let mut rng = StdRng::seed_from_u64(seed); | ||||
|     (0..count) | ||||
|         .map(|i| { | ||||
|             let key = format!("bench:key:{:08}", i); | ||||
|             let value = generate_value(&mut rng, 100); | ||||
|             (key, value) | ||||
|         }) | ||||
|         .collect() | ||||
| } | ||||
|  | ||||
| // Sketch of the value helper used above (the real generator lives in | ||||
| // benches/common/data_generator.rs): produce a printable value of `len` bytes. | ||||
| fn generate_value(rng: &mut StdRng, len: usize) -> String { | ||||
|     (0..len).map(|_| rng.gen_range(b'a'..=b'z') as char).collect() | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Concurrent Testing | ||||
|  | ||||
| Using Tokio for async concurrent operations: | ||||
|  | ||||
| ```rust | ||||
| async fn concurrent_benchmark( | ||||
|     storage: Arc<dyn StorageBackend>, | ||||
|     num_clients: usize, | ||||
|     operations: usize | ||||
| ) { | ||||
|     let tasks: Vec<_> = (0..num_clients) | ||||
|         .map(|client_id| { | ||||
|             let storage = storage.clone(); | ||||
|             tokio::spawn(async move { | ||||
|                 for i in 0..operations { | ||||
|                     let key = format!("client:{}:key:{}", client_id, i); | ||||
|                     storage.set(key, "value".to_string()).unwrap(); | ||||
|                 } | ||||
|             }) | ||||
|         }) | ||||
|         .collect(); | ||||
|      | ||||
|     futures::future::join_all(tasks).await; | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Interpreting Results | ||||
|  | ||||
| ### Performance Comparison | ||||
|  | ||||
| When comparing backends, consider: | ||||
|  | ||||
| 1. **Latency vs Throughput Trade-offs** | ||||
|    - Lower latency = better for interactive workloads | ||||
|    - Higher throughput = better for batch processing | ||||
|  | ||||
| 2. **Consistency** | ||||
|    - Lower standard deviation = more predictable performance | ||||
|    - Check p95/p99 for tail latency | ||||
|  | ||||
| 3. **Scalability** | ||||
|    - How performance changes with dataset size | ||||
|    - Concurrent operation efficiency | ||||
|  | ||||
| ### Backend Selection Guidelines | ||||
|  | ||||
| Based on benchmark results, choose: | ||||
|  | ||||
| **redb** when: | ||||
| - Need predictable latency | ||||
| - Working with structured data (separate tables) | ||||
| - Require high concurrent read performance | ||||
| - Memory efficiency is important | ||||
|  | ||||
| **sled** when: | ||||
| - Need high write throughput | ||||
| - Working with uniform data types | ||||
| - Require lock-free operations | ||||
| - Crash recovery is critical | ||||
|  | ||||
| ## Memory Profiling | ||||
|  | ||||
| ### Using DHAT | ||||
|  | ||||
| For detailed memory profiling: | ||||
|  | ||||
| ```bash | ||||
| # Install valgrind and dhat | ||||
| sudo apt-get install valgrind | ||||
|  | ||||
| # Run with DHAT | ||||
| cargo bench --bench memory_profile -- --profile-time=10 | ||||
| ``` | ||||
|  | ||||
| ### Custom Allocation Tracking | ||||
|  | ||||
| The benchmarks include custom allocation tracking: | ||||
|  | ||||
| ```rust | ||||
| #[global_allocator] | ||||
| static ALLOC: dhat::Alloc = dhat::Alloc; | ||||
|  | ||||
| fn track_allocations<F>(f: F) -> AllocationStats | ||||
| where | ||||
|     F: FnOnce(), | ||||
| { | ||||
|     let _profiler = dhat::Profiler::new_heap(); | ||||
|     f(); | ||||
|     // Extract stats from the profiler (e.g. via dhat::HeapStats::get()) and map them into AllocationStats | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Continuous Benchmarking | ||||
|  | ||||
| ### Regression Detection | ||||
|  | ||||
| Compare against baseline to detect performance regressions: | ||||
|  | ||||
| ```bash | ||||
| # Save current performance as baseline | ||||
| cargo bench -- --save-baseline v0.1.0 | ||||
|  | ||||
| # After changes, compare | ||||
| cargo bench -- --baseline v0.1.0 | ||||
|  | ||||
| # Criterion will highlight significant changes | ||||
| ``` | ||||
|  | ||||
| ### CI Integration | ||||
|  | ||||
| Add to CI pipeline: | ||||
|  | ||||
| ```yaml | ||||
| - name: Run Benchmarks | ||||
|   run: | | ||||
|     cargo bench --no-fail-fast -- --output-format json > bench-results.json | ||||
|      | ||||
| - name: Compare Results | ||||
|   run: | | ||||
|     python scripts/compare_benchmarks.py \ | ||||
|       --baseline baseline.json \ | ||||
|       --current bench-results.json \ | ||||
|       --threshold 10  # Fail if >10% regression | ||||
| ``` | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| ### Common Issues | ||||
|  | ||||
| 1. **Inconsistent Results** | ||||
|    - Ensure system is idle during benchmarks | ||||
|    - Disable CPU frequency scaling | ||||
|    - Run multiple iterations | ||||
|  | ||||
| 2. **Out of Memory** | ||||
|    - Reduce dataset sizes | ||||
|    - Run benchmarks sequentially | ||||
|    - Increase system swap space | ||||
|  | ||||
| 3. **Slow Benchmarks** | ||||
|    - Reduce sample size in Criterion config | ||||
|    - Use `--quick` flag for faster runs | ||||
|    - Focus on specific benchmarks | ||||
|  | ||||
| ### Performance Tips | ||||
|  | ||||
| ```bash | ||||
| # Quick benchmark run (fewer samples) | ||||
| cargo bench -- --quick | ||||
|  | ||||
| # Verbose output for debugging | ||||
| cargo bench -- --verbose | ||||
|  | ||||
| # Profile specific operation | ||||
| cargo bench -- single_ops/redb/set | ||||
| ``` | ||||
|  | ||||
| ## Future Enhancements | ||||
|  | ||||
| Potential additions to the benchmark suite: | ||||
|  | ||||
| 1. **Transaction Performance**: Measure MULTI/EXEC overhead | ||||
| 2. **Encryption Overhead**: Compare encrypted vs non-encrypted | ||||
| 3. **Persistence Testing**: Measure flush/sync performance | ||||
| 4. **Recovery Time**: Database restart and recovery speed | ||||
| 5. **Network Overhead**: Redis protocol parsing impact | ||||
| 6. **Long-Running Stability**: Performance over extended periods | ||||
|  | ||||
| ## References | ||||
|  | ||||
| - [Criterion.rs Documentation](https://bheisler.github.io/criterion.rs/book/) | ||||
| - [DHAT Memory Profiler](https://valgrind.org/docs/manual/dh-manual.html) | ||||
| - [Rust Performance Book](https://nnethercote.github.io/perf-book/) | ||||
| @@ -70,6 +70,15 @@ MULTI/EXEC/DISCARD | ✅ | ❌ | Only supported in redb | | ||||
| **Encryption** | | | | | ||||
| Data-at-rest encryption | ✅ | ✅ | Both support [age](age.tech) encryption | | ||||
| AGE commands | ✅ | ✅ | Both support AGE crypto commands | | ||||
| **Full-Text Search** | | | | | ||||
| FT.CREATE | ✅ | ✅ | Create search index with schema | | ||||
| FT.ADD | ✅ | ✅ | Add document to search index | | ||||
| FT.SEARCH | ✅ | ✅ | Search documents with query | | ||||
| FT.DEL | ✅ | ✅ | Delete document from index | | ||||
| FT.INFO | ✅ | ✅ | Get index information | | ||||
| FT.DROP | ✅ | ✅ | Drop search index | | ||||
| FT.ALTER | ✅ | ✅ | Alter index schema | | ||||
| FT.AGGREGATE | ✅ | ✅ | Aggregate search results | | ||||
| 
 | ||||
| ### Performance Considerations | ||||
| 
 | ||||
| @@ -114,3 +123,34 @@ redis-cli -p 6379 --rdb dump.rdb | ||||
| # Import to sled | ||||
| redis-cli -p 6381 --pipe < dump.rdb | ||||
| ``` | ||||
| 
 | ||||
| ## Authentication and Database Selection | ||||
| 
 | ||||
| Connections start with no database selected. Any storage-backed command (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database. | ||||
| 
 | ||||
| HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in [docs/admin.md](./admin.md). | ||||
| 
 | ||||
| Examples: | ||||
| ```bash | ||||
| # Public database (no key required) | ||||
| redis-cli -p $PORT SELECT 1 | ||||
| # → OK | ||||
| ``` | ||||
| 
 | ||||
| ```bash | ||||
| # Private database (requires access key) | ||||
| redis-cli -p $PORT SELECT 2 KEY my-db2-access-key | ||||
| # → OK | ||||
| ``` | ||||
| 
 | ||||
| ```bash | ||||
| # Admin DB 0 (requires admin secret) | ||||
| redis-cli -p $PORT SELECT 0 KEY my-admin-secret | ||||
| # → OK | ||||
| ``` | ||||
| 
 | ||||
| ```bash | ||||
| # Before selecting a DB, storage commands will fail | ||||
| redis-cli -p $PORT GET key | ||||
| # → -ERR No database selected. Use SELECT <id> [KEY <key>] first | ||||
| ``` | ||||
							
								
								
									
docs/lance.md (new file, 440 lines)
							| @@ -0,0 +1,440 @@ | ||||
| # Lance Vector Backend (RESP + JSON-RPC) | ||||
|  | ||||
| This document explains how to use HeroDB’s Lance-backed vector store. It is text-first: users provide text, and HeroDB computes embeddings server-side (no manual vectors). It includes copy-pasteable RESP (redis-cli) and JSON-RPC examples for: | ||||
|  | ||||
| - Creating a Lance database | ||||
| - Embedding provider configuration (OpenAI, Azure OpenAI, or deterministic test provider) | ||||
| - Dataset lifecycle: CREATE, LIST, INFO, DROP | ||||
| - Ingestion: STORE text (+ optional metadata) | ||||
| - Search: QUERY with K, optional FILTER and RETURN | ||||
| - Delete by id | ||||
| - Index creation (currently a placeholder/no-op) | ||||
|  | ||||
| References: | ||||
| - Implementation: [src/lance_store.rs](src/lance_store.rs), [src/cmd.rs](src/cmd.rs), [src/rpc.rs](src/rpc.rs), [src/server.rs](src/server.rs), [src/embedding.rs](src/embedding.rs) | ||||
|  | ||||
| Notes: | ||||
| - Admin DB 0 cannot be Lance (or Tantivy). Only databases with id >= 1 can use Lance. | ||||
| - Permissions: | ||||
|   - Read operations (SEARCH, LIST, INFO) require read permission. | ||||
|   - Mutating operations (CREATE, STORE, CREATEINDEX, DEL, DROP, EMBEDDING CONFIG SET) require readwrite permission. | ||||
| - Backend gating: | ||||
|   - If a DB is Lance, only LANCE.* and basic control commands (PING, ECHO, SELECT, INFO, CLIENT, etc.) are permitted. | ||||
|   - If a DB is not Lance, LANCE.* commands return an error. | ||||
|  | ||||
| Storage layout and schema: | ||||
| - Files live at: <base_dir>/lance/<db_id>/<dataset>.lance | ||||
| - Records schema: | ||||
|   - id: Utf8 (non-null) | ||||
|   - vector: FixedSizeList<Float32, dim> (non-null) | ||||
|   - text: Utf8 (nullable) | ||||
|   - meta: Utf8 JSON (nullable) | ||||
| - Search is an L2 KNN brute-force scan for now (lower score = better). Index creation is a no-op placeholder to be implemented later. | ||||
|  | ||||
| Prerequisites: | ||||
| - Start HeroDB with RPC enabled (for management calls): | ||||
|   - See [docs/basics.md](./basics.md) for flags. Example: | ||||
|     ```bash | ||||
|     ./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc | ||||
|     ``` | ||||
|  | ||||
|  | ||||
| ## 0) Create a Lance-backed database (JSON-RPC) | ||||
|  | ||||
| Use the management API to create a database with backend "Lance". DB 0 is reserved for admin and cannot be Lance. | ||||
|  | ||||
| Request: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": [ | ||||
|     "Lance", | ||||
|     { "name": "vectors-db", "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| - Response contains the allocated db_id (>= 1). Use that id below (replace 1 with your actual id). | ||||
|  | ||||
| Select the database over RESP: | ||||
| ```bash | ||||
| redis-cli -p 6379 SELECT 1 | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 1) Configure embedding provider (server-side embeddings) | ||||
|  | ||||
| HeroDB embeds text internally at STORE/SEARCH time using a per-dataset EmbeddingConfig sidecar. Configure provider before creating a dataset to choose dimensions and provider. | ||||
|  | ||||
| Supported providers: | ||||
| - openai (standard OpenAI API or custom OpenAI-compatible endpoints) | ||||
| - testhash (deterministic, CI-friendly; no network) | ||||
|  | ||||
| Environment variable for OpenAI: | ||||
| - Standard OpenAI: export OPENAI_API_KEY=sk-... | ||||
|  | ||||
| RESP examples: | ||||
| ```bash | ||||
| # Standard OpenAI with default dims (model-dependent, e.g. 1536) | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small | ||||
|  | ||||
| # OpenAI with reduced output dimension (e.g., 512) when supported | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small PARAM dim 512 | ||||
|  | ||||
| # Custom OpenAI-compatible endpoint (e.g., self-hosted) | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small \ | ||||
|   PARAM endpoint http://localhost:8081/v1/embeddings \ | ||||
|   PARAM dim 512 | ||||
|  | ||||
| # Deterministic test provider (no network, stable vectors) | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER testhash MODEL any | ||||
| ``` | ||||
|  | ||||
| Read config: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG GET myset | ||||
| # → JSON blob describing provider/model/params | ||||
| ``` | ||||
|  | ||||
| JSON-RPC examples: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "myset", | ||||
|     "openai", | ||||
|     "text-embedding-3-small", | ||||
|     { "dim": "512" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "herodb_lanceGetEmbeddingConfig", | ||||
|   "params": [1, "myset"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 2) Create a dataset | ||||
|  | ||||
| Choose a dimension that matches your embedding configuration. For OpenAI text-embedding-3-small without dimension override, typical dimension is 1536; when `dim` is set (e.g., 512), use that. The current API requires an explicit DIM. | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.CREATE myset DIM 512 | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "herodb_lanceCreate", | ||||
|   "params": [1, "myset", 512] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 3) Store text documents (server-side embedding) | ||||
|  | ||||
| Provide your id, the text to embed, and optional META fields. The server computes the embedding using the configured provider and stores id/vector/text/meta in the Lance dataset. Upserts by id are supported via delete-then-append semantics. | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.STORE myset ID doc-1 TEXT "Hello vector world" META title "Hello" category "demo" | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "myset", | ||||
|     "doc-1", | ||||
|     "Hello vector world", | ||||
|     { "title": "Hello", "category": "demo" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
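|  | ||||
| Since upserts use delete-then-append semantics, storing the same ID again simply replaces the earlier record; a minimal sketch: | ||||
|  | ||||
| ```bash | ||||
| # Re-store doc-1 with updated text/meta; the previous doc-1 record is replaced | ||||
| redis-cli -p 6379 LANCE.STORE myset ID doc-1 TEXT "Hello vector world, revised" META title "Hello v2" category "demo" | ||||
| # → OK | ||||
| ``` | ||||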
|  | ||||
|  | ||||
| ## 4) Search with a text query | ||||
|  | ||||
| Provide a query string; the server embeds it and performs KNN search. Optional: FILTER expression and RETURN subset of fields. | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| # K nearest neighbors for the query text | ||||
| redis-cli -p 6379 LANCE.SEARCH myset K 5 QUERY "greetings to vectors" | ||||
| # → Array of hits: [id, score, [k,v, ...]] pairs, lower score = closer | ||||
|  | ||||
| # With a filter on meta fields and return only title | ||||
| redis-cli -p 6379 LANCE.SEARCH myset K 3 QUERY "greetings to vectors" FILTER "category = 'demo'" RETURN 1 title | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [1, "myset", "greetings to vectors", 5, null, null] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| With filter and selected fields: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 7, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [1, "myset", "greetings to vectors", 3, "category = 'demo'", ["title"]] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Response shape: | ||||
| - RESP over redis-cli: an array of hits, each `[id, score, [k, v, ...]]` (an illustrative reply is sketched below). | ||||
| - JSON-RPC: an object containing either the RESP-encoded wire-format string or a structured result, depending on the implementation. See [src/rpc.rs](src/rpc.rs) for details. | ||||
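|  | ||||
| For illustration only, a redis-cli reply for the filtered search above could look roughly like this; the ids, scores, and ordering depend entirely on your data and embedding provider: | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.SEARCH myset K 3 QUERY "greetings to vectors" FILTER "category = 'demo'" RETURN 1 title | ||||
| # → 1) 1) "doc-1" | ||||
| #       2) "0.1234"      # hypothetical score; lower = closer | ||||
| #       3) 1) "title" | ||||
| #          2) "Hello" | ||||
| ``` | ||||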
|  | ||||
|  | ||||
| ## 5) Create an index (placeholder) | ||||
|  | ||||
| Index creation currently returns OK but is a no-op. It will integrate Lance vector indices in a future update. | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.CREATEINDEX myset TYPE "ivf_pq" PARAM nlist 100 PARAM pq_m 16 | ||||
| # → OK (no-op for now) | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 8, | ||||
|   "method": "herodb_lanceCreateIndex", | ||||
|   "params": [1, "myset", "ivf_pq", { "nlist": "100", "pq_m": "16" }] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 6) Inspect datasets | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| # List datasets in current Lance DB | ||||
| redis-cli -p 6379 LANCE.LIST | ||||
|  | ||||
| # Get dataset info | ||||
| redis-cli -p 6379 LANCE.INFO myset | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 9, | ||||
|   "method": "herodb_lanceList", | ||||
|   "params": [1] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 10, | ||||
|   "method": "herodb_lanceInfo", | ||||
|   "params": [1, "myset"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 7) Delete and drop | ||||
|  | ||||
| RESP: | ||||
| ```bash | ||||
| # Delete by id | ||||
| redis-cli -p 6379 LANCE.DEL myset doc-1 | ||||
| # → OK | ||||
|  | ||||
| # Drop the entire dataset | ||||
| redis-cli -p 6379 LANCE.DROP myset | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 11, | ||||
|   "method": "herodb_lanceDel", | ||||
|   "params": [1, "myset", "doc-1"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 12, | ||||
|   "method": "herodb_lanceDrop", | ||||
|   "params": [1, "myset"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 8) End-to-end example (RESP) | ||||
|  | ||||
| ```bash | ||||
| # 1. Select Lance DB (assume db_id=1 created via RPC) | ||||
| redis-cli -p 6379 SELECT 1 | ||||
|  | ||||
| # 2. Configure embedding provider (OpenAI small model at 512 dims) | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET myset PROVIDER openai MODEL text-embedding-3-small PARAM dim 512 | ||||
|  | ||||
| # 3. Create dataset | ||||
| redis-cli -p 6379 LANCE.CREATE myset DIM 512 | ||||
|  | ||||
| # 4. Store documents | ||||
| redis-cli -p 6379 LANCE.STORE myset ID doc-1 TEXT "The quick brown fox jumps over the lazy dog" META title "Fox" category "animal" | ||||
| redis-cli -p 6379 LANCE.STORE myset ID doc-2 TEXT "A fast auburn fox vaulted a sleepy canine" META title "Fox paraphrase" category "animal" | ||||
|  | ||||
| # 5. Search | ||||
| redis-cli -p 6379 LANCE.SEARCH myset K 2 QUERY "quick brown fox" RETURN 1 title | ||||
|  | ||||
| # 6. Dataset info and listing | ||||
| redis-cli -p 6379 LANCE.INFO myset | ||||
| redis-cli -p 6379 LANCE.LIST | ||||
|  | ||||
| # 7. Delete and drop | ||||
| redis-cli -p 6379 LANCE.DEL myset doc-2 | ||||
| redis-cli -p 6379 LANCE.DROP myset | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 9) End-to-end example (JSON-RPC) | ||||
|  | ||||
| Assume RPC server on port 8080. Replace ids and ports as needed. | ||||
|  | ||||
| 1) Create Lance DB: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 100, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": ["Lance", { "name": "vectors-db", "storage_path": null, "max_size": null, "redis_version": null }, null] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 2) Set embedding config: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 101, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [1, "myset", "openai", "text-embedding-3-small", { "dim": "512" }] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 3) Create dataset: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 102, | ||||
|   "method": "herodb_lanceCreate", | ||||
|   "params": [1, "myset", 512] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 4) Store text: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 103, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [1, "myset", "doc-1", "The quick brown fox jumps over the lazy dog", { "title": "Fox", "category": "animal" }] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 5) Search text: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 104, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [1, "myset", "quick brown fox", 2, null, ["title"]] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 6) Info/list: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 105, | ||||
|   "method": "herodb_lanceInfo", | ||||
|   "params": [1, "myset"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 106, | ||||
|   "method": "herodb_lanceList", | ||||
|   "params": [1] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 7) Delete/drop: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 107, | ||||
|   "method": "herodb_lanceDel", | ||||
|   "params": [1, "myset", "doc-1"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 108, | ||||
|   "method": "herodb_lanceDrop", | ||||
|   "params": [1, "myset"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ## 10) Operational notes and troubleshooting | ||||
|  | ||||
| - If using OpenAI and you see “missing API key env”, set: | ||||
|   - Standard: `export OPENAI_API_KEY=sk-...` | ||||
|   - Azure: `export AZURE_OPENAI_API_KEY=...` and pass `use_azure true`, `azure_endpoint`, `azure_deployment`, `azure_api_version`. | ||||
| - Dimensions mismatch: | ||||
|   - Ensure the dataset DIM equals the provider’s embedding dim. For OpenAI text-embedding-3 models, set `PARAM dim 512` (or another supported size) and use that same DIM for `LANCE.CREATE`. | ||||
| - DB 0 restriction: | ||||
|   - Lance is not allowed on DB 0. Use db_id >= 1. | ||||
| - Permissions: | ||||
|   - Read operations (SEARCH, LIST, INFO) require read permission. | ||||
|   - Mutations (CREATE, STORE, CREATEINDEX, DEL, DROP, EMBEDDING CONFIG SET) require readwrite permission. | ||||
| - Backend gating: | ||||
|   - On Lance DBs, only LANCE.* commands are accepted (plus basic control). | ||||
| - Current index behavior: | ||||
|   - `LANCE.CREATEINDEX` returns OK but is a no-op. Future versions will integrate Lance vector indices. | ||||
| - Implementation files for reference: | ||||
|   - [src/lance_store.rs](src/lance_store.rs), [src/cmd.rs](src/cmd.rs), [src/rpc.rs](src/rpc.rs), [src/server.rs](src/server.rs), [src/embedding.rs](src/embedding.rs) | ||||
							
								
								
									
docs/lancedb_text_and_images_example.md (new file, 134 lines)
							| @@ -0,0 +1,134 @@ | ||||
| # LanceDB Text and Images: End-to-End Example | ||||
|  | ||||
| This guide demonstrates creating a Lance backend database, ingesting two text documents and two images, performing searches over both, and cleaning up the datasets. | ||||
|  | ||||
| Prerequisites | ||||
| - Build HeroDB and start the server with JSON-RPC enabled. | ||||
| Commands: | ||||
| ```bash | ||||
| cargo build --release | ||||
| ./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc | ||||
| ``` | ||||
|  | ||||
| We'll use: | ||||
| - redis-cli for RESP commands against port 6379 | ||||
| - curl for JSON-RPC against port 8080, if desired | ||||
| - Deterministic local embedders to avoid external dependencies: testhash (text, dim 64) and testimagehash (image, dim 512) | ||||
|  | ||||
| 0) Create a Lance-backed database (JSON-RPC) | ||||
| Request: | ||||
| ```json | ||||
| { "jsonrpc": "2.0", "id": 1, "method": "herodb_createDatabase", "params": ["Lance", { "name": "media-db", "storage_path": null, "max_size": null, "redis_version": null }, null] } | ||||
| ``` | ||||
| Response returns db_id (assume 1). Select DB over RESP: | ||||
| ```bash | ||||
| redis-cli -p 6379 SELECT 1 | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| 1) Configure embedding providers | ||||
| We'll create two datasets with independent embedding configs: | ||||
| - textset → provider testhash, dim 64 | ||||
| - imageset → provider testimagehash, dim 512 | ||||
|  | ||||
| Text config: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET textset PROVIDER testhash MODEL any PARAM dim 64 | ||||
| # → OK | ||||
| ``` | ||||
| Image config: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET imageset PROVIDER testimagehash MODEL any PARAM dim 512 | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| 2) Create datasets | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.CREATE textset DIM 64 | ||||
| # → OK | ||||
| redis-cli -p 6379 LANCE.CREATE imageset DIM 512 | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| 3) Ingest two text documents (server-side embedding) | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.STORE textset ID doc-1 TEXT "The quick brown fox jumps over the lazy dog" META title "Fox" category "animal" | ||||
| # → OK | ||||
| redis-cli -p 6379 LANCE.STORE textset ID doc-2 TEXT "A fast auburn fox vaulted a sleepy canine" META title "Paraphrase" category "animal" | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| 4) Ingest two images | ||||
| You can provide either a URI or base64-encoded bytes: use the URI keyword for a URL and BYTES for base64 image data. | ||||
| Example using free placeholder images: | ||||
| ```bash | ||||
| # Store via URI | ||||
| redis-cli -p 6379 LANCE.STOREIMAGE imageset ID img-1 URI "https://picsum.photos/seed/1/256/256" META title "Seed1" group "demo" | ||||
| # → OK | ||||
| redis-cli -p 6379 LANCE.STOREIMAGE imageset ID img-2 URI "https://picsum.photos/seed/2/256/256" META title "Seed2" group "demo" | ||||
| # → OK | ||||
| ``` | ||||
| If your environment blocks outbound HTTP, you can embed image bytes: | ||||
| ```bash | ||||
| # Example: read a local file and base64-encode it without line wrapping (replace the path; -w0 is GNU base64) | ||||
| b64=$(base64 -w0 ./image1.png) | ||||
| redis-cli -p 6379 LANCE.STOREIMAGE imageset ID img-b64-1 BYTES "$b64" META title "Local1" group "demo" | ||||
| ``` | ||||
|  | ||||
| 5) Search text | ||||
| ```bash | ||||
| # Top-2 nearest neighbors for a query | ||||
| redis-cli -p 6379 LANCE.SEARCH textset K 2 QUERY "quick brown fox" RETURN 1 title | ||||
| # → 1) [id, score, [k1,v1,...]] | ||||
| ``` | ||||
| With a filter (supports equality on schema or meta keys): | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.SEARCH textset K 2 QUERY "fox jumps" FILTER "category = 'animal'" RETURN 1 title | ||||
| ``` | ||||
|  | ||||
| 6) Search images | ||||
| ```bash | ||||
| # Provide a URI as the query | ||||
| redis-cli -p 6379 LANCE.SEARCHIMAGE imageset K 2 QUERYURI "https://picsum.photos/seed/1/256/256" RETURN 1 title | ||||
|  | ||||
| # Or provide base64 bytes as the query | ||||
| qb64=$(curl -s https://picsum.photos/seed/3/256/256 | base64 -w0) | ||||
| redis-cli -p 6379 LANCE.SEARCHIMAGE imageset K 2 QUERYBYTES "$qb64" RETURN 1 title | ||||
| ``` | ||||
|  | ||||
| 7) Inspect datasets | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.LIST | ||||
| redis-cli -p 6379 LANCE.INFO textset | ||||
| redis-cli -p 6379 LANCE.INFO imageset | ||||
| ``` | ||||
|  | ||||
| 8) Delete by id and drop datasets | ||||
| ```bash | ||||
| # Delete one record | ||||
| redis-cli -p 6379 LANCE.DEL textset doc-2 | ||||
| # → OK | ||||
|  | ||||
| # Drop entire datasets | ||||
| redis-cli -p 6379 LANCE.DROP textset | ||||
| redis-cli -p 6379 LANCE.DROP imageset | ||||
| # → OK | ||||
| ``` | ||||
|  | ||||
| Appendix: Using OpenAI embeddings instead of test providers | ||||
| Text: | ||||
| ```bash | ||||
| export OPENAI_API_KEY=sk-... | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET textset PROVIDER openai MODEL text-embedding-3-small PARAM dim 512 | ||||
| redis-cli -p 6379 LANCE.CREATE textset DIM 512 | ||||
| ``` | ||||
| Custom OpenAI-compatible endpoint: | ||||
| ```bash | ||||
| redis-cli -p 6379 LANCE.EMBEDDING CONFIG SET textset PROVIDER openai MODEL text-embedding-3-small \ | ||||
|   PARAM endpoint http://localhost:8081/v1/embeddings \ | ||||
|   PARAM dim 512 | ||||
| ``` | ||||
| Notes: | ||||
| - Ensure dataset DIM matches the configured embedding dimension. | ||||
| - Lance is only available for non-admin databases (db_id >= 1). | ||||
| - On Lance DBs, only LANCE.* and basic control commands are allowed. | ||||
							
								
								
									
docs/local_embedder_full_example.md (new file, 831 lines)
							| @@ -0,0 +1,831 @@ | ||||
| # HeroDB Embedding Models: Complete Tutorial | ||||
|  | ||||
| This tutorial demonstrates how to use embedding models with HeroDB for vector search, covering local self-hosted models, OpenAI's API, and deterministic test embedders. | ||||
|  | ||||
| ## Table of Contents | ||||
| - [Prerequisites](#prerequisites) | ||||
| - [Scenario 1: Local Embedding Model](#scenario-1-local-embedding-model-testing) | ||||
| - [Scenario 2: OpenAI API](#scenario-2-openai-api) | ||||
| - [Scenario 3: Deterministic Test Embedder](#scenario-3-deterministic-test-embedder-no-network) | ||||
| - [Troubleshooting](#troubleshooting) | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Prerequisites | ||||
|  | ||||
| ### Start HeroDB Server | ||||
|  | ||||
| Build and start HeroDB with RPC enabled: | ||||
|  | ||||
| ```bash | ||||
| cargo build --release | ||||
| ./target/release/herodb --dir ./data --admin-secret my-admin-secret --enable-rpc --rpc-port 8080 | ||||
| ``` | ||||
|  | ||||
| This starts: | ||||
| - Redis-compatible server on port 6379 | ||||
| - JSON-RPC server on port 8080 | ||||
|  | ||||
| ### Client Tools | ||||
|  | ||||
| For Redis-like commands: | ||||
| ```bash | ||||
| redis-cli -p 6379 | ||||
| ``` | ||||
|  | ||||
| For JSON-RPC calls, use `curl`: | ||||
| ```bash | ||||
| curl -X POST http://localhost:8080 \ | ||||
|   -H "Content-Type: application/json" \ | ||||
|   -d '{"jsonrpc":"2.0","id":1,"method":"herodb_METHOD","params":[...]}' | ||||
| ``` | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Scenario 1: Local Embedding Model (Testing) | ||||
|  | ||||
| Run your own embedding service locally for development, testing, or privacy. | ||||
|  | ||||
| ### Option A: Python Mock Server (Simplest) | ||||
|  | ||||
| This creates a minimal OpenAI-compatible embedding server for testing. | ||||
|  | ||||
| **1. Create `mock_embedder.py`:** | ||||
|  | ||||
| ```python | ||||
| from flask import Flask, request, jsonify | ||||
| import numpy as np | ||||
|  | ||||
| app = Flask(__name__) | ||||
|  | ||||
| @app.route('/v1/embeddings', methods=['POST']) | ||||
| def embeddings(): | ||||
|     """OpenAI-compatible embeddings endpoint""" | ||||
|     data = request.json | ||||
|     inputs = data.get('input', []) | ||||
|      | ||||
|     # Handle both single string and array | ||||
|     if isinstance(inputs, str): | ||||
|         inputs = [inputs] | ||||
|      | ||||
|     # Generate deterministic 768-dim embeddings (hash-based) | ||||
|     embeddings = [] | ||||
|     for text in inputs: | ||||
|         # Simple hash to vector (deterministic) | ||||
|         vec = np.zeros(768) | ||||
|         for i, char in enumerate(text[:768]): | ||||
|             vec[i % 768] += ord(char) / 255.0 | ||||
|          | ||||
|         # L2 normalize | ||||
|         norm = np.linalg.norm(vec) | ||||
|         if norm > 0: | ||||
|             vec = vec / norm | ||||
|          | ||||
|         embeddings.append(vec.tolist()) | ||||
|      | ||||
|     return jsonify({ | ||||
|         "data": [{"embedding": emb, "index": i} for i, emb in enumerate(embeddings)], | ||||
|         "model": data.get('model', 'mock-local'), | ||||
|         "usage": {"total_tokens": sum(len(t) for t in inputs)} | ||||
|     }) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     print("Starting mock embedding server on http://127.0.0.1:8081") | ||||
|     app.run(host='127.0.0.1', port=8081, debug=False) | ||||
| ``` | ||||
|  | ||||
| **2. Install dependencies and run:** | ||||
|  | ||||
| ```bash | ||||
| pip install flask numpy | ||||
| python mock_embedder.py | ||||
| ``` | ||||
|  | ||||
| Output: `Starting mock embedding server on http://127.0.0.1:8081` | ||||
|  | ||||
| **3. Test the server (optional):** | ||||
|  | ||||
| ```bash | ||||
| curl -X POST http://127.0.0.1:8081/v1/embeddings \ | ||||
|   -H "Content-Type: application/json" \ | ||||
|   -d '{"input":["hello world"],"model":"test"}' | ||||
| ``` | ||||
|  | ||||
| You should see a JSON response with a 768-dimensional embedding. | ||||
|  | ||||
| ### End-to-End Example with Local Model | ||||
|  | ||||
| **Step 1: Create a Lance database** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": [ | ||||
|     "Lance", | ||||
|     { "name": "local-vectors", "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Expected response: | ||||
| ```json | ||||
| {"jsonrpc":"2.0","id":1,"result":1} | ||||
| ``` | ||||
|  | ||||
| The database ID is `1`. | ||||
|  | ||||
| **Step 2: Configure embedding for the dataset** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     { | ||||
|       "provider": "openai", | ||||
|       "model": "mock-local", | ||||
|       "dim": 768, | ||||
|       "endpoint": "http://127.0.0.1:8081/v1/embeddings", | ||||
|       "headers": { | ||||
|         "Authorization": "Bearer dummy" | ||||
|       }, | ||||
|       "timeout_ms": 30000 | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| redis-cli -p 6379 | ||||
| SELECT 1 | ||||
| LANCE.EMBEDDING CONFIG SET products PROVIDER openai MODEL mock-local DIM 768 ENDPOINT http://127.0.0.1:8081/v1/embeddings HEADER Authorization "Bearer dummy" TIMEOUTMS 30000 | ||||
| ``` | ||||
|  | ||||
| Expected response: | ||||
| ```json | ||||
| {"jsonrpc":"2.0","id":2,"result":true} | ||||
| ``` | ||||
|  | ||||
| **Step 3: Verify configuration** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "herodb_lanceGetEmbeddingConfig", | ||||
|   "params": [1, "products"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.EMBEDDING CONFIG GET products | ||||
| ``` | ||||
|  | ||||
| Expected: Returns your configuration with provider, model, dim, endpoint, etc. | ||||
|  | ||||
| **Step 4: Insert product data** | ||||
|  | ||||
| JSON-RPC (item 1): | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "item-1", | ||||
|     "Waterproof hiking boots with ankle support and aggressive tread", | ||||
|     { "brand": "TrailMax", "category": "footwear", "price": "129.99" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.STORE products ID item-1 TEXT "Waterproof hiking boots with ankle support and aggressive tread" META brand TrailMax category footwear price 129.99 | ||||
| ``` | ||||
|  | ||||
| JSON-RPC (item 2): | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "item-2", | ||||
|     "Lightweight running shoes with breathable mesh upper", | ||||
|     { "brand": "SpeedFit", "category": "footwear", "price": "89.99" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| JSON-RPC (item 3): | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "item-3", | ||||
|     "Insulated winter jacket with removable hood and multiple pockets", | ||||
|     { "brand": "WarmTech", "category": "outerwear", "price": "199.99" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| JSON-RPC (item 4): | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 7, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "item-4", | ||||
|     "Camping tent for 4 people with waterproof rainfly", | ||||
|     { "brand": "OutdoorPro", "category": "camping", "price": "249.99" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Expected response for each: `{"jsonrpc":"2.0","id":N,"result":true}` | ||||
|  | ||||
| **Step 5: Search by text query** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 8, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "boots for hiking in wet conditions", | ||||
|     3, | ||||
|     null, | ||||
|     ["brand", "category", "price"] | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.SEARCH products K 3 QUERY "boots for hiking in wet conditions" RETURN 3 brand category price | ||||
| ``` | ||||
|  | ||||
| Expected response: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 8, | ||||
|   "result": { | ||||
|     "results": [ | ||||
|       { | ||||
|         "id": "item-1", | ||||
|         "score": 0.234, | ||||
|         "meta": { | ||||
|           "brand": "TrailMax", | ||||
|           "category": "footwear", | ||||
|           "price": "129.99" | ||||
|         } | ||||
|       }, | ||||
|       ... | ||||
|     ] | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| **Step 6: Search with metadata filter** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 9, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "products", | ||||
|     "comfortable shoes for running", | ||||
|     5, | ||||
|     "category = 'footwear'", | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.SEARCH products K 5 QUERY "comfortable shoes for running" FILTER "category = 'footwear'" | ||||
| ``` | ||||
|  | ||||
| This returns only items where `category` equals `'footwear'`. | ||||
|  | ||||
| **Step 7: List datasets** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 10, | ||||
|   "method": "herodb_lanceList", | ||||
|   "params": [1] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.LIST | ||||
| ``` | ||||
|  | ||||
| **Step 8: Get dataset info** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 11, | ||||
|   "method": "herodb_lanceInfo", | ||||
|   "params": [1, "products"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.INFO products | ||||
| ``` | ||||
|  | ||||
| Returns dimension, row count, and other metadata. | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Scenario 2: OpenAI API | ||||
|  | ||||
| Use OpenAI's production embedding service for semantic search. | ||||
|  | ||||
| ### Setup | ||||
|  | ||||
| **1. Set your API key:** | ||||
|  | ||||
| ```bash | ||||
| export OPENAI_API_KEY="sk-your-actual-openai-key-here" | ||||
| ``` | ||||
|  | ||||
| **2. Start HeroDB** (same as before): | ||||
|  | ||||
| ```bash | ||||
| ./target/release/herodb --dir ./data --admin-secret my-admin-secret --enable-rpc --rpc-port 8080 | ||||
| ``` | ||||
|  | ||||
| ### End-to-End Example with OpenAI | ||||
|  | ||||
| **Step 1: Create a Lance database** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": [ | ||||
|     "Lance", | ||||
|     { "name": "openai-vectors", "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Expected: `{"jsonrpc":"2.0","id":1,"result":1}` (database ID = 1) | ||||
|  | ||||
| **Step 2: Configure OpenAI embeddings** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     { | ||||
|       "provider": "openai", | ||||
|       "model": "text-embedding-3-small", | ||||
|       "dim": 1536, | ||||
|       "endpoint": null, | ||||
|       "headers": {}, | ||||
|       "timeout_ms": 30000 | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| redis-cli -p 6379 | ||||
| SELECT 1 | ||||
| LANCE.EMBEDDING CONFIG SET documents PROVIDER openai MODEL text-embedding-3-small DIM 1536 TIMEOUTMS 30000 | ||||
| ``` | ||||
|  | ||||
| Notes: | ||||
| - `endpoint` is `null` (defaults to OpenAI API: https://api.openai.com/v1/embeddings) | ||||
| - `headers` is empty (Authorization auto-added from OPENAI_API_KEY env var) | ||||
| - `dim` is 1536 for text-embedding-3-small | ||||
|  | ||||
| Expected: `{"jsonrpc":"2.0","id":2,"result":true}` | ||||
|  | ||||
| **Step 3: Insert documents** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     "doc-1", | ||||
|     "The quick brown fox jumps over the lazy dog", | ||||
|     { "source": "example", "lang": "en", "topic": "animals" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     "doc-2", | ||||
|     "Machine learning models require large datasets for training and validation", | ||||
|     { "source": "tech", "lang": "en", "topic": "ai" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     "doc-3", | ||||
|     "Python is a popular programming language for data science and web development", | ||||
|     { "source": "tech", "lang": "en", "topic": "programming" } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.STORE documents ID doc-1 TEXT "The quick brown fox jumps over the lazy dog" META source example lang en topic animals | ||||
| LANCE.STORE documents ID doc-2 TEXT "Machine learning models require large datasets for training and validation" META source tech lang en topic ai | ||||
| LANCE.STORE documents ID doc-3 TEXT "Python is a popular programming language for data science and web development" META source tech lang en topic programming | ||||
| ``` | ||||
|  | ||||
| Expected for each: `{"jsonrpc":"2.0","id":N,"result":true}` | ||||
|  | ||||
| **Step 4: Semantic search** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     "artificial intelligence and neural networks", | ||||
|     3, | ||||
|     null, | ||||
|     ["source", "topic"] | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.SEARCH documents K 3 QUERY "artificial intelligence and neural networks" RETURN 2 source topic | ||||
| ``` | ||||
|  | ||||
| Expected response (doc-2 should rank highest due to semantic similarity): | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "result": { | ||||
|     "results": [ | ||||
|       { | ||||
|         "id": "doc-2", | ||||
|         "score": 0.123, | ||||
|         "meta": { | ||||
|           "source": "tech", | ||||
|           "topic": "ai" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "id": "doc-3", | ||||
|         "score": 0.456, | ||||
|         "meta": { | ||||
|           "source": "tech", | ||||
|           "topic": "programming" | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "id": "doc-1", | ||||
|         "score": 0.789, | ||||
|         "meta": { | ||||
|           "source": "example", | ||||
|           "topic": "animals" | ||||
|         } | ||||
|       } | ||||
|     ] | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Note: Lower score = better match (L2 distance). | ||||
|  | ||||
| **Step 5: Search with filter** | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 7, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "documents", | ||||
|     "programming and software", | ||||
|     5, | ||||
|     "topic = 'programming'", | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| LANCE.SEARCH documents K 5 QUERY "programming and software" FILTER "topic = 'programming'" | ||||
| ``` | ||||
|  | ||||
| This returns only documents where `topic` equals `'programming'`. | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Scenario 3: Deterministic Test Embedder (No Network) | ||||
|  | ||||
| For CI/offline development, use the built-in test embedder that requires no external service. | ||||
|  | ||||
| ### Configuration | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "testdata", | ||||
|     { | ||||
|       "provider": "test", | ||||
|       "model": "dev", | ||||
|       "dim": 64, | ||||
|       "endpoint": null, | ||||
|       "headers": {}, | ||||
|       "timeout_ms": null | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Redis-like: | ||||
| ```bash | ||||
| SELECT 1 | ||||
| LANCE.EMBEDDING CONFIG SET testdata PROVIDER test MODEL dev DIM 64 | ||||
| ``` | ||||
|  | ||||
| ### Usage | ||||
|  | ||||
| Use `lanceStoreText` and `lanceSearchText` as in previous scenarios (a short round-trip is sketched below). The embeddings are: | ||||
| - Deterministic (same text → same vector) | ||||
| - Fast (no network) | ||||
| - Not semantic (hash-based, not ML) | ||||
|  | ||||
| Perfect for testing the vector storage/search mechanics without external dependencies. | ||||
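|  | ||||
| A minimal round-trip with the test embedder, assuming the `testdata` config above and a dataset created with the matching DIM: | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p 6379 | ||||
| SELECT 1 | ||||
| LANCE.CREATE testdata DIM 64 | ||||
| LANCE.STORE testdata ID t-1 TEXT "deterministic embedding test" META kind ci | ||||
| LANCE.SEARCH testdata K 1 QUERY "deterministic embedding test" | ||||
| # → t-1 should be the top hit: identical text maps to an identical vector | ||||
| ``` | ||||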
|  | ||||
| --- | ||||
|  | ||||
| ## Advanced: Custom Headers and Timeouts | ||||
|  | ||||
| ### Example: Local model with custom auth | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "secure-data", | ||||
|     { | ||||
|       "provider": "openai", | ||||
|       "model": "custom-model", | ||||
|       "dim": 512, | ||||
|       "endpoint": "http://192.168.1.100:9000/embeddings", | ||||
|       "headers": { | ||||
|         "Authorization": "Bearer my-local-token", | ||||
|         "X-Custom-Header": "value" | ||||
|       }, | ||||
|       "timeout_ms": 60000 | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Example: OpenAI with explicit API key (not from env) | ||||
|  | ||||
| JSON-RPC: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "dataset", | ||||
|     { | ||||
|       "provider": "openai", | ||||
|       "model": "text-embedding-3-small", | ||||
|       "dim": 1536, | ||||
|       "endpoint": null, | ||||
|       "headers": { | ||||
|         "Authorization": "Bearer sk-your-key-here" | ||||
|       }, | ||||
|       "timeout_ms": 30000 | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Troubleshooting | ||||
|  | ||||
| ### Error: "Embedding config not set for dataset" | ||||
|  | ||||
| **Cause:** You tried to use `lanceStoreText` or `lanceSearchText` without configuring an embedder. | ||||
|  | ||||
| **Solution:** Run `lanceSetEmbeddingConfig` first. | ||||
|  | ||||
| ### Error: "Embedding dimension mismatch: expected X, got Y" | ||||
|  | ||||
| **Cause:** The embedding service returned vectors of a different size than configured. | ||||
|  | ||||
| **Solution:**  | ||||
| - For OpenAI text-embedding-3-small, use `dim: 1536` | ||||
| - For your local mock (from this tutorial), use `dim: 768` | ||||
| - Check your embedding service's actual output dimension | ||||
|  | ||||
| ### Error: "Missing API key in env 'OPENAI_API_KEY'" | ||||
|  | ||||
| **Cause:** Using OpenAI provider without setting the API key. | ||||
|  | ||||
| **Solution:** | ||||
| - Set `export OPENAI_API_KEY="sk-..."` before starting HeroDB, OR | ||||
| - Pass the key explicitly in headers: `"Authorization": "Bearer sk-..."` | ||||
|  | ||||
| ### Error: "HTTP request failed" or "Embeddings API error 404" | ||||
|  | ||||
| **Cause:** Cannot reach the embedding endpoint. | ||||
|  | ||||
| **Solution:** | ||||
| - Verify your local server is running (e.g., re-run the POST test from Scenario 1 against `http://127.0.0.1:8081/v1/embeddings`) | ||||
| - Check the endpoint URL in your config | ||||
| - Ensure firewall allows the connection | ||||
|  | ||||
| ### Error: "ERR DB backend is not Lance" | ||||
|  | ||||
| **Cause:** Trying to use LANCE.* commands on a non-Lance database. | ||||
|  | ||||
| **Solution:** Create the database with backend "Lance" (see Step 1). | ||||
|  | ||||
| ### Error: "write permission denied" | ||||
|  | ||||
| **Cause:** Database is private and you haven't authenticated. | ||||
|  | ||||
| **Solution:** Use `SELECT <db_id> KEY <access-key>` or make the database public via RPC. | ||||
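|  | ||||
| A minimal sketch of authenticating to a private database, assuming db 1 was made private and an access key "my-access-key" was added via RPC with readwrite permission: | ||||
|  | ||||
| ```bash | ||||
| # Select the private database with its access key | ||||
| redis-cli -p 6379 SELECT 1 KEY my-access-key | ||||
| # → OK | ||||
| ``` | ||||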
|  | ||||
| --- | ||||
|  | ||||
| ## Complete Example Script (Bash + curl) | ||||
|  | ||||
| Save as `test_embeddings.sh`: | ||||
|  | ||||
| ```bash | ||||
| #!/bin/bash | ||||
|  | ||||
| RPC_URL="http://localhost:8080" | ||||
|  | ||||
| # 1. Create Lance database | ||||
| curl -X POST $RPC_URL -H "Content-Type: application/json" -d '{ | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": ["Lance", {"name": "test-vectors", "storage_path": null, "max_size": null, "redis_version": null}, null] | ||||
| }' | ||||
|  | ||||
| echo -e "\n" | ||||
|  | ||||
| # 2. Configure local embedder | ||||
| curl -X POST $RPC_URL -H "Content-Type: application/json" -d '{ | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "herodb_lanceSetEmbeddingConfig", | ||||
|   "params": [1, "products", { | ||||
|     "provider": "openai", | ||||
|     "model": "mock", | ||||
|     "dim": 768, | ||||
|     "endpoint": "http://127.0.0.1:8081/v1/embeddings", | ||||
|     "headers": {"Authorization": "Bearer dummy"}, | ||||
|     "timeout_ms": 30000 | ||||
|   }] | ||||
| }' | ||||
|  | ||||
| echo -e "\n" | ||||
|  | ||||
| # 3. Insert data | ||||
| curl -X POST $RPC_URL -H "Content-Type: application/json" -d '{ | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "herodb_lanceStoreText", | ||||
|   "params": [1, "products", "item-1", "Hiking boots", {"brand": "TrailMax"}] | ||||
| }' | ||||
|  | ||||
| echo -e "\n" | ||||
|  | ||||
| # 4. Search | ||||
| curl -X POST $RPC_URL -H "Content-Type: application/json" -d '{ | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "herodb_lanceSearchText", | ||||
|   "params": [1, "products", "outdoor footwear", 5, null, null] | ||||
| }' | ||||
|  | ||||
| echo -e "\n" | ||||
| ``` | ||||
|  | ||||
| Run: | ||||
| ```bash | ||||
| chmod +x test_embeddings.sh | ||||
| ./test_embeddings.sh | ||||
| ``` | ||||
|  | ||||
| --- | ||||
|  | ||||
| ## Summary | ||||
|  | ||||
| | Provider | Use Case | Endpoint | API Key | | ||||
| |----------|----------|----------|---------| | ||||
| | `openai` | Production semantic search | Default (OpenAI) or custom URL | OPENAI_API_KEY env or headers | | ||||
| | `openai` | Local self-hosted gateway | http://127.0.0.1:8081/... | Optional (depends on your service) | | ||||
| | `test` | CI/offline development | N/A (local hash) | None | | ||||
| | `image_test` | Image testing | N/A (local hash) | None | | ||||
|  | ||||
| **Notes:** | ||||
| - The `provider` field is always `"openai"` for OpenAI-compatible services (whether cloud or local). This is because it uses the OpenAI-compatible API shape. | ||||
| - Use `endpoint` to point to your local service | ||||
| - Use `headers` for custom authentication | ||||
| - `dim` must match your embedding service's output dimension | ||||
| - Once configured, `lanceStoreText` and `lanceSearchText` handle embedding automatically | ||||
							
								
								
									
docs/rpc_examples.md (new file, 161 lines)
							| @@ -0,0 +1,161 @@ | ||||
| # HeroDB JSON-RPC Examples | ||||
|  | ||||
| These examples show full JSON-RPC 2.0 payloads for managing HeroDB via the RPC API (enable with `--enable-rpc`). Methods are named as `hero_<function>`. Params are positional arrays; enum values are strings (e.g., `"Redb"`). Copy-paste into Postman or similar clients. | ||||
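|  | ||||
| For quick testing outside Postman, each payload can also be POSTed with curl (a sketch assuming the HTTP RPC server listens on port 8080): | ||||
|  | ||||
| ```bash | ||||
| curl -s -X POST http://localhost:8080 \ | ||||
|   -H "Content-Type: application/json" \ | ||||
|   -d '{"jsonrpc":"2.0","id":3,"method":"hero_listDatabases","params":[]}' | ||||
| ``` | ||||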
|  | ||||
| ## Database Management | ||||
|  | ||||
| ### Create Database | ||||
| Creates a new database with optional per-database encryption key (stored write-only in Admin DB 0). | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "hero_createDatabase", | ||||
|   "params": [ | ||||
|     "Redb", | ||||
|     { "name": null, "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| With encryption: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "hero_createDatabase", | ||||
|   "params": [ | ||||
|     "Sled", | ||||
|     { "name": "secure-db", "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     "my-per-db-encryption-key" | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### List Databases | ||||
| Returns array of database infos (id, backend, encrypted status, size, etc.). | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "hero_listDatabases", | ||||
|   "params": [] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Get Database Info | ||||
| Retrieves detailed info for a specific database. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "hero_getDatabaseInfo", | ||||
|   "params": [1] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Delete Database | ||||
| Removes physical database file; metadata remains in Admin DB 0. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "method": "hero_deleteDatabase", | ||||
|   "params": [1] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Access Control | ||||
|  | ||||
| ### Add Access Key | ||||
| Adds a hashed access key for private databases. Permissions: `"read"` or `"readwrite"`. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "method": "hero_addAccessKey", | ||||
|   "params": [2, "my-access-key", "readwrite"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### List Access Keys | ||||
| Returns array of key hashes, permissions, and creation timestamps. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 7, | ||||
|   "method": "hero_listAccessKeys", | ||||
|   "params": [2] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Delete Access Key | ||||
| Removes key by its SHA-256 hash. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 8, | ||||
|   "method": "hero_deleteAccessKey", | ||||
|   "params": [2, "0123abcd...keyhash..."] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### Set Database Public/Private | ||||
| Toggles public access (default true). Private databases require access keys. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 9, | ||||
|   "method": "hero_setDatabasePublic", | ||||
|   "params": [2, false] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Server Info | ||||
|  | ||||
| ### Get Server Stats | ||||
| Returns stats like total databases and uptime. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 10, | ||||
|   "method": "hero_getServerStats", | ||||
|   "params": [] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## Notes | ||||
| - Per-database encryption keys are write-only; set at creation and used transparently. | ||||
| - Access keys are hashed (SHA-256) for storage; provide the plaintext key in requests (see the hash sketch after these notes). | ||||
| - Backend options: `"Redb"` (default) or `"Sled"`. | ||||
| - Config object fields (name, storage_path, etc.) are optional and currently ignored, but the object must still be passed positionally. | ||||
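|  | ||||
| If you need a key hash for `hero_deleteAccessKey`, the simplest route is to read it back from `hero_listAccessKeys`. To compute it locally instead, a sketch under the assumption that the stored hash is a plain hex-encoded SHA-256 of the plaintext key: | ||||
|  | ||||
| ```bash | ||||
| # Assumption: stored hash = SHA-256(plaintext access key), hex-encoded | ||||
| echo -n "my-access-key" | sha256sum | awk '{print $1}' | ||||
| ``` | ||||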
|  | ||||
| ## IPC over Unix Socket (non-HTTP) | ||||
|  | ||||
| HeroDB supports JSON-RPC over a Unix Domain Socket using reth-ipc. This transport is not HTTP; messages are JSON-RPC framed with a Content-Length header. | ||||
|  | ||||
| - Enable IPC on startup (adjust the socket path as needed): | ||||
|   - `herodb --dir /path/to/data --admin-secret YOUR_SECRET --enable-rpc-ipc --rpc-ipc-path /tmp/herodb.sock` | ||||
|  | ||||
| - The same RPC methods are available as over HTTP. Namespace is "hero" (e.g. hero_listDatabases). See the RPC trait in [src/rpc.rs](src/rpc.rs) and CLI flags in [src/main.rs](src/main.rs). The IPC bootstrap is in [src/rpc_server.rs](src/rpc_server.rs). | ||||
|  | ||||
| ### Test via socat (interactive) | ||||
|  | ||||
| 1) Connect to the socket with a small timeout: | ||||
| ``` | ||||
| sudo socat -d -d -t 5 - UNIX-CONNECT:/tmp/herodb.sock | ||||
| ``` | ||||
|  | ||||
| 2) Paste a framed JSON-RPC request (Content-Length header, then a blank line, then the JSON body). For example, to call hero_listDatabases, the body is: | ||||
|  | ||||
| {"jsonrpc":"2.0","id":3,"method":"hero_listDatabases","params":[]} | ||||
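|  | ||||
| Non-interactively, the same request can be piped through socat. This sketch follows the Content-Length framing described above and assumes CRLF line endings for the header (adjust if your build expects plain newlines): | ||||
|  | ||||
| ``` | ||||
| body='{"jsonrpc":"2.0","id":3,"method":"hero_listDatabases","params":[]}' | ||||
| # Frame the body with a Content-Length header and a blank line, then send it over the socket | ||||
| printf 'Content-Length: %d\r\n\r\n%s' "${#body}" "$body" | sudo socat -t 5 - UNIX-CONNECT:/tmp/herodb.sock | ||||
| ``` | ||||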
							
								
								
									
docs/search.md (new file, 397 lines)
							| @@ -0,0 +1,397 @@ | ||||
| # Full-Text Search with Tantivy | ||||
|  | ||||
| HeroDB includes powerful full-text search capabilities powered by [Tantivy](https://github.com/quickwit-oss/tantivy), a fast full-text search engine library written in Rust. This provides Redis-compatible search commands similar to RediSearch. | ||||
|  | ||||
| ## Overview | ||||
|  | ||||
| The search functionality allows you to: | ||||
| - Create search indexes with custom schemas | ||||
| - Index documents with multiple field types | ||||
| - Perform complex queries with filters | ||||
| - Work with text, numeric, date, and geographic data | ||||
| - Run real-time searches with high performance | ||||
|  | ||||
| ## Search Commands | ||||
|  | ||||
| ### FT.CREATE - Create Search Index | ||||
|  | ||||
| Create a new search index with a defined schema. | ||||
|  | ||||
| ```bash | ||||
| FT.CREATE index_name SCHEMA field_name field_type [options] [field_name field_type [options] ...] | ||||
| ``` | ||||
|  | ||||
| **Field Types:** | ||||
| - `TEXT` - Full-text searchable text fields | ||||
| - `NUMERIC` - Numeric fields (integers, floats) | ||||
| - `TAG` - Tag fields for exact matching | ||||
| - `GEO` - Geographic coordinates (lat,lon) | ||||
| - `DATE` - Date/timestamp fields | ||||
|  | ||||
| **Field Options:** | ||||
| - `STORED` - Store field value for retrieval | ||||
| - `INDEXED` - Make field searchable | ||||
| - `TOKENIZED` - Enable tokenization for text fields | ||||
| - `FAST` - Enable fast access for numeric fields | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| # Create a product search index | ||||
| FT.CREATE products SCHEMA  | ||||
|   title TEXT STORED INDEXED TOKENIZED | ||||
|   description TEXT STORED INDEXED TOKENIZED   | ||||
|   price NUMERIC STORED INDEXED FAST | ||||
|   category TAG STORED | ||||
|   location GEO STORED | ||||
|   created_date DATE STORED INDEXED | ||||
| ``` | ||||
|  | ||||
| ### FT.ADD - Add Document to Index | ||||
|  | ||||
| Add a document to a search index. | ||||
|  | ||||
| ```bash | ||||
| FT.ADD index_name doc_id [SCORE score] FIELDS field_name field_value [field_name field_value ...] | ||||
| ``` | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| # Add a product document | ||||
| FT.ADD products product:1 SCORE 1.0 FIELDS  | ||||
|   title "Wireless Headphones"  | ||||
|   description "High-quality wireless headphones with noise cancellation" | ||||
|   price 199.99 | ||||
|   category "electronics" | ||||
|   location "37.7749,-122.4194" | ||||
|   created_date 1640995200000 | ||||
| ``` | ||||
|  | ||||
| ### FT.SEARCH - Search Documents | ||||
|  | ||||
| Search for documents in an index. | ||||
|  | ||||
| ```bash | ||||
| FT.SEARCH index_name query [LIMIT offset count] [FILTER field min max] [RETURN field [field ...]] | ||||
| ``` | ||||
|  | ||||
| **Query Syntax:** | ||||
| - Simple terms: `wireless headphones` | ||||
| - Phrase queries: `"noise cancellation"` | ||||
| - Field-specific: `title:wireless` | ||||
| - Boolean operators: `wireless AND headphones` | ||||
| - Wildcards: `head*` | ||||
|  | ||||
| **Examples:** | ||||
| ```bash | ||||
| # Simple text search | ||||
| FT.SEARCH products "wireless headphones" | ||||
|  | ||||
| # Search with filters | ||||
| FT.SEARCH products "headphones" FILTER price 100 300 LIMIT 0 10 | ||||
|  | ||||
| # Field-specific search | ||||
| FT.SEARCH products "title:wireless AND category:electronics" | ||||
|  | ||||
| # Return specific fields only | ||||
| FT.SEARCH products "*" RETURN title price | ||||
| ``` | ||||
|  | ||||
| ### FT.DEL - Delete Document | ||||
|  | ||||
| Remove a document from the search index. | ||||
|  | ||||
| ```bash | ||||
| FT.DEL index_name doc_id | ||||
| ``` | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| FT.DEL products product:1 | ||||
| ``` | ||||
|  | ||||
| ### FT.INFO - Get Index Information | ||||
|  | ||||
| Get information about a search index. | ||||
|  | ||||
| ```bash | ||||
| FT.INFO index_name | ||||
| ``` | ||||
|  | ||||
| **Returns:** | ||||
| - Index name and document count | ||||
| - Field definitions and types | ||||
| - Index configuration | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| FT.INFO products | ||||
| ``` | ||||
|  | ||||
| ### FT.DROP - Drop Index | ||||
|  | ||||
| Delete an entire search index. | ||||
|  | ||||
| ```bash | ||||
| FT.DROP index_name | ||||
| ``` | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| FT.DROP products | ||||
| ``` | ||||
|  | ||||
| ### FT.ALTER - Alter Index Schema | ||||
|  | ||||
| Add new fields to an existing index. | ||||
|  | ||||
| ```bash | ||||
| FT.ALTER index_name SCHEMA ADD field_name field_type [options] | ||||
| ``` | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| FT.ALTER products SCHEMA ADD brand TAG STORED | ||||
| ``` | ||||
|  | ||||
| ### FT.AGGREGATE - Aggregate Search Results | ||||
|  | ||||
| Perform aggregations on search results. | ||||
|  | ||||
| ```bash | ||||
| FT.AGGREGATE index_name query [GROUPBY field] [REDUCE function field AS alias] | ||||
| ``` | ||||
|  | ||||
| **Example:** | ||||
| ```bash | ||||
| # Group products by category and count | ||||
| FT.AGGREGATE products "*" GROUPBY category REDUCE COUNT 0 AS count | ||||
| ``` | ||||
|  | ||||
| ## Field Types in Detail | ||||
|  | ||||
| ### TEXT Fields | ||||
| - **Purpose**: Full-text search on natural language content | ||||
| - **Features**: Tokenization, stemming, stop-word removal | ||||
| - **Options**: `STORED`, `INDEXED`, `TOKENIZED` | ||||
| - **Example**: Product titles, descriptions, content | ||||
|  | ||||
| ### NUMERIC Fields   | ||||
| - **Purpose**: Numeric data for range queries and sorting | ||||
| - **Types**: I64, U64, F64 | ||||
| - **Options**: `STORED`, `INDEXED`, `FAST` | ||||
| - **Example**: Prices, quantities, ratings | ||||
|  | ||||
| ### TAG Fields | ||||
| - **Purpose**: Exact-match categorical data | ||||
| - **Features**: No tokenization, exact string matching | ||||
| - **Options**: `STORED`, case sensitivity control | ||||
| - **Example**: Categories, brands, status values | ||||
|  | ||||
| ### GEO Fields | ||||
| - **Purpose**: Geographic coordinates | ||||
| - **Format**: "latitude,longitude" (e.g., "37.7749,-122.4194") | ||||
| - **Features**: Geographic distance queries | ||||
| - **Options**: `STORED` | ||||
|  | ||||
| ### DATE Fields | ||||
| - **Purpose**: Timestamp and date data | ||||
| - **Format**: Unix timestamp in milliseconds | ||||
| - **Features**: Range queries, temporal filtering | ||||
| - **Options**: `STORED`, `INDEXED`, `FAST` | ||||
|  | ||||
| ## Search Query Syntax | ||||
|  | ||||
| ### Basic Queries | ||||
| ```bash | ||||
| # Single term | ||||
| FT.SEARCH products "wireless" | ||||
|  | ||||
| # Multiple terms (AND by default) | ||||
| FT.SEARCH products "wireless headphones" | ||||
|  | ||||
| # Phrase query | ||||
| FT.SEARCH products "\"noise cancellation\"" | ||||
| ``` | ||||
|  | ||||
| ### Field-Specific Queries | ||||
| ```bash | ||||
| # Search in specific field | ||||
| FT.SEARCH products "title:wireless" | ||||
|  | ||||
| # Multiple field queries | ||||
| FT.SEARCH products "title:wireless AND description:bluetooth" | ||||
| ``` | ||||
|  | ||||
| ### Boolean Operators | ||||
| ```bash | ||||
| # AND operator | ||||
| FT.SEARCH products "wireless AND headphones" | ||||
|  | ||||
| # OR operator   | ||||
| FT.SEARCH products "wireless OR bluetooth" | ||||
|  | ||||
| # NOT operator | ||||
| FT.SEARCH products "headphones NOT wired" | ||||
| ``` | ||||
|  | ||||
| ### Wildcards and Fuzzy Search | ||||
| ```bash | ||||
| # Wildcard search | ||||
| FT.SEARCH products "head*" | ||||
|  | ||||
| # Fuzzy search (approximate matching) | ||||
| FT.SEARCH products "%headphone%" | ||||
| ``` | ||||
|  | ||||
| ### Range Queries | ||||
| ```bash | ||||
| # Numeric range in query | ||||
| FT.SEARCH products "@price:[100 300]" | ||||
|  | ||||
| # Date range | ||||
| FT.SEARCH products "@created_date:[1640995200000 1672531200000]" | ||||
| ``` | ||||
|  | ||||
| ## Filtering and Sorting | ||||
|  | ||||
| ### FILTER Clause | ||||
| ```bash | ||||
| # Numeric filter | ||||
| FT.SEARCH products "headphones" FILTER price 100 300 | ||||
|  | ||||
| # Multiple filters | ||||
| FT.SEARCH products "*" FILTER price 100 500 FILTER rating 4 5 | ||||
| ``` | ||||
|  | ||||
| ### LIMIT Clause | ||||
| ```bash | ||||
| # Pagination | ||||
| FT.SEARCH products "wireless" LIMIT 0 10    # First 10 results | ||||
| FT.SEARCH products "wireless" LIMIT 10 10   # Next 10 results | ||||
| ``` | ||||
|  | ||||
| ### RETURN Clause | ||||
| ```bash | ||||
| # Return specific fields | ||||
| FT.SEARCH products "*" RETURN title price | ||||
|  | ||||
| # Return all stored fields (default) | ||||
| FT.SEARCH products "*" | ||||
| ``` | ||||
|  | ||||
| ## Performance Considerations | ||||
|  | ||||
| ### Indexing Strategy | ||||
| - Only index fields you need to search on | ||||
| - Use `FAST` option for frequently filtered numeric fields | ||||
| - Consider storage vs. search performance trade-offs | ||||
|  | ||||
| ### Query Optimization | ||||
| - Use specific field queries when possible | ||||
| - Combine filters with text queries for better performance | ||||
| - Use pagination with LIMIT for large result sets | ||||
|  | ||||
| ### Memory Usage | ||||
| - Tantivy indexes are memory-mapped for performance | ||||
| - Index size depends on document count and field configuration | ||||
| - Monitor disk space for index storage | ||||
|  | ||||
| ## Integration with Redis Commands | ||||
|  | ||||
| Search indexes work alongside regular Redis data: | ||||
|  | ||||
| ```bash | ||||
| # Store product data in Redis hash | ||||
| HSET product:1 title "Wireless Headphones" price "199.99" | ||||
|  | ||||
| # Index the same data for search | ||||
| FT.ADD products product:1 FIELDS title "Wireless Headphones" price 199.99 | ||||
|  | ||||
| # Search returns document IDs that can be used with Redis commands | ||||
| FT.SEARCH products "wireless" | ||||
| # Returns: product:1 | ||||
|  | ||||
| # Retrieve full data using Redis | ||||
| HGETALL product:1 | ||||
| ``` | ||||
|  | ||||
| ## Example Use Cases | ||||
|  | ||||
| ### E-commerce Product Search | ||||
| ```bash | ||||
| # Create product catalog index | ||||
| FT.CREATE catalog SCHEMA  | ||||
|   name TEXT STORED INDEXED TOKENIZED | ||||
|   description TEXT INDEXED TOKENIZED | ||||
|   price NUMERIC STORED INDEXED FAST | ||||
|   category TAG STORED | ||||
|   brand TAG STORED | ||||
|   rating NUMERIC STORED FAST | ||||
|  | ||||
| # Add products | ||||
| FT.ADD catalog prod:1 FIELDS name "iPhone 14" price 999 category "phones" brand "apple" rating 4.5 | ||||
| FT.ADD catalog prod:2 FIELDS name "Samsung Galaxy" price 899 category "phones" brand "samsung" rating 4.3 | ||||
|  | ||||
| # Search queries | ||||
| FT.SEARCH catalog "iPhone" | ||||
| FT.SEARCH catalog "phones" FILTER price 800 1000 | ||||
| FT.SEARCH catalog "@brand:apple" | ||||
| ``` | ||||
|  | ||||
| ### Content Management | ||||
| ```bash | ||||
| # Create content index | ||||
| FT.CREATE content SCHEMA | ||||
|   title TEXT STORED INDEXED TOKENIZED | ||||
|   body TEXT INDEXED TOKENIZED | ||||
|   author TAG STORED | ||||
|   published DATE STORED INDEXED | ||||
|   tags TAG STORED | ||||
|  | ||||
| # Search content | ||||
| FT.SEARCH content "machine learning" | ||||
| FT.SEARCH content "@author:john AND @tags:ai" | ||||
| FT.SEARCH content "*" FILTER published 1640995200000 1672531200000 | ||||
| ``` | ||||
|  | ||||
| ### Geographic Search | ||||
| ```bash | ||||
| # Create location-based index | ||||
| FT.CREATE places SCHEMA | ||||
|   name TEXT STORED INDEXED TOKENIZED | ||||
|   location GEO STORED | ||||
|   type TAG STORED | ||||
|  | ||||
| # Add locations | ||||
| FT.ADD places place:1 FIELDS name "Golden Gate Bridge" location "37.8199,-122.4783" type "landmark" | ||||
|  | ||||
| # Geographic queries (future feature) | ||||
| FT.SEARCH places "@location:[37.7749 -122.4194 10 km]" | ||||
| ``` | ||||
|  | ||||
| ## Error Handling | ||||
|  | ||||
| Common error responses (an illustrative session follows the list): | ||||
| - `ERR index not found` - Index doesn't exist | ||||
| - `ERR field not found` - Field not defined in schema | ||||
| - `ERR invalid query syntax` - Malformed query | ||||
| - `ERR document not found` - Document ID doesn't exist | ||||
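|  | ||||
| These are returned as standard RESP errors. A hypothetical `redis-cli` session is sketched below; the exact error wording may differ between versions: | ||||
|  | ||||
| ```bash | ||||
| # Querying an index that was never created | ||||
| FT.SEARCH missing_index "anything" | ||||
| # (error) ERR index not found | ||||
|  | ||||
| # Searching a field that is not defined in the schema | ||||
| FT.SEARCH products "@nonexistent_field:foo" | ||||
| # (error) ERR field not found | ||||
| ``` | ||||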
|  | ||||
| ## Best Practices | ||||
|  | ||||
| 1. **Schema Design**: Plan your schema carefully - changes require reindexing | ||||
| 2. **Field Selection**: Only store and index fields you actually need | ||||
| 3. **Batch Operations**: Add multiple documents efficiently (see the loop sketch after this list) | ||||
| 4. **Query Testing**: Test queries for performance with realistic data | ||||
| 5. **Monitoring**: Monitor index size and query performance | ||||
| 6. **Backup**: Include search indexes in backup strategies | ||||
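|  | ||||
| For bulk loading (point 3), a plain shell loop over `redis-cli` is usually enough. A minimal sketch using the `FT.ADD` form shown earlier in this document; the index name, document IDs, and field values are illustrative only: | ||||
|  | ||||
| ```bash | ||||
| # Add several illustrative documents to the products index | ||||
| for i in 1 2 3; do | ||||
|   redis-cli FT.ADD products "product:$i" FIELDS title "Sample product $i" price "$((i * 100))" | ||||
| done | ||||
| ``` | ||||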
|  | ||||
| ## Future Enhancements | ||||
|  | ||||
| Planned features: | ||||
| - Geographic distance queries | ||||
| - Advanced aggregations and faceting | ||||
| - Highlighting of search results | ||||
| - Synonyms and custom analyzers | ||||
| - Real-time suggestions and autocomplete | ||||
| - Index replication and sharding | ||||
docs/tantivy.md (new file, 253 lines added)
							| @@ -0,0 +1,253 @@ | ||||
| # Tantivy Full‑Text Backend (JSON‑RPC) | ||||
|  | ||||
| This document explains how to use HeroDB’s Tantivy-backed full‑text search as a dedicated database backend and provides copy‑pasteable JSON‑RPC requests. Tantivy is available only for non‑admin databases (db_id >= 1). Admin DB 0 always uses Redb/Sled and rejects FT operations. | ||||
|  | ||||
| Important characteristics: | ||||
| - Tantivy is a third backend alongside Redb and Sled. It provides search indexes only; there is no KV store backing it. | ||||
| - On Tantivy databases, Redis KV/list/hash commands are rejected; only FT commands and basic control (SELECT, CLIENT, INFO, etc.) are allowed. | ||||
| - FT JSON‑RPC is namespaced as "herodb" and methods are named with underscore: herodb_ftCreate, herodb_ftAdd, herodb_ftSearch, herodb_ftDel, herodb_ftInfo, herodb_ftDrop. | ||||
|  | ||||
| Reference to server implementation: | ||||
| - RPC methods are defined in [rust.trait Rpc()](src/rpc.rs:70): | ||||
|   - [rust.fn ft_create()](src/rpc.rs:121) | ||||
|   - [rust.fn ft_add()](src/rpc.rs:130) | ||||
|   - [rust.fn ft_search()](src/rpc.rs:141) | ||||
|   - [rust.fn ft_del()](src/rpc.rs:154) | ||||
|   - [rust.fn ft_info()](src/rpc.rs:158) | ||||
|   - [rust.fn ft_drop()](src/rpc.rs:162) | ||||
|  | ||||
| Notes on responses: | ||||
| - ftCreate/ftAdd/ftDel/ftDrop return a JSON boolean: true on success. | ||||
| - ftSearch/ftInfo return a JSON object with a single key "resp" containing a RESP‑encoded string (wire format used by Redis). You can display or parse it on the client side as needed. | ||||
|  | ||||
| RESP usage (redis-cli): | ||||
| - For RESP clients, you must SELECT the Tantivy database first. SELECT now succeeds for Tantivy DBs without opening KV storage. | ||||
| - After SELECT, you can run FT.* commands within that DB context. | ||||
|  | ||||
| Example with redis-cli: | ||||
| ```bash | ||||
| # Connect to server | ||||
| redis-cli -p 6379 | ||||
|  | ||||
| # Select Tantivy DB 1 (public by default) | ||||
| SELECT 1 | ||||
| # → OK | ||||
|  | ||||
| # Create index | ||||
| FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO | ||||
| # → OK | ||||
|  | ||||
| # Add a document | ||||
| FT.ADD product_catalog product:1 1.0 title "Wireless Bluetooth Headphones" description "Premium noise-canceling headphones with 30-hour battery life" category "electronics,audio" price 299.99 rating 4.5 location "37.7749,-122.4194" | ||||
| # → OK | ||||
|  | ||||
| # Search | ||||
| FT.SEARCH product_catalog wireless LIMIT 0 3 | ||||
| # → RESP array with hits | ||||
| ``` | ||||
|  | ||||
| Storage layout (on disk): | ||||
| - Indices are stored per database under: | ||||
|   - <base_dir>/search_indexes/<db_id>/<index_name> | ||||
| - Example: /tmp/test/search_indexes/1/product_catalog | ||||
|  | ||||
| 0) Create a new Tantivy database | ||||
|  | ||||
| Use herodb_createDatabase with backend "Tantivy". DB 0 cannot be Tantivy. | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 1, | ||||
|   "method": "herodb_createDatabase", | ||||
|   "params": [ | ||||
|     "Tantivy", | ||||
|     { "name": "search-db", "storage_path": null, "max_size": null, "redis_version": null }, | ||||
|     null | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| The response contains the allocated db_id (>= 1). Use that id in the calls below. | ||||
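|  | ||||
| The payloads in this document are plain JSON-RPC 2.0 requests and are transport-agnostic. As a minimal sketch of sending one, assuming the RPC endpoint is reachable over HTTP (the URL and port below are placeholders for your deployment, not HeroDB defaults): | ||||
|  | ||||
| ```bash | ||||
| # Illustrative only: substitute the actual JSON-RPC endpoint of your HeroDB instance | ||||
| curl -s -X POST http://127.0.0.1:8080 -H 'Content-Type: application/json' -d '{"jsonrpc":"2.0","id":1,"method":"herodb_createDatabase","params":["Tantivy",{"name":"search-db","storage_path":null,"max_size":null,"redis_version":null},null]}' | ||||
| ``` | ||||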
|  | ||||
| 1) FT.CREATE — create an index with schema | ||||
|  | ||||
| Method: herodb_ftCreate → [rust.fn ft_create()](src/rpc.rs:121) | ||||
|  | ||||
| Schema format is an array of tuples: [ [field_name, field_type, [options...] ], ... ] | ||||
| Supported field types: "TEXT", "NUMERIC" (defaults to F64), "TAG", "GEO" | ||||
| Supported options (subset): "WEIGHT", "SORTABLE", "NOINDEX", "SEPARATOR", "CASESENSITIVE" | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 2, | ||||
|   "method": "herodb_ftCreate", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "product_catalog", | ||||
|     [ | ||||
|       ["title", "TEXT", ["SORTABLE"]], | ||||
|       ["description", "TEXT", []], | ||||
|       ["category", "TAG", ["SEPARATOR", ","]], | ||||
|       ["price", "NUMERIC", ["SORTABLE"]], | ||||
|       ["rating", "NUMERIC", []], | ||||
|       ["location", "GEO", []] | ||||
|     ] | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Returns: true on success. | ||||
|  | ||||
| 2) FT.ADD — add or replace a document | ||||
|  | ||||
| Method: herodb_ftAdd → [rust.fn ft_add()](src/rpc.rs:130) | ||||
|  | ||||
| Fields is an object (map) of field_name → value (all values are sent as strings). GEO expects "lat,lon". | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 3, | ||||
|   "method": "herodb_ftAdd", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "product_catalog", | ||||
|     "product:1", | ||||
|     1.0, | ||||
|     { | ||||
|       "title": "Wireless Bluetooth Headphones", | ||||
|       "description": "Premium noise-canceling headphones with 30-hour battery life", | ||||
|       "category": "electronics,audio", | ||||
|       "price": "299.99", | ||||
|       "rating": "4.5", | ||||
|       "location": "37.7749,-122.4194" | ||||
|     } | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Returns: true on success. | ||||
|  | ||||
| 3) FT.SEARCH — query an index | ||||
|  | ||||
| Method: herodb_ftSearch → [rust.fn ft_search()](src/rpc.rs:141) | ||||
|  | ||||
| Parameters: (db_id, index_name, query, filters?, limit?, offset?, return_fields?) | ||||
| - filters: array of [field, value] pairs (Equals filter) | ||||
| - limit/offset: numbers (defaults: limit=10, offset=0) | ||||
| - return_fields: array of field names to include (optional) | ||||
|  | ||||
| Simple query: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 4, | ||||
|   "method": "herodb_ftSearch", | ||||
|   "params": [1, "product_catalog", "wireless", null, 10, 0, null] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Pagination + filters + selected fields: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "method": "herodb_ftSearch", | ||||
|   "params": [ | ||||
|     1, | ||||
|     "product_catalog", | ||||
|     "mouse", | ||||
|     [["category", "electronics"]], | ||||
|     5, | ||||
|     0, | ||||
|     ["title", "price", "rating"] | ||||
|   ] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Response shape: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 5, | ||||
|   "result": { "resp": "*...RESP encoded array..." } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 4) FT.INFO — index metadata | ||||
|  | ||||
| Method: herodb_ftInfo → [rust.fn ft_info()](src/rpc.rs:158) | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "method": "herodb_ftInfo", | ||||
|   "params": [1, "product_catalog"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Response shape: | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 6, | ||||
|   "result": { "resp": "*...RESP encoded array with fields and counts..." } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| 5) FT.DEL — delete by doc id | ||||
|  | ||||
| Method: herodb_ftDel → [rust.fn ft_del()](src/rpc.rs:154) | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 7, | ||||
|   "method": "herodb_ftDel", | ||||
|   "params": [1, "product_catalog", "product:1"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Returns: true on success. Note: current implementation logs and returns success; physical delete may be a no‑op until delete is finalized in the engine. | ||||
|  | ||||
| 6) FT.DROP — drop an index | ||||
|  | ||||
| Method: herodb_ftDrop → [rust.fn ft_drop()](src/rpc.rs:162) | ||||
|  | ||||
| ```json | ||||
| { | ||||
|   "jsonrpc": "2.0", | ||||
|   "id": 8, | ||||
|   "method": "herodb_ftDrop", | ||||
|   "params": [1, "product_catalog"] | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Returns: true on success. | ||||
|  | ||||
| Field types and options | ||||
|  | ||||
| - TEXT: stored/indexed/tokenized text. "SORTABLE" marks it fast (stored + fast path in our wrapper). | ||||
| - NUMERIC: stored/indexed numeric; default precision F64. "SORTABLE" enables fast column. | ||||
| - TAG: exact matching terms. Options: "SEPARATOR" (default ","), "CASESENSITIVE" (default false). | ||||
| - GEO: "lat,lon" string; stored as two numeric fields internally. | ||||
|  | ||||
| Backend and permission gating | ||||
|  | ||||
| - FT methods are rejected on DB 0. | ||||
| - FT methods require the database backend to be Tantivy; otherwise RPC returns an error. | ||||
| - Write‑like FT methods (create/add/del/drop) follow the same permission model as Redis writes on selected databases. | ||||
|  | ||||
| Troubleshooting | ||||
|  | ||||
| - "DB backend is not Tantivy": ensure the database was created with backend "Tantivy". | ||||
| - "FT not allowed on DB 0": use a non‑admin database id (>= 1). | ||||
| - Empty search results: confirm that the queried fields are tokenized/indexed (TEXT) and that documents were added successfully (an FT.INFO check is sketched below). | ||||
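|  | ||||
| A quick check for the last case is FT.INFO, which reports the index schema and document count. Over RESP, after selecting the Tantivy database: | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p 6379 | ||||
| SELECT 1 | ||||
| FT.INFO product_catalog | ||||
| # → RESP array describing fields and document counts | ||||
| ``` | ||||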
|  | ||||
| Related docs | ||||
|  | ||||
| - Command‑level search overview: [docs/search.md](./search.md) | ||||
| - RPC definitions: [src/rpc.rs](../src/rpc.rs) | ||||
examples/README.md (new file, 171 lines added)
							| @@ -0,0 +1,171 @@ | ||||
| # HeroDB Tantivy Search Examples | ||||
|  | ||||
| This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy. | ||||
|  | ||||
| ## Tantivy Search Demo (Bash Script) | ||||
|  | ||||
| ### Overview | ||||
| The `tantivy_search_demo.sh` script provides a comprehensive demonstration of HeroDB's search functionality using Redis commands. It showcases various search scenarios including basic text search, filtering, sorting, geographic queries, and more. | ||||
|  | ||||
| ### Prerequisites | ||||
| 1. **HeroDB Server**: The server must be running on port 6381 | ||||
| 2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH | ||||
|  | ||||
| ### Running the Demo | ||||
|  | ||||
| #### Step 1: Start HeroDB Server | ||||
| ```bash | ||||
| # From the project root directory | ||||
| cargo run -- --port 6381 | ||||
| ``` | ||||
|  | ||||
| #### Step 2: Run the Demo (in a new terminal) | ||||
| ```bash | ||||
| # From the project root directory | ||||
| ./examples/tantivy_search_demo.sh | ||||
| ``` | ||||
|  | ||||
| ### What the Demo Covers | ||||
|  | ||||
| The script demonstrates 15 different search scenarios: | ||||
|  | ||||
| 1. **Index Creation** - Creating a search index with various field types | ||||
| 2. **Data Insertion** - Adding sample products to the index | ||||
| 3. **Basic Text Search** - Simple keyword searches | ||||
| 4. **Filtered Search** - Combining text search with category filters | ||||
| 5. **Numeric Range Search** - Finding products within price ranges | ||||
| 6. **Sorting Results** - Ordering results by different fields | ||||
| 7. **Limited Results** - Pagination and result limiting | ||||
| 8. **Complex Queries** - Multi-field searches with sorting | ||||
| 9. **Geographic Search** - Location-based queries | ||||
| 10. **Index Information** - Getting statistics about the search index | ||||
| 11. **Search Comparison** - Tantivy vs simple pattern matching | ||||
| 12. **Fuzzy Search** - Typo tolerance and approximate matching | ||||
| 13. **Phrase Search** - Exact phrase matching | ||||
| 14. **Boolean Queries** - AND, OR, NOT operators | ||||
| 15. **Cleanup** - Removing test data | ||||
|  | ||||
| ### Sample Data | ||||
|  | ||||
| The demo uses a product catalog with the following fields (an example document follows the list): | ||||
| - **title** (TEXT) - Product name with higher search weight | ||||
| - **description** (TEXT) - Detailed product description | ||||
| - **category** (TAG) - Comma-separated categories | ||||
| - **price** (NUMERIC) - Product price for range queries | ||||
| - **rating** (NUMERIC) - Customer rating for sorting | ||||
| - **location** (GEO) - Geographic coordinates for location searches | ||||
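|  | ||||
| Because the index is declared `ON HASH` with the `product:` prefix, each document is simply a Redis hash. A minimal illustrative example (the demo script's actual values may differ; the GEO value follows the "lat,lon" format used in the HeroDB docs): | ||||
|  | ||||
| ```bash | ||||
| # One illustrative product document; the demo script inserts several of these | ||||
| HSET product:1 title "Wireless Bluetooth Headphones" description "Noise-canceling, 30-hour battery" category "electronics,audio" price 299.99 rating 4.5 location "37.7749,-122.4194" | ||||
| ``` | ||||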
|  | ||||
| ### Key Redis Commands Demonstrated | ||||
|  | ||||
| #### Index Management | ||||
| ```bash | ||||
| # Create search index | ||||
| FT.CREATE product_catalog ON HASH PREFIX 1 product: SCHEMA title TEXT WEIGHT 2.0 SORTABLE description TEXT category TAG SEPARATOR , price NUMERIC SORTABLE rating NUMERIC SORTABLE location GEO | ||||
|  | ||||
| # Get index information | ||||
| FT.INFO product_catalog | ||||
|  | ||||
| # Drop index | ||||
| FT.DROPINDEX product_catalog | ||||
| ``` | ||||
|  | ||||
| #### Search Queries | ||||
| ```bash | ||||
| # Basic text search | ||||
| FT.SEARCH product_catalog wireless | ||||
|  | ||||
| # Filtered search | ||||
| FT.SEARCH product_catalog 'organic @category:{food}' | ||||
|  | ||||
| # Numeric range | ||||
| FT.SEARCH product_catalog '@price:[50 150]' | ||||
|  | ||||
| # Sorted results | ||||
| FT.SEARCH product_catalog '@category:{electronics}' SORTBY price ASC | ||||
|  | ||||
| # Geographic search | ||||
| FT.SEARCH product_catalog '@location:[37.7749 -122.4194 50 km]' | ||||
|  | ||||
| # Boolean queries | ||||
| FT.SEARCH product_catalog 'wireless AND audio' | ||||
| FT.SEARCH product_catalog 'coffee OR tea' | ||||
|  | ||||
| # Phrase search | ||||
| FT.SEARCH product_catalog '"noise canceling"' | ||||
| ``` | ||||
|  | ||||
| ### Interactive Features | ||||
|  | ||||
| The demo script includes: | ||||
| - **Colored output** for better readability | ||||
| - **Pause between steps** to review results | ||||
| - **Error handling** with clear error messages | ||||
| - **Automatic cleanup** of test data | ||||
| - **Progress indicators** showing what each step demonstrates | ||||
|  | ||||
| ### Troubleshooting | ||||
|  | ||||
| #### HeroDB Not Running | ||||
| ``` | ||||
| ✗ HeroDB is not running on port 6381 | ||||
| ℹ Please start HeroDB with: cargo run -- --port 6381 | ||||
| ``` | ||||
| **Solution**: Start the HeroDB server in a separate terminal. | ||||
|  | ||||
| #### Redis CLI Not Found | ||||
| ``` | ||||
| redis-cli: command not found | ||||
| ``` | ||||
| **Solution**: Install Redis tools or use an alternative Redis client. | ||||
|  | ||||
| #### Connection Refused | ||||
| ``` | ||||
| Could not connect to Redis at localhost:6381: Connection refused | ||||
| ``` | ||||
| **Solution**: Ensure HeroDB is running and listening on the correct port. | ||||
|  | ||||
| ### Manual Testing | ||||
|  | ||||
| You can also run individual commands manually: | ||||
|  | ||||
| ```bash | ||||
| # Connect to HeroDB | ||||
| redis-cli -h localhost -p 6381 | ||||
|  | ||||
| # Create a simple index | ||||
| FT.CREATE myindex ON HASH SCHEMA title TEXT description TEXT | ||||
|  | ||||
| # Add a document | ||||
| HSET doc:1 title "Hello World" description "This is a test document" | ||||
|  | ||||
| # Search | ||||
| FT.SEARCH myindex hello | ||||
| ``` | ||||
|  | ||||
| ### Performance Notes | ||||
|  | ||||
| - **Indexing**: Documents are indexed in real-time as they're added | ||||
| - **Search Speed**: Full-text search is much faster than pattern matching on large datasets | ||||
| - **Memory Usage**: Tantivy indexes are memory-efficient and disk-backed | ||||
| - **Scalability**: Supports millions of documents with sub-second search times | ||||
|  | ||||
| ### Advanced Features | ||||
|  | ||||
| The demo showcases advanced Tantivy features: | ||||
| - **Relevance Scoring** - Results ranked by relevance | ||||
| - **Fuzzy Matching** - Handles typos and approximate matches | ||||
| - **Field Weighting** - Title field has higher search weight | ||||
| - **Multi-field Search** - Search across multiple fields simultaneously | ||||
| - **Geographic Queries** - Distance-based location searches | ||||
| - **Numeric Ranges** - Efficient range queries on numeric fields | ||||
| - **Tag Filtering** - Fast categorical filtering | ||||
|  | ||||
| ### Next Steps | ||||
|  | ||||
| After running the demo, explore: | ||||
| 1. **Custom Schemas** - Define your own field types and configurations | ||||
| 2. **Large Datasets** - Test with thousands or millions of documents | ||||
| 3. **Real Applications** - Integrate search into your applications | ||||
| 4. **Performance Tuning** - Optimize for your specific use case | ||||
|  | ||||
| For more information, see the [search documentation](../docs/search.md). | ||||
examples/simple_demo.sh (new file, 186 lines added)
							| @@ -0,0 +1,186 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # Simple HeroDB Demo - Basic Redis Commands | ||||
| # This script demonstrates basic Redis functionality that's currently implemented | ||||
|  | ||||
| set -e  # Exit on any error | ||||
|  | ||||
| # Configuration | ||||
| REDIS_HOST="localhost" | ||||
| REDIS_PORT="6381" | ||||
| REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT" | ||||
|  | ||||
| # Colors for output | ||||
| RED='\033[0;31m' | ||||
| GREEN='\033[0;32m' | ||||
| BLUE='\033[0;34m' | ||||
| YELLOW='\033[1;33m' | ||||
| NC='\033[0m' # No Color | ||||
|  | ||||
| # Function to print colored output | ||||
| print_header() { | ||||
|     echo -e "${BLUE}=== $1 ===${NC}" | ||||
| } | ||||
|  | ||||
| print_success() { | ||||
|     echo -e "${GREEN}✓ $1${NC}" | ||||
| } | ||||
|  | ||||
| print_info() { | ||||
|     echo -e "${YELLOW}ℹ $1${NC}" | ||||
| } | ||||
|  | ||||
| print_error() { | ||||
|     echo -e "${RED}✗ $1${NC}" | ||||
| } | ||||
|  | ||||
| # Function to check if HeroDB is running | ||||
| check_herodb() { | ||||
|     print_info "Checking if HeroDB is running on port $REDIS_PORT..." | ||||
|     if ! $REDIS_CLI ping > /dev/null 2>&1; then | ||||
|         print_error "HeroDB is not running on port $REDIS_PORT" | ||||
|         print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT" | ||||
|         exit 1 | ||||
|     fi | ||||
|     print_success "HeroDB is running and responding" | ||||
| } | ||||
|  | ||||
| # Function to execute Redis command with error handling | ||||
| execute_cmd() { | ||||
|     local cmd="$1" | ||||
|     local description="$2" | ||||
|      | ||||
|     echo -e "${YELLOW}Command:${NC} $cmd" | ||||
|     # Use eval so quoted, multi-word arguments inside $cmd reach redis-cli as single arguments | ||||
|     if result=$(eval "$REDIS_CLI $cmd" 2>&1); then | ||||
|         echo -e "${GREEN}Result:${NC} $result" | ||||
|         return 0 | ||||
|     else | ||||
|         print_error "Failed: $description" | ||||
|         echo "Error: $result" | ||||
|         return 1 | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Main demo function | ||||
| main() { | ||||
|     clear | ||||
|     print_header "HeroDB Basic Functionality Demo" | ||||
|     echo "This demo shows basic Redis commands that are currently implemented" | ||||
|     echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)" | ||||
|     echo | ||||
|  | ||||
|     # Check if HeroDB is running | ||||
|     check_herodb | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 1: Basic Key-Value Operations" | ||||
|      | ||||
|     execute_cmd "SET greeting 'Hello HeroDB!'" "Setting a simple key-value pair" | ||||
|     echo | ||||
|     execute_cmd "GET greeting" "Getting the value" | ||||
|     echo | ||||
|     execute_cmd "SET counter 42" "Setting a numeric value" | ||||
|     echo | ||||
|     execute_cmd "INCR counter" "Incrementing the counter" | ||||
|     echo | ||||
|     execute_cmd "GET counter" "Getting the incremented value" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 2: Hash Operations" | ||||
|      | ||||
|     execute_cmd "HSET user:1 name 'John Doe' email 'john@example.com' age 30" "Setting hash fields" | ||||
|     echo | ||||
|     execute_cmd "HGET user:1 name" "Getting a specific field" | ||||
|     echo | ||||
|     execute_cmd "HGETALL user:1" "Getting all fields" | ||||
|     echo | ||||
|     execute_cmd "HLEN user:1" "Getting hash length" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 3: List Operations" | ||||
|      | ||||
|     execute_cmd "LPUSH tasks 'Write code' 'Test code' 'Deploy code'" "Adding items to list" | ||||
|     echo | ||||
|     execute_cmd "LLEN tasks" "Getting list length" | ||||
|     echo | ||||
|     execute_cmd "LRANGE tasks 0 -1" "Getting all list items" | ||||
|     echo | ||||
|     execute_cmd "LPOP tasks" "Popping from left" | ||||
|     echo | ||||
|     execute_cmd "LRANGE tasks 0 -1" "Checking remaining items" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 4: Key Management" | ||||
|      | ||||
|     execute_cmd "KEYS *" "Listing all keys" | ||||
|     echo | ||||
|     execute_cmd "EXISTS greeting" "Checking if key exists" | ||||
|     echo | ||||
|     execute_cmd "TYPE user:1" "Getting key type" | ||||
|     echo | ||||
|     execute_cmd "DBSIZE" "Getting database size" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 5: Expiration" | ||||
|      | ||||
|     execute_cmd "SET temp_key 'temporary value'" "Setting temporary key" | ||||
|     echo | ||||
|     execute_cmd "EXPIRE temp_key 5" "Setting 5 second expiration" | ||||
|     echo | ||||
|     execute_cmd "TTL temp_key" "Checking time to live" | ||||
|     echo | ||||
|     print_info "Waiting 2 seconds..." | ||||
|     sleep 2 | ||||
|     execute_cmd "TTL temp_key" "Checking TTL again" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 6: Multiple Operations" | ||||
|      | ||||
|     execute_cmd "MSET key1 'value1' key2 'value2' key3 'value3'" "Setting multiple keys" | ||||
|     echo | ||||
|     execute_cmd "MGET key1 key2 key3" "Getting multiple values" | ||||
|     echo | ||||
|     execute_cmd "DEL key1 key2" "Deleting multiple keys" | ||||
|     echo | ||||
|     execute_cmd "EXISTS key1 key2 key3" "Checking existence of multiple keys" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 7: Search Commands (Placeholder)" | ||||
|     print_info "Testing FT.CREATE command (currently returns placeholder response)" | ||||
|      | ||||
|     execute_cmd "FT.CREATE test_index SCHEMA title TEXT description TEXT" "Creating search index" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 8: Server Information" | ||||
|      | ||||
|     execute_cmd "INFO" "Getting server information" | ||||
|     echo | ||||
|     execute_cmd "CONFIG GET dir" "Getting configuration" | ||||
|     echo | ||||
|  | ||||
|     print_header "Step 9: Cleanup" | ||||
|      | ||||
|     execute_cmd "FLUSHDB" "Clearing database" | ||||
|     echo | ||||
|     execute_cmd "DBSIZE" "Confirming database is empty" | ||||
|     echo | ||||
|  | ||||
|     print_header "Demo Summary" | ||||
|     echo "This demonstration showed:" | ||||
|     echo "• Basic key-value operations (GET, SET, INCR)" | ||||
|     echo "• Hash operations (HSET, HGET, HGETALL)" | ||||
|     echo "• List operations (LPUSH, LPOP, LRANGE)" | ||||
|     echo "• Key management (KEYS, EXISTS, TYPE, DEL)" | ||||
|     echo "• Expiration handling (EXPIRE, TTL)" | ||||
|     echo "• Multiple key operations (MSET, MGET)" | ||||
|     echo "• Server information commands" | ||||
|     echo | ||||
|     print_success "HeroDB basic functionality demo completed successfully!" | ||||
|     echo | ||||
|     print_info "Note: Full-text search (FT.*) commands are defined but not yet fully implemented" | ||||
|     print_info "To run HeroDB server: cargo run -- --port 6381" | ||||
|     print_info "To connect with redis-cli: redis-cli -h localhost -p 6381" | ||||
| } | ||||
|  | ||||
| # Run the demo | ||||
| main "$@" | ||||
| @@ -1,29 +0,0 @@ | ||||
| [package] | ||||
| name = "herodb" | ||||
| version = "0.0.1" | ||||
| authors = ["Pin Fang <fpfangpin@hotmail.com>"] | ||||
| edition = "2021" | ||||
|  | ||||
| [dependencies] | ||||
| anyhow = "1.0.59" | ||||
| bytes = "1.3.0" | ||||
| thiserror = "1.0.32" | ||||
| tokio = { version = "1.23.0", features = ["full"] } | ||||
| clap = { version = "4.5.20", features = ["derive"] } | ||||
| byteorder = "1.4.3" | ||||
| futures = "0.3" | ||||
| sled = "0.34" | ||||
| redb = "2.1.3" | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
| serde_json = "1.0" | ||||
| bincode = "1.3" | ||||
| chacha20poly1305 = "0.10.1" | ||||
| rand = "0.8" | ||||
| sha2 = "0.10" | ||||
| age = "0.10" | ||||
| secrecy = "0.8" | ||||
| ed25519-dalek = "2" | ||||
| base64 = "0.22" | ||||
|  | ||||
| [dev-dependencies] | ||||
| redis = { version = "0.24", features = ["aio", "tokio-comp"] } | ||||
| @@ -1,85 +0,0 @@ | ||||
| # HeroDB | ||||
|  | ||||
| HeroDB is a Redis-compatible database built with Rust, offering a flexible and secure storage solution. It supports two primary storage backends: `redb` (default) and `sled`, both with full encryption capabilities. HeroDB aims to provide a robust and performant key-value store with advanced features like data-at-rest encryption, hash operations, list operations, and cursor-based scanning. | ||||
|  | ||||
| ## Purpose | ||||
|  | ||||
| The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-compatible database that prioritizes data security through transparent encryption. It's designed for applications that require fast, reliable data storage with the option for strong cryptographic protection, without the overhead of a full-fledged Redis server. | ||||
|  | ||||
| ## Features | ||||
|  | ||||
| - **Redis Compatibility**: Supports a subset of Redis commands over RESP (Redis Serialization Protocol) via TCP. | ||||
| - **Dual Backend Support**: | ||||
|     - `redb` (default): Optimized for concurrent access and high-throughput scenarios. | ||||
|     - `sled`: A lock-free, log-structured database, excellent for specific workloads. | ||||
| - **Data-at-Rest Encryption**: Transparent encryption for both backends using the `age` encryption library. | ||||
| - **Key-Value Operations**: Full support for basic string, hash, and list operations. | ||||
| - **Expiration**: Time-to-live (TTL) functionality for keys. | ||||
| - **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`). | ||||
| - **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations. | ||||
|  | ||||
| ## Quick Start | ||||
|  | ||||
| ### Building HeroDB | ||||
|  | ||||
| To build HeroDB, navigate to the project root and run: | ||||
|  | ||||
| ```bash | ||||
| cargo build --release | ||||
| ``` | ||||
|  | ||||
| ### Running HeroDB | ||||
|  | ||||
| You can start HeroDB with different backends and encryption options: | ||||
|  | ||||
| #### Default `redb` Backend | ||||
|  | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb_redb --port 6379 | ||||
| ``` | ||||
|  | ||||
| #### `sled` Backend | ||||
|  | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled | ||||
| ``` | ||||
|  | ||||
| #### `redb` with Encryption | ||||
|  | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey | ||||
| ``` | ||||
|  | ||||
| #### `sled` with Encryption | ||||
|  | ||||
| ```bash | ||||
| ./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey | ||||
| ``` | ||||
|  | ||||
| ## Usage with Redis Clients | ||||
|  | ||||
| HeroDB can be interacted with using any standard Redis client, such as `redis-cli`, `redis-py` (Python), or `ioredis` (Node.js). | ||||
|  | ||||
| ### Example with `redis-cli` | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p 6379 SET mykey "Hello from HeroDB!" | ||||
| redis-cli -p 6379 GET mykey | ||||
| # → "Hello from HeroDB!" | ||||
|  | ||||
| redis-cli -p 6379 HSET user:1 name "Alice" age "30" | ||||
| redis-cli -p 6379 HGET user:1 name | ||||
| # → "Alice" | ||||
|  | ||||
| redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10 | ||||
| # → 1) "0" | ||||
| #    2) 1) "user:1" | ||||
| ``` | ||||
|  | ||||
| ## Documentation | ||||
|  | ||||
| For more detailed information on commands, features, and advanced usage, please refer to the documentation: | ||||
|  | ||||
| - [Basics](docs/basics.md) | ||||
| - [Supported Commands](docs/cmds.md) | ||||
| - [AGE Cryptography](docs/age.md) | ||||
| @@ -1,188 +0,0 @@ | ||||
| # HeroDB AGE usage: Stateless vs Key‑Managed | ||||
|  | ||||
| This document explains how to use the AGE cryptography commands exposed by HeroDB over the Redis protocol in two modes: | ||||
| - Stateless (ephemeral keys; nothing stored on the server) | ||||
| - Key‑managed (server‑persisted, named keys) | ||||
|  | ||||
| If you are new to the codebase, the exact tests that exercise these behaviors are: | ||||
| - [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495) | ||||
| - [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555) | ||||
|  | ||||
| Implementation entry points: | ||||
| - [herodb/src/age.rs](herodb/src/age.rs) | ||||
| - Dispatch from [herodb/src/cmd.rs](herodb/src/cmd.rs) | ||||
|  | ||||
| Note: Database-at-rest encryption flags in the test harness are unrelated to AGE commands; those flags control storage-level encryption of DB files. See the harness near [rust.start_test_server()](herodb/tests/usage_suite.rs:10). | ||||
|  | ||||
| ## Quick start | ||||
|  | ||||
| Assuming the server is running on localhost on some $PORT: | ||||
| ```bash | ||||
| ~/code/git.ourworld.tf/herocode/herodb/herodb/build.sh | ||||
| ~/code/git.ourworld.tf/herocode/herodb/target/release/herodb --dir /tmp/data --debug --port 6381 --encryption-key 1234 --encrypt | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ```bash | ||||
| export PORT=6381 | ||||
| # Generate an ephemeral keypair and encrypt/decrypt a message (stateless mode) | ||||
| redis-cli -p $PORT AGE GENENC | ||||
| # → returns an array: [recipient, identity] | ||||
|  | ||||
| redis-cli -p $PORT AGE ENCRYPT <recipient> "hello world" | ||||
| # → returns ciphertext (base64 in a bulk string) | ||||
|  | ||||
| redis-cli -p $PORT AGE DECRYPT <identity> <ciphertext_b64> | ||||
| # → returns "hello world" | ||||
| ``` | ||||
|  | ||||
| For key‑managed mode, generate a named key once and reference it by name afterwards: | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p $PORT AGE KEYGEN app1 | ||||
| # → persists encryption keypair under name "app1" | ||||
|  | ||||
| redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello" | ||||
| redis-cli -p $PORT AGE DECRYPTNAME app1 <ciphertext_b64> | ||||
| ``` | ||||
|  | ||||
| ## Stateless AGE (ephemeral) | ||||
|  | ||||
| Characteristics | ||||
|  | ||||
| - No server‑side storage of keys. | ||||
| - You pass the actual key material with every call. | ||||
| - Not listable via AGE LIST. | ||||
|  | ||||
| Commands and examples | ||||
|  | ||||
| 1) Ephemeral encryption keys | ||||
|  | ||||
| ```bash | ||||
| # Generate an ephemeral encryption keypair  | ||||
| redis-cli -p $PORT AGE GENENC | ||||
| # Example output (abridged): | ||||
| # 1) "age1qz..."          # recipient (public key) = can be used by others e.g. to verify what I sign | ||||
| # 2) "AGE-SECRET-KEY-1..." # identity (secret) = is like my private, cannot lose this one | ||||
|  | ||||
| # Encrypt with the recipient public key | ||||
| redis-cli -p $PORT AGE ENCRYPT "age1qz..." "hello world" | ||||
|  | ||||
| # → returns bulk string payload: base64 ciphertext (encrypted content) | ||||
|  | ||||
| # Decrypt with the identity (secret) in other words your private key | ||||
| redis-cli -p $PORT AGE DECRYPT "AGE-SECRET-KEY-1..." "<ciphertext_b64>" | ||||
| # → "hello world" | ||||
| ``` | ||||
|  | ||||
| 2) Ephemeral signing keys | ||||
|  | ||||
| > ? is this same as my private key | ||||
|  | ||||
| ```bash | ||||
|  | ||||
| # Generate an ephemeral signing keypair | ||||
| redis-cli -p $PORT AGE GENSIGN | ||||
| # Example output: | ||||
| # 1) "<verify_pub_b64>" | ||||
| # 2) "<sign_secret_b64>" | ||||
|  | ||||
| # Sign a message with the secret | ||||
| redis-cli -p $PORT AGE SIGN "<sign_secret_b64>" "msg" | ||||
| # → returns "<signature_b64>" | ||||
|  | ||||
| # Verify with the public key | ||||
| redis-cli -p $PORT AGE VERIFY "<verify_pub_b64>" "msg" "<signature_b64>" | ||||
| # → 1 (valid) or 0 (invalid) | ||||
| ``` | ||||
|  | ||||
| When to use | ||||
| - You do not want the server to store private keys. | ||||
| - You already manage key material on the client side. | ||||
| - You need ad‑hoc operations without persistence. | ||||
|  | ||||
| Reference test: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495) | ||||
|  | ||||
| ## Key‑managed AGE (persistent, named) | ||||
|  | ||||
| Characteristics | ||||
| - Server generates and persists keypairs under a chosen name. | ||||
| - Clients refer to keys by name; raw secrets are not supplied on each call. | ||||
| - Keys are discoverable via AGE LIST. | ||||
|  | ||||
| Commands and examples | ||||
|  | ||||
| 1) Named encryption keys | ||||
|  | ||||
| ```bash | ||||
| # Create/persist a named encryption keypair | ||||
| redis-cli -p $PORT AGE KEYGEN app1 | ||||
| # → returns [recipient, identity] but also stores them under name "app1" | ||||
|  | ||||
| > TODO: should not return identity (security, but there can be separate function to export it e.g. AGE EXPORTKEY app1) | ||||
|  | ||||
| # Encrypt using the stored public key | ||||
| redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello" | ||||
| # → returns bulk string payload: base64 ciphertext | ||||
|  | ||||
| # Decrypt using the stored secret | ||||
| redis-cli -p $PORT AGE DECRYPTNAME app1 "<ciphertext_b64>" | ||||
| # → "hello" | ||||
| ``` | ||||
|  | ||||
| 2) Named signing keys | ||||
|  | ||||
| ```bash | ||||
| # Create/persist a named signing keypair | ||||
| redis-cli -p $PORT AGE SIGNKEYGEN app1 | ||||
| # → returns [verify_pub_b64, sign_secret_b64] and stores under name "app1" | ||||
|  | ||||
| > TODO: should not return sign_secret_b64 (for security, but there can be separate function to export it e.g. AGE EXPORTSIGNKEY app1) | ||||
|  | ||||
| # Sign using the stored secret | ||||
| redis-cli -p $PORT AGE SIGNNAME app1 "msg" | ||||
| # → returns "<signature_b64>" | ||||
|  | ||||
| # Verify using the stored public key | ||||
| redis-cli -p $PORT AGE VERIFYNAME app1 "msg" "<signature_b64>" | ||||
| # → 1 (valid) or 0 (invalid) | ||||
| ``` | ||||
|  | ||||
| 3) List stored AGE keys | ||||
|  | ||||
| ```bash | ||||
| redis-cli -p $PORT AGE LIST | ||||
| # Example output includes labels such as "encpub" and your key names (e.g., "app1") | ||||
| ``` | ||||
|  | ||||
| When to use | ||||
| - You want centralized key storage/rotation and fewer secrets on the client. | ||||
| - You need names/labels for workflows and can trust the server with secrets. | ||||
| - You want discoverability (AGE LIST) and simpler client commands. | ||||
|  | ||||
| Reference test: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555) | ||||
|  | ||||
| ## Choosing a mode | ||||
|  | ||||
| - Prefer Stateless when: | ||||
|   - Minimizing server trust for secret material is the priority. | ||||
|   - Clients already have a secure mechanism to store/distribute keys. | ||||
| - Prefer Key‑managed when: | ||||
|   - Centralized lifecycle, naming, and discoverability are beneficial. | ||||
|   - You plan to integrate rotation, ACLs, or auditability on the server side. | ||||
|  | ||||
| ## Security notes | ||||
|  | ||||
| - Treat identities and signing secrets as sensitive; avoid logging them. | ||||
| - For key‑managed mode, ensure server storage (and backups) are protected. | ||||
| - AGE operations here are application‑level crypto and are distinct from database-at-rest encryption configured in the test harness. | ||||
|  | ||||
| ## Repository pointers | ||||
|  | ||||
| - Stateless examples in tests: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495) | ||||
| - Key‑managed examples in tests: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555) | ||||
| - AGE implementation: [herodb/src/age.rs](herodb/src/age.rs) | ||||
| - Command dispatch: [herodb/src/cmd.rs](herodb/src/cmd.rs) | ||||
| - Bash demo: [herodb/examples/age_bash_demo.sh](herodb/examples/age_bash_demo.sh) | ||||
| - Rust persistent demo: [herodb/examples/age_persist_demo.rs](herodb/examples/age_persist_demo.rs) | ||||
| - Additional notes: [herodb/instructions/encrypt.md](herodb/instructions/encrypt.md) | ||||
| @@ -1,308 +0,0 @@ | ||||
| //! age.rs — AGE (rage) helpers + persistent key management for your mini-Redis. | ||||
| // | ||||
| // Features: | ||||
| // - X25519 encryption/decryption (age style) | ||||
| // - Ed25519 detached signatures + verification | ||||
| // - Persistent named keys in DB (strings): | ||||
| //      age:key:{name}       -> X25519 recipient (public encryption key, "age1...") | ||||
| //      age:privkey:{name}   -> X25519 identity (secret encryption key, "AGE-SECRET-KEY-1...") | ||||
| //      age:signpub:{name}   -> Ed25519 verify pubkey (public, used to verify signatures) | ||||
| //      age:signpriv:{name}  -> Ed25519 signing secret key (private, used to sign) | ||||
| // - Base64 wrapping for ciphertext/signature binary blobs. | ||||
|  | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use secrecy::ExposeSecret; | ||||
| use age::{Decryptor, Encryptor}; | ||||
| use age::x25519; | ||||
|  | ||||
| use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey}; | ||||
|  | ||||
| use base64::{engine::general_purpose::STANDARD as B64, Engine as _}; | ||||
|  | ||||
| use crate::protocol::Protocol; | ||||
| use crate::server::Server; | ||||
| use crate::error::DBError; | ||||
|  | ||||
| // ---------- Internal helpers ---------- | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum AgeWireError { | ||||
|     ParseKey, | ||||
|     Crypto(String), | ||||
|     Utf8, | ||||
|     SignatureLen, | ||||
|     NotFound(&'static str),     // which kind of key was missing | ||||
|     Storage(String), | ||||
| } | ||||
|  | ||||
| impl AgeWireError { | ||||
|     fn to_protocol(self) -> Protocol { | ||||
|         match self { | ||||
|             AgeWireError::ParseKey => Protocol::err("ERR age: invalid key"), | ||||
|             AgeWireError::Crypto(e) => Protocol::err(&format!("ERR age: {e}")), | ||||
|             AgeWireError::Utf8 => Protocol::err("ERR age: invalid UTF-8 plaintext"), | ||||
|             AgeWireError::SignatureLen => Protocol::err("ERR age: bad signature length"), | ||||
|             AgeWireError::NotFound(w) => Protocol::err(&format!("ERR age: missing {w}")), | ||||
|             AgeWireError::Storage(e) => Protocol::err(&format!("ERR storage: {e}")), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn parse_recipient(s: &str) -> Result<x25519::Recipient, AgeWireError> { | ||||
|     x25519::Recipient::from_str(s).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
| fn parse_identity(s: &str) -> Result<x25519::Identity, AgeWireError> { | ||||
|     x25519::Identity::from_str(s).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
| fn parse_ed25519_signing_key(s: &str) -> Result<SigningKey, AgeWireError> { | ||||
|     // Parse base64-encoded signing key | ||||
|     let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if bytes.len() != 32 { | ||||
|         return Err(AgeWireError::ParseKey); | ||||
|     } | ||||
|     let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     Ok(SigningKey::from_bytes(&key_bytes)) | ||||
| } | ||||
| fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> { | ||||
|     // Parse base64-encoded verifying key | ||||
|     let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if bytes.len() != 32 { | ||||
|         return Err(AgeWireError::ParseKey); | ||||
|     } | ||||
|     let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
|  | ||||
| // ---------- Stateless crypto helpers (string in/out) ---------- | ||||
|  | ||||
| pub fn gen_enc_keypair() -> (String, String) { | ||||
|     let id = x25519::Identity::generate(); | ||||
|     let pk = id.to_public(); | ||||
|     (pk.to_string(), id.to_string().expose_secret().to_string()) // (recipient, identity) | ||||
| } | ||||
|  | ||||
| pub fn gen_sign_keypair() -> (String, String) { | ||||
|     use rand::RngCore; | ||||
|     use rand::rngs::OsRng; | ||||
|      | ||||
|     // Generate random 32 bytes for the signing key | ||||
|     let mut secret_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut secret_bytes); | ||||
|      | ||||
|     let signing_key = SigningKey::from_bytes(&secret_bytes); | ||||
|     let verifying_key = signing_key.verifying_key(); | ||||
|      | ||||
|     // Encode as base64 for storage | ||||
|     let signing_key_b64 = B64.encode(signing_key.to_bytes()); | ||||
|     let verifying_key_b64 = B64.encode(verifying_key.to_bytes()); | ||||
|      | ||||
|     (verifying_key_b64, signing_key_b64) // (verify_pub, signing_secret) | ||||
| } | ||||
|  | ||||
| /// Encrypt `msg` for `recipient_str` (X25519). Returns base64(ciphertext). | ||||
| pub fn encrypt_b64(recipient_str: &str, msg: &str) -> Result<String, AgeWireError> { | ||||
|     let recipient = parse_recipient(recipient_str)?; | ||||
|     let enc = Encryptor::with_recipients(vec![Box::new(recipient)]) | ||||
|         .expect("failed to create encryptor"); // Handle Option<Encryptor> | ||||
|     let mut out = Vec::new(); | ||||
|     { | ||||
|         use std::io::Write; | ||||
|         let mut w = enc.wrap_output(&mut out).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|         w.write_all(msg.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|         w.finish().map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     } | ||||
|     Ok(B64.encode(out)) | ||||
| } | ||||
|  | ||||
| /// Decrypt base64(ciphertext) with `identity_str`. Returns plaintext String. | ||||
| pub fn decrypt_b64(identity_str: &str, ct_b64: &str) -> Result<String, AgeWireError> { | ||||
|     let id = parse_identity(identity_str)?; | ||||
|     let ct = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     let dec = Decryptor::new(&ct[..]).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|      | ||||
|     // The decrypt method returns a Result<StreamReader, DecryptError> | ||||
|     let mut r = match dec { | ||||
|         Decryptor::Recipients(d) => d.decrypt(std::iter::once(&id as &dyn age::Identity)) | ||||
|             .map_err(|e| AgeWireError::Crypto(e.to_string()))?, | ||||
|         Decryptor::Passphrase(_) => return Err(AgeWireError::Crypto("Expected recipients, got passphrase".to_string())), | ||||
|     }; | ||||
|      | ||||
|     let mut pt = Vec::new(); | ||||
|     use std::io::Read; | ||||
|     r.read_to_end(&mut pt).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     String::from_utf8(pt).map_err(|_| AgeWireError::Utf8) | ||||
| } | ||||
|  | ||||
| /// Sign bytes of `msg` (detached). Returns base64(signature bytes, 64 bytes). | ||||
| pub fn sign_b64(signing_secret_str: &str, msg: &str) -> Result<String, AgeWireError> { | ||||
|     let signing_key = parse_ed25519_signing_key(signing_secret_str)?; | ||||
|     let sig = signing_key.sign(msg.as_bytes()); | ||||
|     Ok(B64.encode(sig.to_bytes())) | ||||
| } | ||||
|  | ||||
| /// Verify detached signature (base64) for `msg` with pubkey. | ||||
| pub fn verify_b64(verify_pub_str: &str, msg: &str, sig_b64: &str) -> Result<bool, AgeWireError> { | ||||
|     let verifying_key = parse_ed25519_verifying_key(verify_pub_str)?; | ||||
|     let sig_bytes = B64.decode(sig_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     if sig_bytes.len() != 64 { | ||||
|         return Err(AgeWireError::SignatureLen); | ||||
|     } | ||||
|     let sig = Signature::from_bytes(sig_bytes[..].try_into().unwrap()); | ||||
|     Ok(verifying_key.verify(msg.as_bytes(), &sig).is_ok()) | ||||
| } | ||||
|  | ||||
| // ---------- Storage helpers ---------- | ||||
|  | ||||
| fn sget(server: &Server, key: &str) -> Result<Option<String>, AgeWireError> { | ||||
|     let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?; | ||||
|     st.get(key).map_err(|e| AgeWireError::Storage(e.0)) | ||||
| } | ||||
| fn sset(server: &Server, key: &str, val: &str) -> Result<(), AgeWireError> { | ||||
|     let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?; | ||||
|     st.set(key.to_string(), val.to_string()).map_err(|e| AgeWireError::Storage(e.0)) | ||||
| } | ||||
|  | ||||
| fn enc_pub_key_key(name: &str) -> String { format!("age:key:{name}") } | ||||
| fn enc_priv_key_key(name: &str) -> String { format!("age:privkey:{name}") } | ||||
| fn sign_pub_key_key(name: &str) -> String { format!("age:signpub:{name}") } | ||||
| fn sign_priv_key_key(name: &str) -> String { format!("age:signpriv:{name}") } | ||||
|  | ||||
| // ---------- Command handlers (RESP Protocol) ---------- | ||||
| // Basic (stateless) ones kept for completeness | ||||
|  | ||||
| pub async fn cmd_age_genenc() -> Protocol { | ||||
|     let (recip, ident) = gen_enc_keypair(); | ||||
|     Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_gensign() -> Protocol { | ||||
|     let (verify, secret) = gen_sign_keypair(); | ||||
|     Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_encrypt(recipient: &str, message: &str) -> Protocol { | ||||
|     match encrypt_b64(recipient, message) { | ||||
|         Ok(b64) => Protocol::BulkString(b64), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_decrypt(identity: &str, ct_b64: &str) -> Protocol { | ||||
|     match decrypt_b64(identity, ct_b64) { | ||||
|         Ok(pt) => Protocol::BulkString(pt), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_sign(secret: &str, message: &str) -> Protocol { | ||||
|     match sign_b64(secret, message) { | ||||
|         Ok(b64sig) => Protocol::BulkString(b64sig), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> Protocol { | ||||
|     match verify_b64(verify_pub, message, sig_b64) { | ||||
|         Ok(true) => Protocol::SimpleString("1".to_string()), | ||||
|         Ok(false) => Protocol::SimpleString("0".to_string()), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| // ---------- NEW: Persistent, named-key commands ---------- | ||||
|  | ||||
| pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol { | ||||
|     let (recip, ident) = gen_enc_keypair(); | ||||
|     if let Err(e) = sset(server, &enc_pub_key_key(name), &recip) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &enc_priv_key_key(name), &ident) { return e.to_protocol(); } | ||||
|     Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol { | ||||
|     let (verify, secret) = gen_sign_keypair(); | ||||
|     if let Err(e) = sset(server, &sign_pub_key_key(name), &verify) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &sign_priv_key_key(name), &secret) { return e.to_protocol(); } | ||||
|     Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol { | ||||
|     let recip = match sget(server, &enc_pub_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match encrypt_b64(&recip, message) { | ||||
|         Ok(ct) => Protocol::BulkString(ct), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol { | ||||
|     let ident = match sget(server, &enc_priv_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match decrypt_b64(&ident, ct_b64) { | ||||
|         Ok(pt) => Protocol::BulkString(pt), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_sign_name(server: &Server, name: &str, message: &str) -> Protocol { | ||||
|     let sec = match sget(server, &sign_priv_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("signing secret (age:signpriv:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match sign_b64(&sec, message) { | ||||
|         Ok(sig) => Protocol::BulkString(sig), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig_b64: &str) -> Protocol { | ||||
|     let pubk = match sget(server, &sign_pub_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("verify pubkey (age:signpub:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match verify_b64(&pubk, message, sig_b64) { | ||||
|         Ok(true) => Protocol::SimpleString("1".to_string()), | ||||
|         Ok(false) => Protocol::SimpleString("0".to_string()), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_list(server: &Server) -> Protocol { | ||||
|     // Returns 4 arrays: ["encpub", <names...>], ["encpriv", ...], ["signpub", ...], ["signpriv", ...] | ||||
|     let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) }; | ||||
|  | ||||
|     let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> { | ||||
|         let keys = st.keys(pat)?; | ||||
|         let mut names: Vec<String> = keys.into_iter() | ||||
|             .filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string())) | ||||
|             .collect(); | ||||
|         names.sort(); | ||||
|         Ok(names) | ||||
|     }; | ||||
|  | ||||
|     let encpub  = match pull("age:key:*",      "age:key:")      { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let encpriv = match pull("age:privkey:*",  "age:privkey:")  { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let signpub = match pull("age:signpub:*",  "age:signpub:")  { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let signpriv= match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|  | ||||
|     let to_arr = |label: &str, v: Vec<String>| { | ||||
|         let mut out = vec![Protocol::BulkString(label.to_string())]; | ||||
|         out.push(Protocol::Array(v.into_iter().map(Protocol::BulkString).collect())); | ||||
|         Protocol::Array(out) | ||||
|     }; | ||||
|  | ||||
|     Protocol::Array(vec![ | ||||
|         to_arr("encpub", encpub), | ||||
|         to_arr("encpriv", encpriv), | ||||
|         to_arr("signpub", signpub), | ||||
|         to_arr("signpriv", signpriv), | ||||
|     ]) | ||||
| } | ||||
| @@ -1,10 +0,0 @@ | ||||
| pub mod age;   // NEW | ||||
| pub mod cmd; | ||||
| pub mod crypto; | ||||
| pub mod error; | ||||
| pub mod options; | ||||
| pub mod protocol; | ||||
| pub mod server; | ||||
| pub mod storage; | ||||
| pub mod storage_trait;  // Add this | ||||
| pub mod storage_sled;   // Add this | ||||
| @@ -1,89 +0,0 @@ | ||||
| // #![allow(unused_imports)] | ||||
|  | ||||
| use tokio::net::TcpListener; | ||||
|  | ||||
| use herodb::server; | ||||
|  | ||||
| use clap::Parser; | ||||
|  | ||||
| /// Simple program to greet a person | ||||
| #[derive(Parser, Debug)] | ||||
| #[command(version, about, long_about = None)] | ||||
| struct Args { | ||||
|     /// The directory of Redis DB file | ||||
|     #[arg(long)] | ||||
|     dir: String, | ||||
|  | ||||
|     /// The port of the Redis server, default is 6379 if not specified | ||||
|     #[arg(long)] | ||||
|     port: Option<u16>, | ||||
|  | ||||
|     /// Enable debug mode | ||||
|     #[arg(long)] | ||||
|     debug: bool, | ||||
|  | ||||
|  | ||||
|     /// Master encryption key for encrypted databases | ||||
|     #[arg(long)] | ||||
|     encryption_key: Option<String>, | ||||
|  | ||||
|     /// Encrypt the database | ||||
|     #[arg(long)] | ||||
|     encrypt: bool, | ||||
|  | ||||
|     /// Use the sled backend | ||||
|     #[arg(long)] | ||||
|     sled: bool, | ||||
| } | ||||
|  | ||||
| #[tokio::main] | ||||
| async fn main() { | ||||
|     // parse args | ||||
|     let args = Args::parse(); | ||||
|  | ||||
|     // bind port | ||||
|     let port = args.port.unwrap_or(6379); | ||||
|     println!("will listen on port: {}", port); | ||||
|     let listener = TcpListener::bind(format!("127.0.0.1:{}", port)) | ||||
|         .await | ||||
|         .unwrap(); | ||||
|  | ||||
|     // new DB option | ||||
|     let option = herodb::options::DBOption { | ||||
|         dir: args.dir, | ||||
|         debug: args.debug, | ||||
|         encryption_key: args.encryption_key, | ||||
|         encrypt: args.encrypt, | ||||
|         backend: if args.sled { | ||||
|             herodb::options::BackendType::Sled | ||||
|         } else { | ||||
|             herodb::options::BackendType::Redb | ||||
|         }, | ||||
|     }; | ||||
|  | ||||
|     // new server | ||||
|     let server = server::Server::new(option).await; | ||||
|  | ||||
|     // Add a small delay to ensure the port is ready | ||||
|     tokio::time::sleep(std::time::Duration::from_millis(100)).await; | ||||
|  | ||||
|     // accept new connections | ||||
|     loop { | ||||
|         let stream = listener.accept().await; | ||||
|         match stream { | ||||
|             Ok((stream, _)) => { | ||||
|                 println!("accepted new connection"); | ||||
|  | ||||
|                 let mut sc = server.clone(); | ||||
|                 tokio::spawn(async move { | ||||
|                     if let Err(e) = sc.handle(stream).await { | ||||
|                         println!("error: {:?}, will close the connection. Bye", e); | ||||
|                     } | ||||
|                 }); | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 println!("error: {}", e); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,14 +0,0 @@ | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum BackendType { | ||||
|     Redb, | ||||
|     Sled, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct DBOption { | ||||
|     pub dir: String, | ||||
|     pub debug: bool, | ||||
|     pub encrypt: bool, | ||||
|     pub encryption_key: Option<String>, | ||||
|     pub backend: BackendType, | ||||
| } | ||||
| @@ -1,263 +0,0 @@ | ||||
| use core::str; | ||||
| use std::collections::HashMap; | ||||
| use std::sync::Arc; | ||||
| use tokio::io::AsyncReadExt; | ||||
| use tokio::io::AsyncWriteExt; | ||||
| use tokio::sync::{Mutex, oneshot}; | ||||
|  | ||||
| use std::sync::atomic::{AtomicU64, Ordering}; | ||||
|  | ||||
| use crate::cmd::Cmd; | ||||
| use crate::error::DBError; | ||||
| use crate::options; | ||||
| use crate::protocol::Protocol; | ||||
| use crate::storage::Storage; | ||||
| use crate::storage_sled::SledStorage; | ||||
| use crate::storage_trait::StorageBackend; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct Server { | ||||
|     pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>, | ||||
|     pub option: options::DBOption, | ||||
|     pub client_name: Option<String>, | ||||
|     pub selected_db: u64, // Changed from usize to u64 | ||||
|     pub queued_cmd: Option<Vec<(Cmd, Protocol)>>, | ||||
|  | ||||
|     // BLPOP waiter registry: per (db_index, key) FIFO of waiters | ||||
|     pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>, | ||||
|     pub waiter_seq: Arc<AtomicU64>, | ||||
| } | ||||
|  | ||||
| pub struct Waiter { | ||||
|     pub id: u64, | ||||
|     pub side: PopSide, | ||||
|     pub tx: oneshot::Sender<(String, String)>, // (key, element) | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||
| pub enum PopSide { | ||||
|     Left, | ||||
|     Right, | ||||
| } | ||||
|  | ||||
| impl Server { | ||||
|     pub async fn new(option: options::DBOption) -> Self { | ||||
|         Server { | ||||
|             db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             option, | ||||
|             client_name: None, | ||||
|             selected_db: 0, | ||||
|             queued_cmd: None, | ||||
|  | ||||
|             list_waiters: Arc::new(Mutex::new(HashMap::new())), | ||||
|             waiter_seq: Arc::new(AtomicU64::new(1)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> { | ||||
|         let mut cache = self.db_cache.write().unwrap(); | ||||
|          | ||||
|         if let Some(storage) = cache.get(&self.selected_db) { | ||||
|             return Ok(storage.clone()); | ||||
|         } | ||||
|          | ||||
|          | ||||
|         // Create new database file | ||||
|         let db_file_path = std::path::PathBuf::from(self.option.dir.clone()) | ||||
|             .join(format!("{}.db", self.selected_db)); | ||||
|          | ||||
|         // Ensure the directory exists before creating the database file | ||||
|         if let Some(parent_dir) = db_file_path.parent() { | ||||
|             std::fs::create_dir_all(parent_dir).map_err(|e| { | ||||
|                 DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e)) | ||||
|             })?; | ||||
|         } | ||||
|          | ||||
|         println!("Creating new db file: {}", db_file_path.display()); | ||||
|          | ||||
|         let storage: Arc<dyn StorageBackend> = match self.option.backend { | ||||
|             options::BackendType::Redb => { | ||||
|                 Arc::new(Storage::new( | ||||
|                     db_file_path, | ||||
|                     self.should_encrypt_db(self.selected_db), | ||||
|                     self.option.encryption_key.as_deref() | ||||
|                 )?) | ||||
|             } | ||||
|             options::BackendType::Sled => { | ||||
|                 Arc::new(SledStorage::new( | ||||
|                     db_file_path, | ||||
|                     self.should_encrypt_db(self.selected_db), | ||||
|                     self.option.encryption_key.as_deref() | ||||
|                 )?) | ||||
|             } | ||||
|         }; | ||||
|          | ||||
|         cache.insert(self.selected_db, storage.clone()); | ||||
|         Ok(storage) | ||||
|     } | ||||
|      | ||||
|     fn should_encrypt_db(&self, db_index: u64) -> bool { | ||||
|         // DB 0-9 are non-encrypted, DB 10+ are encrypted | ||||
|         self.option.encrypt && db_index >= 10 | ||||
|     } | ||||
|  | ||||
|     // ----- BLPOP waiter helpers ----- | ||||
|  | ||||
|     pub async fn register_waiter(&self, db_index: u64, key: &str, side: PopSide) -> (u64, oneshot::Receiver<(String, String)>) { | ||||
|         let id = self.waiter_seq.fetch_add(1, Ordering::Relaxed); | ||||
|         let (tx, rx) = oneshot::channel::<(String, String)>(); | ||||
|  | ||||
|         let mut guard = self.list_waiters.lock().await; | ||||
|         let per_db = guard.entry(db_index).or_insert_with(HashMap::new); | ||||
|         let q = per_db.entry(key.to_string()).or_insert_with(Vec::new); | ||||
|         q.push(Waiter { id, side, tx }); | ||||
|         (id, rx) | ||||
|     } | ||||
|  | ||||
|     pub async fn unregister_waiter(&self, db_index: u64, key: &str, id: u64) { | ||||
|         let mut guard = self.list_waiters.lock().await; | ||||
|         if let Some(per_db) = guard.get_mut(&db_index) { | ||||
|             if let Some(q) = per_db.get_mut(key) { | ||||
|                 q.retain(|w| w.id != id); | ||||
|                 if q.is_empty() { | ||||
|                     per_db.remove(key); | ||||
|                 } | ||||
|             } | ||||
|             if per_db.is_empty() { | ||||
|                 guard.remove(&db_index); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Called after LPUSH/RPUSH to deliver to blocked BLPOP waiters. | ||||
|     pub async fn drain_waiters_after_push(&self, key: &str) -> Result<(), DBError> { | ||||
|         let db_index = self.selected_db; | ||||
|  | ||||
|         loop { | ||||
|             // Check if any waiter exists | ||||
|             let maybe_waiter = { | ||||
|                 let mut guard = self.list_waiters.lock().await; | ||||
|                 if let Some(per_db) = guard.get_mut(&db_index) { | ||||
|                     if let Some(q) = per_db.get_mut(key) { | ||||
|                         if !q.is_empty() { | ||||
|                             // Pop FIFO | ||||
|                             Some(q.remove(0)) | ||||
|                         } else { | ||||
|                             None | ||||
|                         } | ||||
|                     } else { | ||||
|                         None | ||||
|                     } | ||||
|                 } else { | ||||
|                     None | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             let waiter = if let Some(w) = maybe_waiter { w } else { break }; | ||||
|  | ||||
|             // Pop one element depending on waiter side | ||||
|             let elems = match waiter.side { | ||||
|                 PopSide::Left => self.current_storage()?.lpop(key, 1)?, | ||||
|                 PopSide::Right => self.current_storage()?.rpop(key, 1)?, | ||||
|             }; | ||||
|             if elems.is_empty() { | ||||
|                 // Nothing to deliver; re-register waiter at the front to preserve order | ||||
|                 let mut guard = self.list_waiters.lock().await; | ||||
|                 let per_db = guard.entry(db_index).or_insert_with(HashMap::new); | ||||
|                 let q = per_db.entry(key.to_string()).or_insert_with(Vec::new); | ||||
|                 q.insert(0, waiter); | ||||
|                 break; | ||||
|             } else { | ||||
|                 let elem = elems[0].clone(); | ||||
|                 // Send to waiter; if receiver dropped, just continue | ||||
|                 let _ = waiter.tx.send((key.to_string(), elem)); | ||||
|                 // Loop to try to satisfy more waiters if more elements remain | ||||
|                 continue; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub async fn handle( | ||||
|         &mut self, | ||||
|         mut stream: tokio::net::TcpStream, | ||||
|     ) -> Result<(), DBError> { | ||||
|         // Accumulate incoming bytes to handle partial RESP frames | ||||
|         let mut acc = String::new(); | ||||
|         let mut buf = vec![0u8; 8192]; | ||||
|  | ||||
|         loop { | ||||
|             let n = match stream.read(&mut buf).await { | ||||
|                 Ok(0) => { | ||||
|                     println!("[handle] connection closed"); | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|                 Ok(n) => n, | ||||
|                 Err(e) => { | ||||
|                     println!("[handle] read error: {:?}", e); | ||||
|                     return Err(e.into()); | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             // Append to accumulator. RESP for our usage is ASCII-safe. | ||||
|             acc.push_str(str::from_utf8(&buf[..n])?); | ||||
|  | ||||
|             // Try to parse as many complete commands as are available in 'acc'. | ||||
|             loop { | ||||
|                 let parsed = Cmd::from(&acc); | ||||
|                 let (cmd, protocol, remaining) = match parsed { | ||||
|                     Ok((cmd, protocol, remaining)) => (cmd, protocol, remaining), | ||||
|                     Err(_e) => { | ||||
|                         // Incomplete or invalid frame; assume incomplete and wait for more data. | ||||
|                         // This avoids emitting spurious protocol_error for split frames. | ||||
|                         break; | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 // Advance the accumulator to the unparsed remainder | ||||
|                 acc = remaining.to_string(); | ||||
|  | ||||
|                 if self.option.debug { | ||||
|                     println!("\x1b[34;1mgot command: {:?}, protocol: {:?}\x1b[0m", cmd, protocol); | ||||
|                 } else { | ||||
|                     println!("got command: {:?}, protocol: {:?}", cmd, protocol); | ||||
|                 } | ||||
|  | ||||
|                 // Check if this is a QUIT command before processing | ||||
|                 let is_quit = matches!(cmd, Cmd::Quit); | ||||
|  | ||||
|                 let res = match cmd.run(self).await { | ||||
|                     Ok(p) => p, | ||||
|                     Err(e) => { | ||||
|                         if self.option.debug { | ||||
|                             eprintln!("[run error] {:?}", e); | ||||
|                         } | ||||
|                         Protocol::err(&format!("ERR {}", e.0)) | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 if self.option.debug { | ||||
|                     println!("\x1b[34;1mqueued cmd {:?}\x1b[0m", self.queued_cmd); | ||||
|                     println!("\x1b[32;1mgoing to send response {}\x1b[0m", res.encode()); | ||||
|                 } else { | ||||
|                     print!("queued cmd {:?}", self.queued_cmd); | ||||
|                     println!("going to send response {}", res.encode()); | ||||
|                 } | ||||
|  | ||||
|                 _ = stream.write(res.encode().as_bytes()).await?; | ||||
|  | ||||
|                 // If this was a QUIT command, close the connection | ||||
|                 if is_quit { | ||||
|                     println!("[handle] QUIT command received, closing connection"); | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|  | ||||
|                 // Continue parsing any further complete commands already in 'acc' | ||||
|                 if acc.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
mock_embedder.py (new file, 34 lines)
							| @@ -0,0 +1,34 @@ | ||||
| from flask import Flask, request, jsonify | ||||
| import numpy as np | ||||
|  | ||||
| app = Flask(__name__) | ||||
|  | ||||
| @app.route('/v1/embeddings', methods=['POST']) | ||||
| def embeddings(): | ||||
|     data = request.json | ||||
|     inputs = data.get('input', []) | ||||
|     if isinstance(inputs, str): | ||||
|         inputs = [inputs] | ||||
|      | ||||
|     # Generate deterministic 768-dim embeddings (hash-based) | ||||
|     embeddings = [] | ||||
|     for text in inputs: | ||||
|         # Simple hash to vector | ||||
|         vec = np.zeros(768) | ||||
|         for i, char in enumerate(text[:768]): | ||||
|             vec[i % 768] += ord(char) / 255.0 | ||||
|         # Normalize | ||||
|         norm = np.linalg.norm(vec) | ||||
|         if norm > 0: | ||||
|             vec = vec / norm | ||||
|         embeddings.append(vec.tolist()) | ||||
|      | ||||
|     return jsonify({ | ||||
|         "data": [{"embedding": emb} for emb in embeddings], | ||||
|         "model": data.get('model', 'mock'), | ||||
|         "usage": {"total_tokens": sum(len(t) for t in inputs)} | ||||
|     }) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     app.run(host='127.0.0.1', port=8081) | ||||
|  | ||||
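For reference, a minimal shell check of the mock embedder above (it needs flask and numpy installed):

```bash
# Start the mock embedding service, then request an embedding for one string.
python3 mock_embedder.py &
curl -s http://127.0.0.1:8081/v1/embeddings \
  -H 'Content-Type: application/json' \
  -d '{"input": ["hello world"], "model": "mock"}'
# -> {"data": [{"embedding": [ ...768 floats... ]}], "model": "mock", "usage": {"total_tokens": 11}}
```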
							
								
								
									
run.sh (new executable file, 143 lines)
							| @@ -0,0 +1,143 @@ | ||||
| #!/bin/bash | ||||
| set -euo pipefail | ||||
| SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | ||||
| cd "$SCRIPT_DIR" | ||||
|  | ||||
|  | ||||
| # Launch script for HeroDB - a Redis-compatible database with the redb backend | ||||
| # Builds the project, starts the server on port 6381, and keeps it in the foreground until interrupted | ||||
|  | ||||
| # Colors for output | ||||
| RED='\033[0;31m' | ||||
| GREEN='\033[0;32m' | ||||
| YELLOW='\033[1;33m' | ||||
| BLUE='\033[0;34m' | ||||
| NC='\033[0m' # No Color | ||||
|  | ||||
| # Configuration | ||||
| DB_DIR="/tmp/test_db" | ||||
| PORT=6381 | ||||
| SERVER_PID="" | ||||
|  | ||||
| # Function to print colored output | ||||
| print_status() { | ||||
|     echo -e "${BLUE}[INFO]${NC} $1" | ||||
| } | ||||
|  | ||||
| print_success() { | ||||
|     echo -e "${GREEN}[SUCCESS]${NC} $1" | ||||
| } | ||||
|  | ||||
| print_error() { | ||||
|     echo -e "${RED}[ERROR]${NC} $1" | ||||
| } | ||||
|  | ||||
| print_warning() { | ||||
|     echo -e "${YELLOW}[WARNING]${NC} $1" | ||||
| } | ||||
|  | ||||
| # Function to cleanup on exit | ||||
| cleanup() { | ||||
|     if [ ! -z "$SERVER_PID" ]; then | ||||
|         print_status "Stopping HeroDB server (PID: $SERVER_PID)..." | ||||
|         kill $SERVER_PID 2>/dev/null || true | ||||
|         wait $SERVER_PID 2>/dev/null || true | ||||
|     fi | ||||
|      | ||||
|     # Clean up test database | ||||
|     if [ -d "$DB_DIR" ]; then | ||||
|         print_status "Cleaning up test database directory..." | ||||
|         rm -rf "$DB_DIR" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Set trap to cleanup on script exit | ||||
| trap cleanup EXIT | ||||
|  | ||||
| # Function to wait for server to start | ||||
| wait_for_server() { | ||||
|     local max_attempts=30 | ||||
|     local attempt=1 | ||||
|      | ||||
|     print_status "Waiting for server to start on port $PORT..." | ||||
|      | ||||
|     while [ $attempt -le $max_attempts ]; do | ||||
|         if nc -z localhost $PORT 2>/dev/null; then | ||||
|             print_success "Server is ready!" | ||||
|             return 0 | ||||
|         fi | ||||
|          | ||||
|         echo -n "." | ||||
|         sleep 1 | ||||
|         attempt=$((attempt + 1)) | ||||
|     done | ||||
|      | ||||
|     print_error "Server failed to start within $max_attempts seconds" | ||||
|     return 1 | ||||
| } | ||||
|  | ||||
| # Function to send Redis command and get response | ||||
| redis_cmd() { | ||||
|     local cmd="$1" | ||||
|     local expected="$2" | ||||
|      | ||||
|     print_status "Testing: $cmd" | ||||
|      | ||||
|     local result=$(echo "$cmd" | redis-cli -p $PORT --raw 2>/dev/null || echo "ERROR") | ||||
|      | ||||
|     if [ "$expected" != "" ] && [ "$result" != "$expected" ]; then | ||||
|         print_error "Expected: '$expected', Got: '$result'" | ||||
|         return 1 | ||||
|     else | ||||
|         print_success "✓ $cmd -> $result" | ||||
|         return 0 | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Main execution | ||||
| main() { | ||||
|     print_status "Starting HeroDB" | ||||
|      | ||||
|     # Build the project | ||||
|     print_status "Building HeroDB..." | ||||
|     if ! cargo build -p herodb --release; then | ||||
|         print_error "Failed to build HeroDB" | ||||
|         exit 1 | ||||
|     fi | ||||
|      | ||||
|     # Create test database directory | ||||
|     mkdir -p "$DB_DIR" | ||||
|      | ||||
|     # Start the server | ||||
|     print_status "Starting HeroDB server..." | ||||
|     ${SCRIPT_DIR}/target/release/herodb --dir "$DB_DIR" --port $PORT & | ||||
|     SERVER_PID=$! | ||||
|      | ||||
|     # Wait for server to start | ||||
|     if ! wait_for_server; then | ||||
|         print_error "Failed to start server" | ||||
|         exit 1 | ||||
|     fi | ||||
|      | ||||
| } | ||||
|  | ||||
| # Check dependencies | ||||
| check_dependencies() { | ||||
|     if ! command -v cargo &> /dev/null; then | ||||
|         print_error "cargo is required but not installed" | ||||
|         exit 1 | ||||
|     fi | ||||
|      | ||||
|     if ! command -v nc &> /dev/null; then | ||||
|         print_warning "netcat (nc) not found - some tests may not work properly" | ||||
|     fi | ||||
|      | ||||
|     if ! command -v redis-cli &> /dev/null; then | ||||
|         print_warning "redis-cli not found - using netcat fallback" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Run dependency check and main function | ||||
| check_dependencies | ||||
| main "$@" | ||||
| tail -f /dev/null | ||||
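Typical usage of run.sh is to leave it running in one terminal and talk to the server from another; this sketch assumes standard Redis commands such as PING are implemented:

```bash
./run.sh                 # builds HeroDB and starts it on port 6381 (data in /tmp/test_db)
redis-cli -p 6381 PING   # from a second shell; Ctrl-C the first shell to stop and clean up
```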
| @@ -1,4 +1,7 @@ | ||||
| #!/bin/bash | ||||
| set -euo pipefail | ||||
| SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | ||||
| cd "$SCRIPT_DIR" | ||||
|  | ||||
| echo "🧪 Running HeroDB Redis Compatibility Tests" | ||||
| echo "==========================================" | ||||
							
								
								
									
scripts/compare_backends.py (new executable file, 258 lines)
							| @@ -0,0 +1,258 @@ | ||||
| #!/usr/bin/env python3 | ||||
| """ | ||||
| Compare performance between redb and sled backends. | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import csv | ||||
| import sys | ||||
| from typing import Dict, List, Any | ||||
| from pathlib import Path | ||||
|  | ||||
| def load_results(input_file: str) -> List[Dict[str, Any]]: | ||||
|     """Load benchmark results from CSV or JSON file.""" | ||||
|     path = Path(input_file) | ||||
|      | ||||
|     if not path.exists(): | ||||
|         print(f"Error: File not found: {input_file}", file=sys.stderr) | ||||
|         return [] | ||||
|      | ||||
|     if path.suffix == '.json': | ||||
|         with open(input_file, 'r') as f: | ||||
|             data = json.load(f) | ||||
|             return data.get('benchmarks', []) | ||||
|     elif path.suffix == '.csv': | ||||
|         results = [] | ||||
|         with open(input_file, 'r') as f: | ||||
|             reader = csv.DictReader(f) | ||||
|             for row in reader: | ||||
|                 # Convert numeric fields | ||||
|                 row['mean_ns'] = float(row.get('mean_ns', 0)) | ||||
|                 row['median_ns'] = float(row.get('median_ns', 0)) | ||||
|                 row['throughput_ops_sec'] = float(row.get('throughput_ops_sec', 0)) | ||||
|                 results.append(row) | ||||
|         return results | ||||
|     else: | ||||
|         print(f"Error: Unsupported file format: {path.suffix}", file=sys.stderr) | ||||
|         return [] | ||||
|  | ||||
| def group_by_operation(results: List[Dict[str, Any]]) -> Dict[str, Dict[str, Dict]]: | ||||
|     """Group results by operation and backend.""" | ||||
|     grouped = {} | ||||
|      | ||||
|     for result in results: | ||||
|         operation = result.get('operation', result.get('name', '')) | ||||
|         backend = result.get('backend', '') | ||||
|          | ||||
|         if not operation or not backend: | ||||
|             continue | ||||
|          | ||||
|         if operation not in grouped: | ||||
|             grouped[operation] = {} | ||||
|          | ||||
|         grouped[operation][backend] = result | ||||
|      | ||||
|     return grouped | ||||
|  | ||||
| def calculate_speedup(redb_value: float, sled_value: float) -> float: | ||||
|     """Calculate speedup factor (values > 1.0 mean redb is faster).""" | ||||
|     if redb_value == 0 or sled_value == 0: | ||||
|         return 0 | ||||
|     return sled_value / redb_value | ||||
|  | ||||
| def format_duration(nanos: float) -> str: | ||||
|     """Format duration in human-readable format.""" | ||||
|     if nanos < 1_000: | ||||
|         return f"{nanos:.0f} ns" | ||||
|     elif nanos < 1_000_000: | ||||
|         return f"{nanos / 1_000:.2f} µs" | ||||
|     elif nanos < 1_000_000_000: | ||||
|         return f"{nanos / 1_000_000:.2f} ms" | ||||
|     else: | ||||
|         return f"{nanos / 1_000_000_000:.2f} s" | ||||
|  | ||||
| def print_comparison_table(grouped: Dict[str, Dict[str, Dict]]): | ||||
|     """Print a comparison table of backends.""" | ||||
|     print("\n" + "=" * 100) | ||||
|     print("BACKEND PERFORMANCE COMPARISON") | ||||
|     print("=" * 100) | ||||
|     print() | ||||
|      | ||||
|     # Header | ||||
|     print(f"{'Operation':<30} {'redb (mean)':<15} {'sled (mean)':<15} {'Speedup':<12} {'Winner':<10}") | ||||
|     print("-" * 100) | ||||
|      | ||||
|     redb_wins = 0 | ||||
|     sled_wins = 0 | ||||
|     total_comparisons = 0 | ||||
|      | ||||
|     for operation in sorted(grouped.keys()): | ||||
|         backends = grouped[operation] | ||||
|          | ||||
|         if 'redb' in backends and 'sled' in backends: | ||||
|             redb_mean = backends['redb'].get('mean_ns', 0) | ||||
|             sled_mean = backends['sled'].get('mean_ns', 0) | ||||
|              | ||||
|             speedup = calculate_speedup(redb_mean, sled_mean) | ||||
|              | ||||
|             if speedup > 1.0: | ||||
|                 winner = "redb" | ||||
|                 redb_wins += 1 | ||||
|             elif speedup < 1.0: | ||||
|                 winner = "sled" | ||||
|                 sled_wins += 1 | ||||
|             else: | ||||
|                 winner = "tie" | ||||
|              | ||||
|             total_comparisons += 1 | ||||
|              | ||||
|             speedup_str = f"{speedup:.2f}x" if speedup != 0 else "N/A" | ||||
|              | ||||
|             print(f"{operation:<30} {format_duration(redb_mean):<15} {format_duration(sled_mean):<15} " | ||||
|                   f"{speedup_str:<12} {winner:<10}") | ||||
|      | ||||
|     print("-" * 100) | ||||
|     print(f"\nSummary: redb wins: {redb_wins}, sled wins: {sled_wins}, total: {total_comparisons}") | ||||
|      | ||||
|     if total_comparisons > 0: | ||||
|         redb_pct = (redb_wins / total_comparisons) * 100 | ||||
|         sled_pct = (sled_wins / total_comparisons) * 100 | ||||
|         print(f"Win rate: redb {redb_pct:.1f}%, sled {sled_pct:.1f}%") | ||||
|  | ||||
| def print_throughput_comparison(grouped: Dict[str, Dict[str, Dict]]): | ||||
|     """Print throughput comparison.""" | ||||
|     print("\n" + "=" * 100) | ||||
|     print("THROUGHPUT COMPARISON (ops/sec)") | ||||
|     print("=" * 100) | ||||
|     print() | ||||
|      | ||||
|     print(f"{'Operation':<30} {'redb':<20} {'sled':<20} {'Difference':<15}") | ||||
|     print("-" * 100) | ||||
|      | ||||
|     for operation in sorted(grouped.keys()): | ||||
|         backends = grouped[operation] | ||||
|          | ||||
|         if 'redb' in backends and 'sled' in backends: | ||||
|             redb_throughput = backends['redb'].get('throughput_ops_sec', 0) | ||||
|             sled_throughput = backends['sled'].get('throughput_ops_sec', 0) | ||||
|              | ||||
|             diff_pct = ((redb_throughput - sled_throughput) / sled_throughput * 100) if sled_throughput > 0 else 0 | ||||
|             diff_str = f"{diff_pct:+.1f}%" | ||||
|              | ||||
|             print(f"{operation:<30} {redb_throughput:>18,.0f}  {sled_throughput:>18,.0f}  {diff_str:>13}") | ||||
|  | ||||
| def generate_recommendations(grouped: Dict[str, Dict[str, Dict]]): | ||||
|     """Generate recommendations based on benchmark results.""" | ||||
|     print("\n" + "=" * 100) | ||||
|     print("RECOMMENDATIONS") | ||||
|     print("=" * 100) | ||||
|     print() | ||||
|      | ||||
|     redb_strengths = [] | ||||
|     sled_strengths = [] | ||||
|      | ||||
|     for operation, backends in grouped.items(): | ||||
|         if 'redb' in backends and 'sled' in backends: | ||||
|             redb_mean = backends['redb'].get('mean_ns', 0) | ||||
|             sled_mean = backends['sled'].get('mean_ns', 0) | ||||
|              | ||||
|             speedup = calculate_speedup(redb_mean, sled_mean) | ||||
|              | ||||
|             if speedup > 1.2:  # redb is >20% faster | ||||
|                 redb_strengths.append((operation, speedup)) | ||||
|             elif 0 < speedup < 0.8:  # sled is >20% faster | ||||
|                 sled_strengths.append((operation, 1/speedup)) | ||||
|      | ||||
|     print("Use redb when:") | ||||
|     if redb_strengths: | ||||
|         for op, speedup in sorted(redb_strengths, key=lambda x: x[1], reverse=True)[:5]: | ||||
|             print(f"  • {op}: {speedup:.2f}x faster than sled") | ||||
|     else: | ||||
|         print("  • No significant advantages found") | ||||
|      | ||||
|     print("\nUse sled when:") | ||||
|     if sled_strengths: | ||||
|         for op, speedup in sorted(sled_strengths, key=lambda x: x[1], reverse=True)[:5]: | ||||
|             print(f"  • {op}: {speedup:.2f}x faster than redb") | ||||
|     else: | ||||
|         print("  • No significant advantages found") | ||||
|      | ||||
|     print("\nGeneral guidelines:") | ||||
|     print("  • redb: Better for read-heavy workloads, predictable latency") | ||||
|     print("  • sled: Better for write-heavy workloads, memory efficiency") | ||||
|  | ||||
| def export_comparison(grouped: Dict[str, Dict[str, Dict]], output_file: str): | ||||
|     """Export comparison to CSV.""" | ||||
|     with open(output_file, 'w', newline='') as f: | ||||
|         fieldnames = ['operation', 'redb_mean_ns', 'sled_mean_ns', 'speedup',  | ||||
|                       'redb_throughput', 'sled_throughput', 'winner'] | ||||
|         writer = csv.DictWriter(f, fieldnames=fieldnames) | ||||
|         writer.writeheader() | ||||
|          | ||||
|         for operation, backends in sorted(grouped.items()): | ||||
|             if 'redb' in backends and 'sled' in backends: | ||||
|                 redb_mean = backends['redb'].get('mean_ns', 0) | ||||
|                 sled_mean = backends['sled'].get('mean_ns', 0) | ||||
|                 redb_throughput = backends['redb'].get('throughput_ops_sec', 0) | ||||
|                 sled_throughput = backends['sled'].get('throughput_ops_sec', 0) | ||||
|                  | ||||
|                 speedup = calculate_speedup(redb_mean, sled_mean) | ||||
|                 winner = "redb" if speedup > 1.0 else "sled" if speedup < 1.0 else "tie" | ||||
|                  | ||||
|                 writer.writerow({ | ||||
|                     'operation': operation, | ||||
|                     'redb_mean_ns': int(redb_mean), | ||||
|                     'sled_mean_ns': int(sled_mean), | ||||
|                     'speedup': f"{speedup:.2f}", | ||||
|                     'redb_throughput': f"{redb_throughput:.0f}", | ||||
|                     'sled_throughput': f"{sled_throughput:.0f}", | ||||
|                     'winner': winner | ||||
|                 }) | ||||
|      | ||||
|     print(f"\nComparison exported to {output_file}") | ||||
|  | ||||
| def main(): | ||||
|     if len(sys.argv) < 2: | ||||
|         print("Usage: python compare_backends.py <results_file> [--export comparison.csv]") | ||||
|         print("\nExample:") | ||||
|         print("  python compare_backends.py results.csv") | ||||
|         print("  python compare_backends.py results.json --export comparison.csv") | ||||
|         sys.exit(1) | ||||
|      | ||||
|     input_file = sys.argv[1] | ||||
|     export_file = None | ||||
|      | ||||
|     # Parse command line arguments | ||||
|     if '--export' in sys.argv: | ||||
|         idx = sys.argv.index('--export') | ||||
|         if idx + 1 < len(sys.argv): | ||||
|             export_file = sys.argv[idx + 1] | ||||
|      | ||||
|     # Load results | ||||
|     print(f"Loading results from {input_file}...") | ||||
|     results = load_results(input_file) | ||||
|      | ||||
|     if not results: | ||||
|         print("No results found!") | ||||
|         sys.exit(1) | ||||
|      | ||||
|     print(f"Loaded {len(results)} benchmark results") | ||||
|      | ||||
|     # Group by operation | ||||
|     grouped = group_by_operation(results) | ||||
|      | ||||
|     if not grouped: | ||||
|         print("No comparable results found!") | ||||
|         sys.exit(1) | ||||
|      | ||||
|     # Print comparisons | ||||
|     print_comparison_table(grouped) | ||||
|     print_throughput_comparison(grouped) | ||||
|     generate_recommendations(grouped) | ||||
|      | ||||
|     # Export if requested | ||||
|     if export_file: | ||||
|         export_comparison(grouped, export_file) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
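The --export path writes a flat head-to-head table; a sketch of the invocation and of the columns it produces (taken from export_comparison above):

```bash
# comparison.csv columns: operation, redb_mean_ns, sled_mean_ns, speedup,
# redb_throughput, sled_throughput, winner
python3 scripts/compare_backends.py results.csv --export comparison.csv
```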
							
								
								
									
scripts/parse_results.py (new executable file, 222 lines)
							| @@ -0,0 +1,222 @@ | ||||
| #!/usr/bin/env python3 | ||||
| """ | ||||
| Parse Criterion benchmark results and export to CSV/JSON formats. | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import csv | ||||
| import sys | ||||
| import os | ||||
| from pathlib import Path | ||||
| from typing import Dict, List, Any | ||||
|  | ||||
| def parse_criterion_json(criterion_dir: str) -> List[Dict[str, Any]]: | ||||
|     """Parse Criterion benchmark results from the target directory.""" | ||||
|     results = [] | ||||
|     criterion_path = Path(criterion_dir) | ||||
|      | ||||
|     if not criterion_path.exists(): | ||||
|         print(f"Error: Criterion directory not found: {criterion_dir}", file=sys.stderr) | ||||
|         return results | ||||
|      | ||||
|     # Criterion writes the statistics to estimates.json; benchmark.json only holds metadata | ||||
|     for benchmark_file in criterion_path.rglob("new/estimates.json"): | ||||
|         try: | ||||
|             with open(benchmark_file, 'r') as f: | ||||
|                 data = json.load(f) | ||||
|              | ||||
|             # Extract benchmark name from path | ||||
|             bench_name = str(benchmark_file.parent.parent.name) | ||||
|              | ||||
|             # Extract metrics | ||||
|             result = { | ||||
|                 'name': bench_name, | ||||
|                 'mean_ns': data.get('mean', {}).get('point_estimate', 0), | ||||
|                 'median_ns': data.get('median', {}).get('point_estimate', 0), | ||||
|                 'std_dev_ns': data.get('std_dev', {}).get('point_estimate', 0), | ||||
|             } | ||||
|              | ||||
|             # Calculate throughput | ||||
|             if result['mean_ns'] > 0: | ||||
|                 result['throughput_ops_sec'] = 1_000_000_000 / result['mean_ns'] | ||||
|             else: | ||||
|                 result['throughput_ops_sec'] = 0 | ||||
|              | ||||
|             results.append(result) | ||||
|         except Exception as e: | ||||
|             print(f"Warning: Failed to parse {benchmark_file}: {e}", file=sys.stderr) | ||||
|      | ||||
|     return results | ||||
|  | ||||
| def parse_benchmark_name(name: str) -> Dict[str, str]: | ||||
|     """Parse benchmark name into components.""" | ||||
|     parts = name.split('/') | ||||
|      | ||||
|     result = { | ||||
|         'suite': parts[0] if len(parts) > 0 else '', | ||||
|         'category': parts[1] if len(parts) > 1 else '', | ||||
|         'operation': parts[2] if len(parts) > 2 else '', | ||||
|         'backend': '', | ||||
|         'parameter': '' | ||||
|     } | ||||
|      | ||||
|     # Try to extract backend name | ||||
|     for part in parts: | ||||
|         if 'redb' in part.lower(): | ||||
|             result['backend'] = 'redb' | ||||
|             break | ||||
|         elif 'sled' in part.lower(): | ||||
|             result['backend'] = 'sled' | ||||
|             break | ||||
|      | ||||
|     # Extract parameter (size, clients, etc.) | ||||
|     if len(parts) > 3: | ||||
|         result['parameter'] = parts[3] | ||||
|      | ||||
|     return result | ||||
|  | ||||
| def export_to_csv(results: List[Dict[str, Any]], output_file: str): | ||||
|     """Export results to CSV format.""" | ||||
|     if not results: | ||||
|         print("No results to export", file=sys.stderr) | ||||
|         return | ||||
|      | ||||
|     fieldnames = ['name', 'backend', 'operation', 'mean_ns', 'median_ns',  | ||||
|                   'std_dev_ns', 'throughput_ops_sec'] | ||||
|      | ||||
|     with open(output_file, 'w', newline='') as f: | ||||
|         writer = csv.DictWriter(f, fieldnames=fieldnames) | ||||
|         writer.writeheader() | ||||
|          | ||||
|         for result in results: | ||||
|             parsed = parse_benchmark_name(result['name']) | ||||
|             row = { | ||||
|                 'name': result['name'], | ||||
|                 'backend': parsed['backend'], | ||||
|                 'operation': parsed['operation'], | ||||
|                 'mean_ns': int(result['mean_ns']), | ||||
|                 'median_ns': int(result['median_ns']), | ||||
|                 'std_dev_ns': int(result['std_dev_ns']), | ||||
|                 'throughput_ops_sec': f"{result['throughput_ops_sec']:.2f}" | ||||
|             } | ||||
|             writer.writerow(row) | ||||
|      | ||||
|     print(f"Exported {len(results)} results to {output_file}") | ||||
|  | ||||
| def export_to_json(results: List[Dict[str, Any]], output_file: str): | ||||
|     """Export results to JSON format.""" | ||||
|     if not results: | ||||
|         print("No results to export", file=sys.stderr) | ||||
|         return | ||||
|      | ||||
|     # Enhance results with parsed information | ||||
|     enhanced_results = [] | ||||
|     for result in results: | ||||
|         parsed = parse_benchmark_name(result['name']) | ||||
|         enhanced = {**result, **parsed} | ||||
|         enhanced_results.append(enhanced) | ||||
|      | ||||
|     output = { | ||||
|         'benchmarks': enhanced_results, | ||||
|         'summary': { | ||||
|             'total_benchmarks': len(results), | ||||
|             'backends': list(set(r.get('backend', '') for r in enhanced_results if r.get('backend'))) | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     with open(output_file, 'w') as f: | ||||
|         json.dump(output, f, indent=2) | ||||
|      | ||||
|     print(f"Exported {len(results)} results to {output_file}") | ||||
|  | ||||
| def print_summary(results: List[Dict[str, Any]]): | ||||
|     """Print a summary of benchmark results.""" | ||||
|     if not results: | ||||
|         print("No results to summarize") | ||||
|         return | ||||
|      | ||||
|     print("\n=== Benchmark Summary ===\n") | ||||
|     print(f"Total benchmarks: {len(results)}") | ||||
|      | ||||
|     # Group by backend | ||||
|     backends = {} | ||||
|     for result in results: | ||||
|         parsed = parse_benchmark_name(result['name']) | ||||
|         backend = parsed['backend'] | ||||
|         if backend: | ||||
|             if backend not in backends: | ||||
|                 backends[backend] = [] | ||||
|             backends[backend].append(result) | ||||
|      | ||||
|     for backend, bench_results in backends.items(): | ||||
|         print(f"\n{backend.upper()}:") | ||||
|         print(f"  Benchmarks: {len(bench_results)}") | ||||
|          | ||||
|         if bench_results: | ||||
|             mean_throughput = sum(r['throughput_ops_sec'] for r in bench_results) / len(bench_results) | ||||
|             print(f"  Avg throughput: {mean_throughput:.2f} ops/sec") | ||||
|              | ||||
|             fastest = max(bench_results, key=lambda x: x['throughput_ops_sec']) | ||||
|             print(f"  Fastest: {fastest['name']} ({fastest['throughput_ops_sec']:.2f} ops/sec)") | ||||
|  | ||||
| def main(): | ||||
|     if len(sys.argv) < 2: | ||||
|         print("Usage: python parse_results.py <criterion_dir> [--csv output.csv] [--json output.json]") | ||||
|         print("\nExample:") | ||||
|         print("  python parse_results.py target/criterion --csv results.csv --json results.json") | ||||
|         sys.exit(1) | ||||
|      | ||||
|     criterion_dir = sys.argv[1] | ||||
|      | ||||
|     # Parse command line arguments | ||||
|     csv_output = None | ||||
|     json_output = None | ||||
|      | ||||
|     i = 2 | ||||
|     while i < len(sys.argv): | ||||
|         if sys.argv[i] == '--csv' and i + 1 < len(sys.argv): | ||||
|             csv_output = sys.argv[i + 1] | ||||
|             i += 2 | ||||
|         elif sys.argv[i] == '--json' and i + 1 < len(sys.argv): | ||||
|             json_output = sys.argv[i + 1] | ||||
|             i += 2 | ||||
|         else: | ||||
|             i += 1 | ||||
|      | ||||
|     # Parse results | ||||
|     print(f"Parsing benchmark results from {criterion_dir}...") | ||||
|     results = parse_criterion_json(criterion_dir) | ||||
|      | ||||
|     if not results: | ||||
|         print("No benchmark results found!") | ||||
|         sys.exit(1) | ||||
|      | ||||
|     # Export results | ||||
|     if csv_output: | ||||
|         export_to_csv(results, csv_output) | ||||
|      | ||||
|     if json_output: | ||||
|         export_to_json(results, json_output) | ||||
|      | ||||
|     # Print summary | ||||
|     print_summary(results) | ||||
|      | ||||
|     # If no output specified, print to stdout | ||||
|     if not csv_output and not json_output: | ||||
|         print("\n=== CSV Output ===\n") | ||||
|         import io | ||||
|         output = io.StringIO() | ||||
|         fieldnames = ['name', 'mean_ns', 'median_ns', 'throughput_ops_sec'] | ||||
|         writer = csv.DictWriter(output, fieldnames=fieldnames) | ||||
|         writer.writeheader() | ||||
|         for result in results: | ||||
|             writer.writerow({ | ||||
|                 'name': result['name'], | ||||
|                 'mean_ns': int(result['mean_ns']), | ||||
|                 'median_ns': int(result['median_ns']), | ||||
|                 'throughput_ops_sec': f"{result['throughput_ops_sec']:.2f}" | ||||
|             }) | ||||
|         print(output.getvalue()) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
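Putting the two scripts together, the intended flow (assuming Criterion benchmarks exist in the crate, which this diff does not show) would look like:

```bash
cargo bench                                                               # produce target/criterion/*
python3 scripts/parse_results.py target/criterion --csv results.csv --json results.json
python3 scripts/compare_backends.py results.csv --export comparison.csv
```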
							
								
								
									
specs/backgroundinfo/lance.md (new file, 1251 lines; diff suppressed because it is too large)
specs/backgroundinfo/lancedb.md (new file, 6847 lines; diff suppressed because it is too large)
specs/lance_implementation.md (new file, 735 lines)
							| @@ -0,0 +1,735 @@ | ||||
| Looking at your request to ensure embedding happens with an external embedding provider configured in Redis at `config:core:aiembed:url`, I'll modify the implementation to use an external embedding service instead of local models. | ||||
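Before any Lance command can embed anything, the provider URL has to be present in Redis; a minimal setup sketch (port and endpoint URL are illustrative, the key and field name match the HGET in get_embedding_url() below):

```bash
redis-cli -p 6381 HSET config:core:aiembed:url url http://127.0.0.1:8081/v1/embeddings
```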
|  | ||||
| ## Updated Implementation with External Embedding Provider | ||||
|  | ||||
| ### 1. Update Dependencies in `Cargo.toml` | ||||
|  | ||||
| ```toml | ||||
| [dependencies] | ||||
| # ... existing dependencies ... | ||||
| lance = "0.18" | ||||
| lance-index = "0.18" | ||||
| lance-linalg = "0.18" | ||||
| arrow = "52" | ||||
| arrow-array = "52" | ||||
| arrow-schema = "52" | ||||
| parquet = "52" | ||||
| uuid = { version = "1.10", features = ["v4"] } | ||||
| reqwest = { version = "0.11", features = ["json"] } | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
| serde_json = "1.0" | ||||
| base64 = "0.22" | ||||
| image = "0.25" | ||||
| ``` | ||||
|  | ||||
| ### 2. Create Enhanced Lance Module with External Embedding | ||||
|  | ||||
| Create `src/lance_store.rs`: | ||||
|  | ||||
| ```rust | ||||
| use std::collections::HashMap; | ||||
| use std::path::PathBuf; | ||||
| use std::sync::Arc; | ||||
| use tokio::sync::RwLock; | ||||
|  | ||||
| use arrow::array::{Float32Array, StringArray, BinaryArray, ArrayRef}; | ||||
| use arrow::datatypes::{DataType, Field, Schema}; | ||||
| use arrow::record_batch::RecordBatch; | ||||
| use lance::dataset::{Dataset, WriteParams, WriteMode}; | ||||
| use lance::index::vector::VectorIndexParams; | ||||
| use lance_index::vector::pq::PQBuildParams; | ||||
| use lance_index::vector::ivf::IvfBuildParams; | ||||
|  | ||||
| use futures::TryStreamExt; // for .try_collect() on the Lance scan stream | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use crate::error::DBError; | ||||
| use crate::cmd::Protocol; | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| struct EmbeddingRequest { | ||||
|     texts: Option<Vec<String>>, | ||||
|     images: Option<Vec<String>>, // base64 encoded | ||||
|     model: Option<String>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| struct EmbeddingResponse { | ||||
|     embeddings: Vec<Vec<f32>>, | ||||
|     model: String, | ||||
|     usage: Option<HashMap<String, u32>>, | ||||
| } | ||||
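| // Illustrative wire format implied by the two structs above (values are made up): | ||||
| //   request:  {"texts": ["hello"], "images": null, "model": null} | ||||
| //   response: {"embeddings": [[0.12, -0.03, 0.88]], "model": "some-embedder", "usage": {"total_tokens": 1}} | ||||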
|  | ||||
| pub struct LanceStore { | ||||
|     datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>, | ||||
|     data_dir: PathBuf, | ||||
|     http_client: reqwest::Client, | ||||
| } | ||||
|  | ||||
| impl LanceStore { | ||||
|     pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> { | ||||
|         // Create data directory if it doesn't exist | ||||
|         std::fs::create_dir_all(&data_dir) | ||||
|             .map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?; | ||||
|          | ||||
|         let http_client = reqwest::Client::builder() | ||||
|             .timeout(std::time::Duration::from_secs(30)) | ||||
|             .build() | ||||
|             .map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?; | ||||
|          | ||||
|         Ok(Self { | ||||
|             datasets: Arc::new(RwLock::new(HashMap::new())), | ||||
|             data_dir, | ||||
|             http_client, | ||||
|         }) | ||||
|     } | ||||
|      | ||||
|     /// Get embedding service URL from Redis config | ||||
|     async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> { | ||||
|         // Get the embedding URL from Redis config | ||||
|         let key = "config:core:aiembed:url"; | ||||
|          | ||||
|         // Use HGET to retrieve the URL from Redis hash | ||||
|         let cmd = crate::cmd::Cmd::HGet { | ||||
|             key: key.to_string(), | ||||
|             field: "url".to_string(), | ||||
|         }; | ||||
|          | ||||
|         // Execute command to get the config | ||||
|         let result = cmd.run(server).await?; | ||||
|          | ||||
|         match result { | ||||
|             Protocol::BulkString(url) => Ok(url), | ||||
|             Protocol::SimpleString(url) => Ok(url), | ||||
|             Protocol::Nil => Err(DBError( | ||||
|                 "Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>".to_string() | ||||
|             )), | ||||
|             _ => Err(DBError("Invalid embedding URL configuration".to_string())), | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     /// Call external embedding service | ||||
|     async fn call_embedding_service( | ||||
|         &self, | ||||
|         server: &crate::server::Server, | ||||
|         texts: Option<Vec<String>>, | ||||
|         images: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         let url = self.get_embedding_url(server).await?; | ||||
|          | ||||
|         let request = EmbeddingRequest { | ||||
|             texts, | ||||
|             images, | ||||
|             model: None, // Let the service use its default | ||||
|         }; | ||||
|          | ||||
|         let response = self.http_client | ||||
|             .post(&url) | ||||
|             .json(&request) | ||||
|             .send() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?; | ||||
|          | ||||
|         if !response.status().is_success() { | ||||
|             let status = response.status(); | ||||
|             let error_text = response.text().await.unwrap_or_default(); | ||||
|             return Err(DBError(format!( | ||||
|                 "Embedding service returned error {}: {}",  | ||||
|                 status, error_text | ||||
|             ))); | ||||
|         } | ||||
|          | ||||
|         let embedding_response: EmbeddingResponse = response | ||||
|             .json() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?; | ||||
|          | ||||
|         Ok(embedding_response.embeddings) | ||||
|     } | ||||
|      | ||||
|     pub async fn embed_text( | ||||
|         &self,  | ||||
|         server: &crate::server::Server, | ||||
|         texts: Vec<String> | ||||
|     ) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         if texts.is_empty() { | ||||
|             return Ok(Vec::new()); | ||||
|         } | ||||
|          | ||||
|         self.call_embedding_service(server, Some(texts), None).await | ||||
|     } | ||||
|      | ||||
|     pub async fn embed_image( | ||||
|         &self, | ||||
|         server: &crate::server::Server, | ||||
|         image_bytes: Vec<u8> | ||||
|     ) -> Result<Vec<f32>, DBError> { | ||||
|         // Convert image bytes to base64 | ||||
|         // base64 0.22 removed the free encode() function; go through the Engine API | ||||
|         use base64::Engine as _; | ||||
|         let base64_image = base64::engine::general_purpose::STANDARD.encode(&image_bytes); | ||||
|          | ||||
|         let embeddings = self.call_embedding_service( | ||||
|             server,  | ||||
|             None,  | ||||
|             Some(vec![base64_image]) | ||||
|         ).await?; | ||||
|          | ||||
|         embeddings.into_iter() | ||||
|             .next() | ||||
|             .ok_or_else(|| DBError("No embedding returned for image".to_string())) | ||||
|     } | ||||
|      | ||||
|     pub async fn create_dataset( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         schema: Schema, | ||||
|     ) -> Result<(), DBError> { | ||||
|         let dataset_path = self.data_dir.join(format!("{}.lance", name)); | ||||
|          | ||||
|         // Create empty dataset with schema | ||||
|         let write_params = WriteParams { | ||||
|             mode: WriteMode::Create, | ||||
|             ..Default::default() | ||||
|         }; | ||||
|          | ||||
|         // Create an empty RecordBatch with the schema | ||||
|         let empty_batch = RecordBatch::new_empty(Arc::new(schema)); | ||||
|         let batches = vec![empty_batch]; | ||||
|          | ||||
|         let dataset = Dataset::write( | ||||
|             batches, | ||||
|             dataset_path.to_str().unwrap(), | ||||
|             Some(write_params) | ||||
|         ).await | ||||
|         .map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?; | ||||
|          | ||||
|         let mut datasets = self.datasets.write().await; | ||||
|         datasets.insert(name.to_string(), Arc::new(dataset)); | ||||
|          | ||||
|         Ok(()) | ||||
|     } | ||||
|      | ||||
|     pub async fn write_vectors( | ||||
|         &self, | ||||
|         dataset_name: &str, | ||||
|         vectors: Vec<Vec<f32>>, | ||||
|         metadata: Option<HashMap<String, Vec<String>>>, | ||||
|     ) -> Result<usize, DBError> { | ||||
|         let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name)); | ||||
|          | ||||
|         // Open or get cached dataset | ||||
|         let dataset = self.get_or_open_dataset(dataset_name).await?; | ||||
|          | ||||
|         // Build RecordBatch | ||||
|         let num_vectors = vectors.len(); | ||||
|         if num_vectors == 0 { | ||||
|             return Ok(0); | ||||
|         } | ||||
|          | ||||
|         let dim = vectors.first() | ||||
|             .ok_or_else(|| DBError("Empty vectors".to_string()))? | ||||
|             .len(); | ||||
|          | ||||
|         // Flatten vectors | ||||
|         let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect(); | ||||
|         let vector_array = Float32Array::from(flat_vectors); | ||||
|         let vector_array = arrow::array::FixedSizeListArray::try_new_from_values( | ||||
|             vector_array,  | ||||
|             dim as i32 | ||||
|         ).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?; | ||||
|          | ||||
|         let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)]; | ||||
|         let mut fields = vec![Field::new( | ||||
|             "vector", | ||||
|             DataType::FixedSizeList( | ||||
|                 Arc::new(Field::new("item", DataType::Float32, true)), | ||||
|                 dim as i32 | ||||
|             ), | ||||
|             false | ||||
|         )]; | ||||
|          | ||||
|         // Add metadata columns if provided | ||||
|         if let Some(metadata) = metadata { | ||||
|             for (key, values) in metadata { | ||||
|                 if values.len() != num_vectors { | ||||
|                     return Err(DBError(format!( | ||||
|                         "Metadata field '{}' has {} values but expected {}",  | ||||
|                         key, values.len(), num_vectors | ||||
|                     ))); | ||||
|                 } | ||||
|                 let array = StringArray::from(values); | ||||
|                 arrays.push(Arc::new(array)); | ||||
|                 fields.push(Field::new(&key, DataType::Utf8, true)); | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         let schema = Arc::new(Schema::new(fields)); | ||||
|         let batch = RecordBatch::try_new(schema, arrays) | ||||
|             .map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?; | ||||
|          | ||||
|         // Append to dataset | ||||
|         let write_params = WriteParams { | ||||
|             mode: WriteMode::Append, | ||||
|             ..Default::default() | ||||
|         }; | ||||
|          | ||||
|         Dataset::write( | ||||
|             vec![batch], | ||||
|             dataset_path.to_str().unwrap(), | ||||
|             Some(write_params) | ||||
|         ).await | ||||
|         .map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?; | ||||
|          | ||||
|         // Refresh cached dataset | ||||
|         let mut datasets = self.datasets.write().await; | ||||
|         datasets.remove(dataset_name); | ||||
|          | ||||
|         Ok(num_vectors) | ||||
|     } | ||||
|      | ||||
|     pub async fn search_vectors( | ||||
|         &self, | ||||
|         dataset_name: &str, | ||||
|         query_vector: Vec<f32>, | ||||
|         k: usize, | ||||
|         nprobes: Option<usize>, | ||||
|         refine_factor: Option<usize>, | ||||
|     ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> { | ||||
|         let dataset = self.get_or_open_dataset(dataset_name).await?; | ||||
|          | ||||
|         // Build query | ||||
|         let mut query = dataset.scan(); | ||||
|         query = query.nearest( | ||||
|             "vector", | ||||
|             &query_vector, | ||||
|             k, | ||||
|         ).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?; | ||||
|          | ||||
|         if let Some(nprobes) = nprobes { | ||||
|             query = query.nprobes(nprobes); | ||||
|         } | ||||
|          | ||||
|         if let Some(refine) = refine_factor { | ||||
|             query = query.refine_factor(refine); | ||||
|         } | ||||
|          | ||||
|         // Execute search | ||||
|         let results = query | ||||
|             .try_into_stream() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Failed to execute search: {}", e)))? | ||||
|             .try_collect::<Vec<_>>() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Failed to collect results: {}", e)))?; | ||||
|          | ||||
|         // Process results | ||||
|         let mut output = Vec::new(); | ||||
|         for batch in results { | ||||
|             // Get distances | ||||
|             let distances = batch | ||||
|                 .column_by_name("_distance") | ||||
|                 .ok_or_else(|| DBError("No distance column".to_string()))? | ||||
|                 .as_any() | ||||
|                 .downcast_ref::<Float32Array>() | ||||
|                 .ok_or_else(|| DBError("Invalid distance type".to_string()))?; | ||||
|              | ||||
|             // Get metadata | ||||
|             for i in 0..batch.num_rows() { | ||||
|                 let distance = distances.value(i); | ||||
|                 let mut metadata = HashMap::new(); | ||||
|                  | ||||
|                 for field in batch.schema().fields() { | ||||
|                     if field.name() != "vector" && field.name() != "_distance" { | ||||
|                         if let Some(col) = batch.column_by_name(field.name()) { | ||||
|                             if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() { | ||||
|                                 if !str_array.is_null(i) { | ||||
|                                     metadata.insert( | ||||
|                                         field.name().to_string(), | ||||
|                                         str_array.value(i).to_string() | ||||
|                                     ); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                  | ||||
|                 output.push((distance, metadata)); | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         Ok(output) | ||||
|     } | ||||
|      | ||||
|     pub async fn store_multimodal( | ||||
|         &self, | ||||
|         server: &crate::server::Server, | ||||
|         dataset_name: &str, | ||||
|         text: Option<String>, | ||||
|         image_bytes: Option<Vec<u8>>, | ||||
|         metadata: HashMap<String, String>, | ||||
|     ) -> Result<String, DBError> { | ||||
|         // Generate ID | ||||
|         let id = uuid::Uuid::new_v4().to_string(); | ||||
|          | ||||
|         // Generate embeddings using external service | ||||
|         let embedding = if let Some(text) = text.as_ref() { | ||||
|             self.embed_text(server, vec![text.clone()]).await? | ||||
|                 .into_iter() | ||||
|                 .next() | ||||
|                 .ok_or_else(|| DBError("No embedding returned".to_string()))? | ||||
|         } else if let Some(img) = image_bytes.as_ref() { | ||||
|             self.embed_image(server, img.clone()).await? | ||||
|         } else { | ||||
|             return Err(DBError("No text or image provided".to_string())); | ||||
|         }; | ||||
|          | ||||
|         // Prepare metadata | ||||
|         let mut full_metadata = metadata; | ||||
|         full_metadata.insert("id".to_string(), id.clone()); | ||||
|         if let Some(text) = text { | ||||
|             full_metadata.insert("text".to_string(), text); | ||||
|         } | ||||
|         if let Some(img) = image_bytes { | ||||
|             full_metadata.insert("image_base64".to_string(), base64::encode(img)); | ||||
|         } | ||||
|          | ||||
|         // Convert metadata to column vectors | ||||
|         let mut metadata_cols = HashMap::new(); | ||||
|         for (key, value) in full_metadata { | ||||
|             metadata_cols.insert(key, vec![value]); | ||||
|         } | ||||
|          | ||||
|         // Write to dataset | ||||
|         self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?; | ||||
|          | ||||
|         Ok(id) | ||||
|     } | ||||
|      | ||||
|     pub async fn search_with_text( | ||||
|         &self, | ||||
|         server: &crate::server::Server, | ||||
|         dataset_name: &str, | ||||
|         query_text: String, | ||||
|         k: usize, | ||||
|         nprobes: Option<usize>, | ||||
|         refine_factor: Option<usize>, | ||||
|     ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> { | ||||
|         // Embed the query text using external service | ||||
|         let embeddings = self.embed_text(server, vec![query_text]).await?; | ||||
|         let query_vector = embeddings.into_iter() | ||||
|             .next() | ||||
|             .ok_or_else(|| DBError("No embedding returned for query".to_string()))?; | ||||
|          | ||||
|         // Search with the embedding | ||||
|         self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await | ||||
|     } | ||||
|      | ||||
|     pub async fn create_index( | ||||
|         &self, | ||||
|         dataset_name: &str, | ||||
|         index_type: &str, | ||||
|         num_partitions: Option<usize>, | ||||
|         num_sub_vectors: Option<usize>, | ||||
|     ) -> Result<(), DBError> { | ||||
|         let dataset = self.get_or_open_dataset(dataset_name).await?; | ||||
|          | ||||
|         let mut params = VectorIndexParams::default(); | ||||
|          | ||||
|         match index_type.to_uppercase().as_str() { | ||||
|             "IVF_PQ" => { | ||||
|                 params.ivf = IvfBuildParams { | ||||
|                     num_partitions: num_partitions.unwrap_or(256), | ||||
|                     ..Default::default() | ||||
|                 }; | ||||
|                 params.pq = PQBuildParams { | ||||
|                     num_sub_vectors: num_sub_vectors.unwrap_or(16), | ||||
|                     ..Default::default() | ||||
|                 }; | ||||
|             } | ||||
|             _ => return Err(DBError(format!("Unsupported index type: {}", index_type))), | ||||
|         } | ||||
|          | ||||
|         dataset.create_index( | ||||
|             &["vector"], | ||||
|             lance::index::IndexType::Vector, | ||||
|             None, | ||||
|             &params, | ||||
|             true | ||||
|         ).await | ||||
|         .map_err(|e| DBError(format!("Failed to create index: {}", e)))?; | ||||
|          | ||||
|         Ok(()) | ||||
|     } | ||||
|      | ||||
|     async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> { | ||||
|         let mut datasets = self.datasets.write().await; | ||||
|          | ||||
|         if let Some(dataset) = datasets.get(name) { | ||||
|             return Ok(dataset.clone()); | ||||
|         } | ||||
|          | ||||
|         let dataset_path = self.data_dir.join(format!("{}.lance", name)); | ||||
|         if !dataset_path.exists() { | ||||
|             return Err(DBError(format!("Dataset '{}' does not exist", name))); | ||||
|         } | ||||
|          | ||||
|         let dataset = Dataset::open(dataset_path.to_str().unwrap()) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?; | ||||
|          | ||||
|         let dataset = Arc::new(dataset); | ||||
|         datasets.insert(name.to_string(), dataset.clone()); | ||||
|          | ||||
|         Ok(dataset) | ||||
|     } | ||||
|      | ||||
|     pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> { | ||||
|         let mut datasets = Vec::new(); | ||||
|          | ||||
|         let entries = std::fs::read_dir(&self.data_dir) | ||||
|             .map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?; | ||||
|          | ||||
|         for entry in entries { | ||||
|             let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?; | ||||
|             let path = entry.path(); | ||||
|              | ||||
|             if path.is_dir() { | ||||
|                 if let Some(name) = path.file_name() { | ||||
|                     if let Some(name_str) = name.to_str() { | ||||
|                         if name_str.ends_with(".lance") { | ||||
|                             let dataset_name = name_str.trim_end_matches(".lance"); | ||||
|                             datasets.push(dataset_name.to_string()); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         Ok(datasets) | ||||
|     } | ||||
|      | ||||
|     pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> { | ||||
|         // Remove from cache | ||||
|         let mut datasets = self.datasets.write().await; | ||||
|         datasets.remove(name); | ||||
|          | ||||
|         // Delete from disk | ||||
|         let dataset_path = self.data_dir.join(format!("{}.lance", name)); | ||||
|         if dataset_path.exists() { | ||||
|             std::fs::remove_dir_all(dataset_path) | ||||
|                 .map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?; | ||||
|         } | ||||
|          | ||||
|         Ok(()) | ||||
|     } | ||||
|      | ||||
|     pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> { | ||||
|         let dataset = self.get_or_open_dataset(name).await?; | ||||
|          | ||||
|         let mut info = HashMap::new(); | ||||
|         info.insert("name".to_string(), name.to_string()); | ||||
|         info.insert("version".to_string(), dataset.version().to_string()); | ||||
|         let num_rows = dataset.count_rows().await | ||||
|             .map_err(|e| DBError(format!("Failed to count rows: {}", e)))?; | ||||
|         info.insert("num_rows".to_string(), num_rows.to_string()); | ||||
|          | ||||
|         // Get schema info | ||||
|         let schema = dataset.schema(); | ||||
|         let fields: Vec<String> = schema.fields() | ||||
|             .iter() | ||||
|             .map(|f| format!("{}:{}", f.name(), f.data_type())) | ||||
|             .collect(); | ||||
|         info.insert("schema".to_string(), fields.join(", ")); | ||||
|          | ||||
|         Ok(info) | ||||
|     } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ### 3. Update Command Implementations | ||||
|  | ||||
| Update the command implementations to pass the server reference for embedding service access: | ||||
|  | ||||
| ```rust | ||||
| // In cmd.rs, update the lance command implementations | ||||
|  | ||||
| async fn lance_store_cmd( | ||||
|     server: &Server, | ||||
|     dataset: &str, | ||||
|     text: Option<String>, | ||||
|     image_base64: Option<String>, | ||||
|     metadata: HashMap<String, String>, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     let lance_store = server.lance_store()?; | ||||
|      | ||||
|     // Decode image if provided | ||||
|     let image_bytes = if let Some(b64) = image_base64 { | ||||
|         Some(base64::decode(b64).map_err(|e|  | ||||
|             DBError(format!("Invalid base64 image: {}", e)))?) | ||||
|     } else { | ||||
|         None | ||||
|     }; | ||||
|      | ||||
|     // Pass server reference for embedding service access | ||||
|     let id = lance_store.store_multimodal( | ||||
|         server,  // Pass server to access Redis config | ||||
|         dataset, | ||||
|         text, | ||||
|         image_bytes, | ||||
|         metadata, | ||||
|     ).await?; | ||||
|      | ||||
|     Ok(Protocol::BulkString(id)) | ||||
| } | ||||
|  | ||||
| async fn lance_embed_text_cmd( | ||||
|     server: &Server, | ||||
|     texts: &[String], | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     let lance_store = server.lance_store()?; | ||||
|      | ||||
|     // Pass server reference for embedding service access | ||||
|     let embeddings = lance_store.embed_text(server, texts.to_vec()).await?; | ||||
|      | ||||
|     // Return as array of vectors | ||||
|     let mut output = Vec::new(); | ||||
|     for embedding in embeddings { | ||||
|         let vector_str = format!("[{}]",  | ||||
|             embedding.iter() | ||||
|                 .map(|f| f.to_string()) | ||||
|                 .collect::<Vec<_>>() | ||||
|                 .join(",") | ||||
|         ); | ||||
|         output.push(Protocol::BulkString(vector_str)); | ||||
|     } | ||||
|      | ||||
|     Ok(Protocol::Array(output)) | ||||
| } | ||||
|  | ||||
| async fn lance_search_text_cmd( | ||||
|     server: &Server, | ||||
|     dataset: &str, | ||||
|     query_text: &str, | ||||
|     k: usize, | ||||
|     nprobes: Option<usize>, | ||||
|     refine_factor: Option<usize>, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     let lance_store = server.lance_store()?; | ||||
|      | ||||
|     // Search using text query (will be embedded automatically) | ||||
|     let results = lance_store.search_with_text( | ||||
|         server, | ||||
|         dataset, | ||||
|         query_text.to_string(), | ||||
|         k, | ||||
|         nprobes, | ||||
|         refine_factor, | ||||
|     ).await?; | ||||
|      | ||||
|     // Format results | ||||
|     let mut output = Vec::new(); | ||||
|     for (distance, metadata) in results { | ||||
|         let metadata_json = serde_json::to_string(&metadata) | ||||
|             .unwrap_or_else(|_| "{}".to_string()); | ||||
|          | ||||
|         output.push(Protocol::Array(vec![ | ||||
|             Protocol::BulkString(distance.to_string()), | ||||
|             Protocol::BulkString(metadata_json), | ||||
|         ])); | ||||
|     } | ||||
|      | ||||
|     Ok(Protocol::Array(output)) | ||||
| } | ||||
|  | ||||
| // Add new command for text-based search | ||||
| pub enum Cmd { | ||||
|     // ... existing commands ... | ||||
|     LanceSearchText { | ||||
|         dataset: String, | ||||
|         query_text: String, | ||||
|         k: usize, | ||||
|         nprobes: Option<usize>, | ||||
|         refine_factor: Option<usize>, | ||||
|     }, | ||||
| } | ||||
| ``` | ||||
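|  | ||||
| To wire the new variant into the command loop, the dispatcher needs one more match arm routing it to `lance_search_text_cmd`. A minimal sketch, assuming a dispatcher shaped like the one handling the other LANCE.* commands; the helper function name and surrounding plumbing here are illustrative, not part of the existing code: | ||||
|  | ||||
| ```rust | ||||
| // Illustrative dispatch helper; in practice this arm lives in the existing | ||||
| // command dispatcher alongside the other LANCE.* variants. | ||||
| async fn dispatch_lance(server: &Server, cmd: Cmd) -> Result<Protocol, DBError> { | ||||
|     match cmd { | ||||
|         Cmd::LanceSearchText { dataset, query_text, k, nprobes, refine_factor } => { | ||||
|             lance_search_text_cmd(server, &dataset, &query_text, k, nprobes, refine_factor).await | ||||
|         } | ||||
|         // ... other Cmd variants handled as before ... | ||||
|         _ => Err(DBError("unhandled command".to_string())), | ||||
|     } | ||||
| } | ||||
| ``` | ||||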
|  | ||||
| ## Usage Examples | ||||
|  | ||||
| ### 1. Configure the Embedding Service | ||||
|  | ||||
| First, users need to configure the embedding service URL: | ||||
|  | ||||
| ```bash | ||||
| # Configure the embedding service endpoint | ||||
| redis-cli> HSET config:core:aiembed:url url "http://localhost:8000/embeddings" | ||||
| OK | ||||
|  | ||||
| # Or use a cloud service | ||||
| redis-cli> HSET config:core:aiembed:url url "https://api.openai.com/v1/embeddings" | ||||
| OK | ||||
| ``` | ||||
|  | ||||
| ### 2. Use Lance Commands with Automatic External Embedding | ||||
|  | ||||
| ```bash | ||||
| # Create a dataset | ||||
| redis-cli> LANCE.CREATE products DIM 1536 SCHEMA name:string price:float category:string | ||||
| OK | ||||
|  | ||||
| # Store text with automatic embedding (calls external service) | ||||
| redis-cli> LANCE.STORE products TEXT "Wireless noise-canceling headphones with 30-hour battery" name:AirPods price:299.99 category:Electronics | ||||
| "uuid-123-456" | ||||
|  | ||||
| # Search using text query (automatically embeds the query) | ||||
| redis-cli> LANCE.SEARCH.TEXT products "best headphones for travel" K 5 | ||||
| 1) "0.92"  | ||||
| 2) "{\"id\":\"uuid-123\",\"name\":\"AirPods\",\"price\":\"299.99\"}" | ||||
|  | ||||
| # Get embeddings directly | ||||
| redis-cli> LANCE.EMBED.TEXT "This text will be embedded" | ||||
| 1) "[0.123, 0.456, 0.789, ...]" | ||||
| ``` | ||||
|  | ||||
| ## External Embedding Service API Specification | ||||
|  | ||||
| The external embedding service should accept POST requests with this format: | ||||
|  | ||||
| ```json | ||||
| // Request | ||||
| { | ||||
|   "texts": ["text1", "text2"],  // Optional | ||||
|   "images": ["base64_img1"],    // Optional | ||||
|   "model": "text-embedding-ada-002"  // Optional | ||||
| } | ||||
|  | ||||
| // Response | ||||
| { | ||||
|   "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]], | ||||
|   "model": "text-embedding-ada-002", | ||||
|   "usage": { | ||||
|     "prompt_tokens": 100, | ||||
|     "total_tokens": 100 | ||||
|   } | ||||
| } | ||||
| ``` | ||||
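|  | ||||
| On the HeroDB side these bodies map naturally onto serde structs (serde_json is already used for result formatting above). A minimal sketch, assuming the wire format shown here; the struct and field names are illustrative, not taken from the existing code: | ||||
|  | ||||
| ```rust | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| /// Request body sent to the external embedding service. | ||||
| #[derive(Serialize)] | ||||
| struct EmbeddingRequest { | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     texts: Option<Vec<String>>, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     images: Option<Vec<String>>, // base64-encoded images | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     model: Option<String>, | ||||
| } | ||||
|  | ||||
| /// Response body returned by the embedding service. | ||||
| #[derive(Deserialize)] | ||||
| struct EmbeddingResponse { | ||||
|     embeddings: Vec<Vec<f32>>, | ||||
|     #[serde(default)] | ||||
|     model: Option<String>, | ||||
|     #[serde(default)] | ||||
|     usage: Option<EmbeddingUsage>, | ||||
| } | ||||
|  | ||||
| /// Optional usage accounting reported by the service. | ||||
| #[derive(Deserialize)] | ||||
| struct EmbeddingUsage { | ||||
|     prompt_tokens: u64, | ||||
|     total_tokens: u64, | ||||
| } | ||||
| ``` | ||||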
|  | ||||
| ## Error Handling | ||||
|  | ||||
| The implementation handles the main failure modes: | ||||
|  | ||||
| 1. **Missing Configuration**: Clear error message if the embedding URL is not configured | ||||
| 2. **Service Failures**: Embedding service errors are propagated as `DBError`s rather than panics | ||||
| 3. **Timeout Protection**: 30-second timeout on embedding requests | ||||
| 4. **Retry Logic**: Not implemented yet; a small retry wrapper could be added for resilience (see the sketch after this list) | ||||
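|  | ||||
| A minimal sketch of what such a retry wrapper could look like, assuming a tokio runtime; the helper name, attempt count, and backoff values are illustrative and not part of the current code: | ||||
|  | ||||
| ```rust | ||||
| use std::time::Duration; | ||||
|  | ||||
| use crate::error::DBError; | ||||
|  | ||||
| /// Retry an async operation with exponential backoff between attempts. | ||||
| async fn retry_with_backoff<T, F, Fut>(max_attempts: u32, mut op: F) -> Result<T, DBError> | ||||
| where | ||||
|     F: FnMut() -> Fut, | ||||
|     Fut: std::future::Future<Output = Result<T, DBError>>, | ||||
| { | ||||
|     let mut delay = Duration::from_millis(200); | ||||
|     let mut last_err = DBError("retry: no attempts made".to_string()); | ||||
|     for _ in 0..max_attempts { | ||||
|         match op().await { | ||||
|             Ok(v) => return Ok(v), | ||||
|             Err(e) => { | ||||
|                 last_err = e; | ||||
|                 tokio::time::sleep(delay).await; | ||||
|                 delay *= 2; // double the delay after each failed attempt | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     Err(last_err) | ||||
| } | ||||
|  | ||||
| // Hypothetical usage inside embed_text (call_embedding_service is illustrative): | ||||
| // let embeddings = retry_with_backoff(3, || self.call_embedding_service(&texts)).await?; | ||||
| ``` | ||||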
|  | ||||
| ## Benefits of This Approach | ||||
|  | ||||
| 1. **Flexibility**: Supports any embedding service with compatible API | ||||
| 2. **Cost Control**: Use your preferred embedding provider | ||||
| 3. **Scalability**: Embedding service can be scaled independently | ||||
| 4. **Consistency**: All embeddings use the same configured service | ||||
| 5. **Security**: Endpoint configuration (and, once the API-key TODO below is addressed, keys) lives centrally in Redis | ||||
|  | ||||
| This implementation ensures that all embedding operations go through the external service configured in Redis, providing a clean separation between the vector database functionality and the embedding generation. | ||||
|  | ||||
|  | ||||
| TODO EXTRA: | ||||
|  | ||||
| - secret for the embedding service API key | ||||
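|  | ||||
| One possible (purely illustrative, not implemented) shape for that TODO: keep the API key next to the URL in the existing config hash and have the embedding client send it as a bearer token: | ||||
|  | ||||
| ```bash | ||||
| # Hypothetical: store the API key alongside the endpoint (not implemented yet) | ||||
| redis-cli> HSET config:core:aiembed:url url "https://api.openai.com/v1/embeddings" api_key "sk-..." | ||||
| OK | ||||
| ``` | ||||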
|  | ||||
src/admin_meta.rs (new file, 501 lines)
							| @@ -0,0 +1,501 @@ | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::{Arc, OnceLock, Mutex, RwLock}; | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use crate::error::DBError; | ||||
| use crate::options; | ||||
| use crate::rpc::Permissions; | ||||
| use crate::storage::Storage; | ||||
| use crate::storage_sled::SledStorage; | ||||
| use crate::storage_trait::StorageBackend; | ||||
|  | ||||
| // Key builders | ||||
| fn k_admin_next_id() -> &'static str { | ||||
|     "admin:next_id" | ||||
| } | ||||
| fn k_admin_dbs() -> &'static str { | ||||
|     "admin:dbs" | ||||
| } | ||||
| fn k_meta_db(id: u64) -> String { | ||||
|     format!("meta:db:{}", id) | ||||
| } | ||||
| fn k_meta_db_keys(id: u64) -> String { | ||||
|     format!("meta:db:{}:keys", id) | ||||
| } | ||||
| fn k_meta_db_enc(id: u64) -> String { | ||||
|     format!("meta:db:{}:enc", id) | ||||
| } | ||||
|  | ||||
| // Global cache of admin DB 0 handles per base_dir to avoid sled/redb file-lock contention | ||||
| // and to correctly isolate different test instances with distinct directories. | ||||
| static ADMIN_STORAGES: OnceLock<RwLock<HashMap<String, Arc<dyn StorageBackend>>>> = OnceLock::new(); | ||||
|  | ||||
| // Global registry for data DB storages to avoid double-open across process. | ||||
| static DATA_STORAGES: OnceLock<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>> = OnceLock::new(); | ||||
| static DATA_INIT_LOCK: Mutex<()> = Mutex::new(()); | ||||
|  | ||||
| fn init_admin_storage( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
| ) -> Result<Arc<dyn StorageBackend>, DBError> { | ||||
|     let db_file = base_dir.join("0.db"); | ||||
|     if let Some(parent_dir) = db_file.parent() { | ||||
|         std::fs::create_dir_all(parent_dir).map_err(|e| { | ||||
|             DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e)) | ||||
|         })?; | ||||
|     } | ||||
|     let storage: Arc<dyn StorageBackend> = match backend { | ||||
|         options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?), | ||||
|         options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?), | ||||
|         options::BackendType::Tantivy | options::BackendType::Lance => { | ||||
|             return Err(DBError("Admin DB 0 cannot use search-only backends (Tantivy/Lance)".to_string())) | ||||
|         } | ||||
|     }; | ||||
|     Ok(storage) | ||||
| } | ||||
|  | ||||
| // Get or initialize a cached handle to admin DB 0 per base_dir (thread-safe, no double-open race) | ||||
| pub fn open_admin_storage( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
| ) -> Result<Arc<dyn StorageBackend>, DBError> { | ||||
|     let map = ADMIN_STORAGES.get_or_init(|| RwLock::new(HashMap::new())); | ||||
|     let key = base_dir.display().to_string(); | ||||
|     // Fast path | ||||
|     if let Some(st) = map.read().unwrap().get(&key) { | ||||
|         return Ok(st.clone()); | ||||
|     } | ||||
|     // Slow path with write lock | ||||
|     { | ||||
|         let mut w = map.write().unwrap(); | ||||
|         if let Some(st) = w.get(&key) { | ||||
|             return Ok(st.clone()); | ||||
|         } | ||||
|  | ||||
|         // Detect existing 0.db backend by filesystem, if present. | ||||
|         let admin_path = base_dir.join("0.db"); | ||||
|         let detected = if admin_path.exists() { | ||||
|             if admin_path.is_file() { | ||||
|                 Some(options::BackendType::Redb) | ||||
|             } else if admin_path.is_dir() { | ||||
|                 Some(options::BackendType::Sled) | ||||
|             } else { | ||||
|                 None | ||||
|             } | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         let effective_backend = match detected { | ||||
|             Some(d) if d != backend => { | ||||
|                 eprintln!( | ||||
|                     "warning: Admin DB 0 at {} appears to be {:?}, but process default is {:?}. Using detected backend.", | ||||
|                     admin_path.display(), | ||||
|                     d, | ||||
|                     backend | ||||
|                 ); | ||||
|                 d | ||||
|             } | ||||
|             Some(d) => d, | ||||
|             None => backend, // First boot: use requested backend to initialize 0.db | ||||
|         }; | ||||
|  | ||||
|         let st = init_admin_storage(base_dir, effective_backend, admin_secret)?; | ||||
|         w.insert(key, st.clone()); | ||||
|         Ok(st) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Ensure admin structures exist in encrypted DB 0 | ||||
| pub fn ensure_bootstrap( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|  | ||||
|     // Initialize next id if missing | ||||
|     if !admin.exists(k_admin_next_id())? { | ||||
|         admin.set(k_admin_next_id().to_string(), "1".to_string())?; | ||||
|     } | ||||
|     // admin:dbs is a hash; it's fine if it doesn't exist (hlen -> 0) | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| // Get or initialize a shared handle to a data DB (> 0), avoiding double-open across subsystems | ||||
| pub fn open_data_storage( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<Arc<dyn StorageBackend>, DBError> { | ||||
|     if id == 0 { | ||||
|         return open_admin_storage(base_dir, backend, admin_secret); | ||||
|     } | ||||
|  | ||||
|     // Validate existence in admin metadata | ||||
|     if !db_exists(base_dir, backend.clone(), admin_secret, id)? { | ||||
|         return Err(DBError(format!( | ||||
|             "Cannot open database instance {}, as that database instance does not exist.", | ||||
|             id | ||||
|         ))); | ||||
|     } | ||||
|  | ||||
|     let map = DATA_STORAGES.get_or_init(|| RwLock::new(HashMap::new())); | ||||
|     // Fast path | ||||
|     if let Some(st) = map.read().unwrap().get(&id) { | ||||
|         return Ok(st.clone()); | ||||
|     } | ||||
|  | ||||
|     // Slow path with init lock | ||||
|     let _guard = DATA_INIT_LOCK.lock().unwrap(); | ||||
|     if let Some(st) = map.read().unwrap().get(&id) { | ||||
|         return Ok(st.clone()); | ||||
|     } | ||||
|  | ||||
|     // Resolve effective backend for this db id: | ||||
|     // 1) Try admin meta "backend" field | ||||
|     // 2) If missing, sniff filesystem (file => Redb, dir => Sled), then persist into admin meta | ||||
|     // 3) Fallback to requested 'backend' (startup default) if nothing else is known | ||||
|     let meta_backend = get_database_backend(base_dir, backend.clone(), admin_secret, id).ok().flatten(); | ||||
|     let db_path = base_dir.join(format!("{}.db", id)); | ||||
|     let sniffed_backend = if db_path.exists() { | ||||
|         if db_path.is_file() { | ||||
|             Some(options::BackendType::Redb) | ||||
|         } else if db_path.is_dir() { | ||||
|             Some(options::BackendType::Sled) | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
|     } else { | ||||
|         None | ||||
|     }; | ||||
|     let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(backend.clone()); | ||||
|  | ||||
|     // If we had to sniff (i.e., meta missing), persist it for future robustness | ||||
|     if meta_backend.is_none() { | ||||
|         let _ = set_database_backend(base_dir, backend.clone(), admin_secret, id, effective_backend.clone()); | ||||
|     } | ||||
|  | ||||
|     // Warn if caller-provided backend differs from effective | ||||
|     if effective_backend != backend { | ||||
|         eprintln!( | ||||
|             "notice: Database {} backend resolved to {:?} (caller requested {:?}). Using resolved backend.", | ||||
|             id, effective_backend, backend | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     // Determine per-db encryption (from admin meta) | ||||
|     let enc = get_enc_key(base_dir, backend.clone(), admin_secret, id)?; | ||||
|     let should_encrypt = enc.is_some(); | ||||
|  | ||||
|     // Build database file path and ensure parent dir exists | ||||
|     let db_file = PathBuf::from(base_dir).join(format!("{}.db", id)); | ||||
|     if let Some(parent_dir) = db_file.parent() { | ||||
|         std::fs::create_dir_all(parent_dir).map_err(|e| { | ||||
|             DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e)) | ||||
|         })?; | ||||
|     } | ||||
|  | ||||
|     // Open storage using the effective backend | ||||
|     let storage: Arc<dyn StorageBackend> = match effective_backend { | ||||
|         options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?), | ||||
|         options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?), | ||||
|         options::BackendType::Tantivy => { | ||||
|             return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string())) | ||||
|         } | ||||
|         options::BackendType::Lance => { | ||||
|             return Err(DBError("Lance backend has no KV storage; use LANCE.* commands only".to_string())) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // Publish to registry | ||||
|     map.write().unwrap().insert(id, storage.clone()); | ||||
|     Ok(storage) | ||||
| } | ||||
|  | ||||
| // Allocate the next DB id and persist new pointer | ||||
| pub fn allocate_next_id( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
| ) -> Result<u64, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let cur = admin | ||||
|         .get(k_admin_next_id())? | ||||
|         .unwrap_or_else(|| "1".to_string()); | ||||
|     let id: u64 = cur.parse().unwrap_or(1); | ||||
|     let next = id.checked_add(1).ok_or_else(|| DBError("next_id overflow".into()))?; | ||||
|     admin.set(k_admin_next_id().to_string(), next.to_string())?; | ||||
|  | ||||
|     // Register into admin:dbs set/hash | ||||
|     let _ = admin.hset(k_admin_dbs(), vec![(id.to_string(), "1".to_string())])?; | ||||
|  | ||||
|     // Default meta for the new db: public true | ||||
|     let meta_key = k_meta_db(id); | ||||
|     let _ = admin.hset(&meta_key, vec![("public".to_string(), "true".to_string())])?; | ||||
|  | ||||
|     Ok(id) | ||||
| } | ||||
|  | ||||
| // Check existence of a db id in admin:dbs | ||||
| pub fn db_exists( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<bool, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     Ok(admin.hexists(k_admin_dbs(), &id.to_string())?) | ||||
| } | ||||
|  | ||||
| // Get per-db encryption key, if any | ||||
| pub fn get_enc_key( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<Option<String>, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     admin.get(&k_meta_db_enc(id)) | ||||
| } | ||||
|  | ||||
| // Set per-db encryption key (called during create) | ||||
| pub fn set_enc_key( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     key: &str, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     admin.set(k_meta_db_enc(id), key.to_string()) | ||||
| } | ||||
|  | ||||
| // Set database public flag | ||||
| pub fn set_database_public( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     public: bool, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let mk = k_meta_db(id); | ||||
|     let _ = admin.hset(&mk, vec![("public".to_string(), public.to_string())])?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| // Persist per-db backend type in admin metadata (module-scope) | ||||
| pub fn set_database_backend( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     db_backend: options::BackendType, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let mk = k_meta_db(id); | ||||
|     let val = match db_backend { | ||||
|         options::BackendType::Redb => "Redb", | ||||
|         options::BackendType::Sled => "Sled", | ||||
|         options::BackendType::Tantivy => "Tantivy", | ||||
|         options::BackendType::Lance => "Lance", | ||||
|     }; | ||||
|     let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub fn get_database_backend( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<Option<options::BackendType>, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let mk = k_meta_db(id); | ||||
|     match admin.hget(&mk, "backend")? { | ||||
|         Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)), | ||||
|         Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)), | ||||
|         Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)), | ||||
|         Some(s) if s == "Lance" => Ok(Some(options::BackendType::Lance)), | ||||
|         _ => Ok(None), | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Set database name | ||||
| pub fn set_database_name( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     name: &str, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let mk = k_meta_db(id); | ||||
|     let _ = admin.hset(&mk, vec![("name".to_string(), name.to_string())])?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| // Get database name | ||||
| pub fn get_database_name( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<Option<String>, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let mk = k_meta_db(id); | ||||
|     admin.hget(&mk, "name") | ||||
| } | ||||
|  | ||||
| // Internal: load public flag; default to true when meta missing | ||||
| fn load_public( | ||||
|     admin: &Arc<dyn StorageBackend>, | ||||
|     id: u64, | ||||
| ) -> Result<bool, DBError> { | ||||
|     let mk = k_meta_db(id); | ||||
|     match admin.hget(&mk, "public")? { | ||||
|         Some(v) => Ok(v == "true"), | ||||
|         None => Ok(true), | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Add access key for db (value format: "Read:ts" or "ReadWrite:ts") | ||||
| pub fn add_access_key( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     key_plain: &str, | ||||
|     perms: Permissions, | ||||
| ) -> Result<(), DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let hash = crate::rpc::hash_key(key_plain); | ||||
|     let v = match perms { | ||||
|         Permissions::Read => format!("Read:{}", now_secs()), | ||||
|         Permissions::ReadWrite => format!("ReadWrite:{}", now_secs()), | ||||
|     }; | ||||
|     let _ = admin.hset(&k_meta_db_keys(id), vec![(hash, v)])?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| // Delete access key by hash | ||||
| pub fn delete_access_key( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     key_hash: &str, | ||||
| ) -> Result<bool, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let n = admin.hdel(&k_meta_db_keys(id), vec![key_hash.to_string()])?; | ||||
|     Ok(n > 0) | ||||
| } | ||||
|  | ||||
| // List access keys, returning (hash, perms, created_at_secs) | ||||
| pub fn list_access_keys( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
| ) -> Result<Vec<(String, Permissions, u64)>, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let pairs = admin.hgetall(&k_meta_db_keys(id))?; | ||||
|     let mut out = Vec::new(); | ||||
|     for (hash, val) in pairs { | ||||
|         let (perm, ts) = parse_perm_value(&val); | ||||
|         out.push((hash, perm, ts)); | ||||
|     } | ||||
|     Ok(out) | ||||
| } | ||||
|  | ||||
| // Verify access permission for db id with optional key | ||||
| // Returns: | ||||
| // - Ok(Some(Permissions)) when access is allowed | ||||
| // - Ok(None) when not allowed or db missing (caller can distinguish by calling db_exists) | ||||
| pub fn verify_access( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
|     id: u64, | ||||
|     key_opt: Option<&str>, | ||||
| ) -> Result<Option<Permissions>, DBError> { | ||||
|     // Admin DB 0: require exact admin_secret | ||||
|     if id == 0 { | ||||
|         if let Some(k) = key_opt { | ||||
|             if k == admin_secret { | ||||
|                 return Ok(Some(Permissions::ReadWrite)); | ||||
|             } | ||||
|         } | ||||
|         return Ok(None); | ||||
|     } | ||||
|  | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     if !admin.hexists(k_admin_dbs(), &id.to_string())? { | ||||
|         return Ok(None); | ||||
|     } | ||||
|  | ||||
|     let is_public = load_public(&admin, id)?; | ||||
|  | ||||
|     // If a key is explicitly provided, enforce its validity strictly. | ||||
|     // Do NOT fall back to public when an invalid key is supplied. | ||||
|     if let Some(k) = key_opt { | ||||
|         let hash = crate::rpc::hash_key(k); | ||||
|         if let Some(v) = admin.hget(&k_meta_db_keys(id), &hash)? { | ||||
|             let (perm, _ts) = parse_perm_value(&v); | ||||
|             return Ok(Some(perm)); | ||||
|         } | ||||
|         // Invalid key | ||||
|         return Ok(None); | ||||
|     } | ||||
|  | ||||
|     // No key provided: allow access if DB is public, otherwise deny | ||||
|     if is_public { | ||||
|         Ok(Some(Permissions::ReadWrite)) | ||||
|     } else { | ||||
|         Ok(None) | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Enumerate all db ids | ||||
| pub fn list_dbs( | ||||
|     base_dir: &Path, | ||||
|     backend: options::BackendType, | ||||
|     admin_secret: &str, | ||||
| ) -> Result<Vec<u64>, DBError> { | ||||
|     let admin = open_admin_storage(base_dir, backend, admin_secret)?; | ||||
|     let ids = admin.hkeys(k_admin_dbs())?; | ||||
|     let mut out = Vec::new(); | ||||
|     for s in ids { | ||||
|         if let Ok(v) = s.parse() { | ||||
|             out.push(v); | ||||
|         } | ||||
|     } | ||||
|     Ok(out) | ||||
| } | ||||
|  | ||||
| // Helper: parse permission value "Read:ts" or "ReadWrite:ts" | ||||
| fn parse_perm_value(v: &str) -> (Permissions, u64) { | ||||
|     let mut parts = v.split(':'); | ||||
|     let p = parts.next().unwrap_or("Read"); | ||||
|     let ts = parts | ||||
|         .next() | ||||
|         .and_then(|s| s.parse().ok()) | ||||
|         .unwrap_or(0u64); | ||||
|     let perm = match p { | ||||
|         "ReadWrite" => Permissions::ReadWrite, | ||||
|         _ => Permissions::Read, | ||||
|     }; | ||||
|     (perm, ts) | ||||
| } | ||||
|  | ||||
| fn now_secs() -> u64 { | ||||
|     use std::time::{SystemTime, UNIX_EPOCH}; | ||||
|     SystemTime::now() | ||||
|         .duration_since(UNIX_EPOCH) | ||||
|         .unwrap_or_default() | ||||
|         .as_secs() | ||||
| } | ||||
src/age.rs (new file, 536 lines)
							| @@ -0,0 +1,536 @@ | ||||
| //! age.rs — AGE (rage) helpers + persistent key management for your mini-Redis. | ||||
| // | ||||
| // Features: | ||||
| // - X25519 encryption/decryption (age style) | ||||
| // - Ed25519 detached signatures + verification | ||||
| // - Persistent named keys in DB (strings): | ||||
| //      age:key:{name}       -> X25519 recipient (public encryption key, "age1...") | ||||
| //      age:privkey:{name}   -> X25519 identity (secret encryption key, "AGE-SECRET-KEY-1...") | ||||
| //      age:signpub:{name}   -> Ed25519 verify pubkey (public, used to verify signatures) | ||||
| //      age:signpriv:{name}  -> Ed25519 signing secret key (private, used to sign) | ||||
| // - Base64 wrapping for ciphertext/signature binary blobs. | ||||
|  | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use secrecy::ExposeSecret; | ||||
| use age::{Decryptor, Encryptor}; | ||||
| use age::x25519; | ||||
|  | ||||
| use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey}; | ||||
|  | ||||
| use base64::{engine::general_purpose::STANDARD as B64, Engine as _}; | ||||
| use std::collections::HashSet; | ||||
| use std::convert::TryInto; | ||||
|  | ||||
| use crate::protocol::Protocol; | ||||
| use crate::server::Server; | ||||
| use crate::error::DBError; | ||||
|  | ||||
| // ---------- Internal helpers ---------- | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum AgeWireError { | ||||
|     ParseKey, | ||||
|     Crypto(String), | ||||
|     Utf8, | ||||
|     SignatureLen, | ||||
|     NotFound(&'static str),     // which kind of key was missing | ||||
|     Storage(String), | ||||
| } | ||||
|  | ||||
| impl AgeWireError { | ||||
|     fn to_protocol(self) -> Protocol { | ||||
|         match self { | ||||
|             AgeWireError::ParseKey => Protocol::err("ERR age: invalid key"), | ||||
|             AgeWireError::Crypto(e) => Protocol::err(&format!("ERR age: {e}")), | ||||
|             AgeWireError::Utf8 => Protocol::err("ERR age: invalid UTF-8 plaintext"), | ||||
|             AgeWireError::SignatureLen => Protocol::err("ERR age: bad signature length"), | ||||
|             AgeWireError::NotFound(w) => Protocol::err(&format!("ERR age: missing {w}")), | ||||
|             AgeWireError::Storage(e) => Protocol::err(&format!("ERR storage: {e}")), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn parse_recipient(s: &str) -> Result<x25519::Recipient, AgeWireError> { | ||||
|     x25519::Recipient::from_str(s).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
| fn parse_identity(s: &str) -> Result<x25519::Identity, AgeWireError> { | ||||
|     x25519::Identity::from_str(s).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
| fn parse_ed25519_signing_key(s: &str) -> Result<SigningKey, AgeWireError> { | ||||
|     // Parse base64-encoded signing key | ||||
|     let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if bytes.len() != 32 { | ||||
|         return Err(AgeWireError::ParseKey); | ||||
|     } | ||||
|     let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     Ok(SigningKey::from_bytes(&key_bytes)) | ||||
| } | ||||
| fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> { | ||||
|     // Parse base64-encoded verifying key | ||||
|     let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if bytes.len() != 32 { | ||||
|         return Err(AgeWireError::ParseKey); | ||||
|     } | ||||
|     let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey) | ||||
| } | ||||
|  | ||||
| // ---------- Derivation + Raw X25519 (Ed25519 -> X25519) ---------- | ||||
| // | ||||
| // We deterministically derive an X25519 keypair from an Ed25519 SigningKey. | ||||
| // We persist the X25519 public/secret as base64-encoded 32-byte raw values | ||||
| // (no "age1..."/"AGE-SECRET-KEY-1..." formatting). Name-based encrypt/decrypt | ||||
| // uses these raw values directly via x25519-dalek + ChaCha20Poly1305. | ||||
|  | ||||
| use chacha20poly1305::{aead::{Aead, KeyInit}, ChaCha20Poly1305, Key, Nonce}; | ||||
| use sha2::{Digest, Sha256}; | ||||
| use x25519_dalek::{PublicKey as XPublicKey, StaticSecret as XStaticSecret}; | ||||
|  | ||||
| fn derive_x25519_raw_from_ed25519(sk: &SigningKey) -> ([u8; 32], [u8; 32]) { | ||||
|     // X25519 secret scalar (clamped) from Ed25519 secret | ||||
|     let scalar: [u8; 32] = sk.to_scalar_bytes(); | ||||
|     // Build X25519 secret/public using dalek | ||||
|     let xsec = XStaticSecret::from(scalar); | ||||
|     let xpub = XPublicKey::from(&xsec); | ||||
|     (xpub.to_bytes(), xsec.to_bytes()) | ||||
| } | ||||
|  | ||||
| fn derive_x25519_raw_b64_from_ed25519(sk: &SigningKey) -> (String, String) { | ||||
|     let (xpub, xsec) = derive_x25519_raw_from_ed25519(sk); | ||||
|     (B64.encode(xpub), B64.encode(xsec)) | ||||
| } | ||||
|  | ||||
| // Helper: detect whether a stored key looks like an age-formatted string | ||||
| fn looks_like_age_format(s: &str) -> bool { | ||||
|     s.starts_with("age1") || s.starts_with("AGE-SECRET-KEY-1") | ||||
| } | ||||
|  | ||||
| // Our container format for name-based raw X25519 encryption: | ||||
| // bytes = "HDBX1" (5) || eph_pub(32) || nonce(12) || ciphertext(..) | ||||
| // Entire blob is base64-encoded for transport. | ||||
| const HDBX1_MAGIC: &[u8; 5] = b"HDBX1"; | ||||
|  | ||||
| fn encrypt_b64_with_x25519_raw(recip_pub_b64: &str, msg: &str) -> Result<String, AgeWireError> { | ||||
|     use rand::RngCore; | ||||
|     use rand::rngs::OsRng; | ||||
|  | ||||
|     // Parse recipient public key (raw 32 bytes, base64) | ||||
|     let recip_pub_bytes = B64.decode(recip_pub_b64).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if recip_pub_bytes.len() != 32 { return Err(AgeWireError::ParseKey); } | ||||
|     let recip_pub_arr: [u8; 32] = recip_pub_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     let recip_pub: XPublicKey = XPublicKey::from(recip_pub_arr); | ||||
|  | ||||
|     // Generate ephemeral X25519 keypair | ||||
|     let mut eph_sec_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut eph_sec_bytes); | ||||
|     let eph_sec = XStaticSecret::from(eph_sec_bytes); | ||||
|     let eph_pub = XPublicKey::from(&eph_sec); | ||||
|  | ||||
|     // ECDH | ||||
|     let shared = eph_sec.diffie_hellman(&recip_pub); | ||||
|     // Derive symmetric key via SHA-256 over context + shared + parties | ||||
|     let mut hasher = Sha256::default(); | ||||
|     hasher.update(b"herodb-x25519-v1"); | ||||
|     hasher.update(shared.as_bytes()); | ||||
|     hasher.update(eph_pub.as_bytes()); | ||||
|     hasher.update(recip_pub.as_bytes()); | ||||
|     let key_bytes = hasher.finalize(); | ||||
|     let key = Key::from_slice(&key_bytes[..32]); | ||||
|  | ||||
|     // Nonce (12 bytes) | ||||
|     let mut nonce_bytes = [0u8; 12]; | ||||
|     OsRng.fill_bytes(&mut nonce_bytes); | ||||
|     let nonce = Nonce::from_slice(&nonce_bytes); | ||||
|  | ||||
|     // Encrypt | ||||
|     let cipher = ChaCha20Poly1305::new(key); | ||||
|     let ct = cipher.encrypt(nonce, msg.as_bytes()) | ||||
|         .map_err(|e| AgeWireError::Crypto(format!("encrypt: {e}")))?; | ||||
|  | ||||
|     // Assemble container | ||||
|     let mut out = Vec::with_capacity(5 + 32 + 12 + ct.len()); | ||||
|     out.extend_from_slice(HDBX1_MAGIC); | ||||
|     out.extend_from_slice(eph_pub.as_bytes()); | ||||
|     out.extend_from_slice(&nonce_bytes); | ||||
|     out.extend_from_slice(&ct); | ||||
|  | ||||
|     Ok(B64.encode(out)) | ||||
| } | ||||
|  | ||||
| fn decrypt_b64_with_x25519_raw(identity_sec_b64: &str, ct_b64: &str) -> Result<String, AgeWireError> { | ||||
|     // Parse X25519 secret (raw 32 bytes, base64) | ||||
|     let sec_bytes = B64.decode(identity_sec_b64).map_err(|_| AgeWireError::ParseKey)?; | ||||
|     if sec_bytes.len() != 32 { return Err(AgeWireError::ParseKey); } | ||||
|     let sec_arr: [u8; 32] = sec_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?; | ||||
|     let xsec = XStaticSecret::from(sec_arr); | ||||
|     let xpub = XPublicKey::from(&xsec); // self public | ||||
|  | ||||
|     // Decode container | ||||
|     let blob = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     if blob.len() < 5 + 32 + 12 { return Err(AgeWireError::Crypto("ciphertext too short".to_string())); } | ||||
|     if &blob[..5] != HDBX1_MAGIC { return Err(AgeWireError::Crypto("bad header".to_string())); } | ||||
|  | ||||
|     let eph_pub_arr: [u8; 32] = blob[5..5+32].try_into().map_err(|_| AgeWireError::Crypto("bad eph pub".to_string()))?; | ||||
|     let eph_pub = XPublicKey::from(eph_pub_arr); | ||||
|     let nonce_bytes: [u8; 12] = blob[5+32..5+32+12].try_into().unwrap(); | ||||
|     let ct = &blob[5+32+12..]; | ||||
|  | ||||
|     // Recompute shared + key | ||||
|     let shared = xsec.diffie_hellman(&eph_pub); | ||||
|     let mut hasher = Sha256::default(); | ||||
|     hasher.update(b"herodb-x25519-v1"); | ||||
|     hasher.update(shared.as_bytes()); | ||||
|     hasher.update(eph_pub.as_bytes()); | ||||
|     hasher.update(xpub.as_bytes()); | ||||
|     let key_bytes = hasher.finalize(); | ||||
|     let key = Key::from_slice(&key_bytes[..32]); | ||||
|  | ||||
|     // Decrypt | ||||
|     let cipher = ChaCha20Poly1305::new(key); | ||||
|     let nonce = Nonce::from_slice(&nonce_bytes); | ||||
|     let pt = cipher.decrypt(nonce, ct) | ||||
|         .map_err(|e| AgeWireError::Crypto(format!("decrypt: {e}")))?; | ||||
|  | ||||
|     String::from_utf8(pt).map_err(|_| AgeWireError::Utf8) | ||||
| } | ||||
|  | ||||
| // ---------- Stateless crypto helpers (string in/out) ---------- | ||||
|  | ||||
| pub fn gen_enc_keypair() -> (String, String) { | ||||
|     let id = x25519::Identity::generate(); | ||||
|     let pk = id.to_public(); | ||||
|     (pk.to_string(), id.to_string().expose_secret().to_string()) // (recipient, identity) | ||||
| } | ||||
|  | ||||
| pub fn gen_sign_keypair() -> (String, String) { | ||||
|     use rand::RngCore; | ||||
|     use rand::rngs::OsRng; | ||||
|      | ||||
|     // Generate random 32 bytes for the signing key | ||||
|     let mut secret_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut secret_bytes); | ||||
|      | ||||
|     let signing_key = SigningKey::from_bytes(&secret_bytes); | ||||
|     let verifying_key = signing_key.verifying_key(); | ||||
|      | ||||
|     // Encode as base64 for storage | ||||
|     let signing_key_b64 = B64.encode(signing_key.to_bytes()); | ||||
|     let verifying_key_b64 = B64.encode(verifying_key.to_bytes()); | ||||
|      | ||||
|     (verifying_key_b64, signing_key_b64) // (verify_pub, signing_secret) | ||||
| } | ||||
|  | ||||
| /// Encrypt `msg` for `recipient_str` (X25519). Returns base64(ciphertext). | ||||
| pub fn encrypt_b64(recipient_str: &str, msg: &str) -> Result<String, AgeWireError> { | ||||
|     let recipient = parse_recipient(recipient_str)?; | ||||
|     let enc = Encryptor::with_recipients(vec![Box::new(recipient)]) | ||||
|         .expect("failed to create encryptor"); // Handle Option<Encryptor> | ||||
|     let mut out = Vec::new(); | ||||
|     { | ||||
|         use std::io::Write; | ||||
|         let mut w = enc.wrap_output(&mut out).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|         w.write_all(msg.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|         w.finish().map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     } | ||||
|     Ok(B64.encode(out)) | ||||
| } | ||||
|  | ||||
| /// Decrypt base64(ciphertext) with `identity_str`. Returns plaintext String. | ||||
| pub fn decrypt_b64(identity_str: &str, ct_b64: &str) -> Result<String, AgeWireError> { | ||||
|     let id = parse_identity(identity_str)?; | ||||
|     let ct = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     let dec = Decryptor::new(&ct[..]).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|      | ||||
|     // The decrypt method returns a Result<StreamReader, DecryptError> | ||||
|     let mut r = match dec { | ||||
|         Decryptor::Recipients(d) => d.decrypt(std::iter::once(&id as &dyn age::Identity)) | ||||
|             .map_err(|e| AgeWireError::Crypto(e.to_string()))?, | ||||
|         Decryptor::Passphrase(_) => return Err(AgeWireError::Crypto("Expected recipients, got passphrase".to_string())), | ||||
|     }; | ||||
|      | ||||
|     let mut pt = Vec::new(); | ||||
|     use std::io::Read; | ||||
|     r.read_to_end(&mut pt).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     String::from_utf8(pt).map_err(|_| AgeWireError::Utf8) | ||||
| } | ||||
|  | ||||
| /// Sign bytes of `msg` (detached). Returns base64(signature bytes, 64 bytes). | ||||
| pub fn sign_b64(signing_secret_str: &str, msg: &str) -> Result<String, AgeWireError> { | ||||
|     let signing_key = parse_ed25519_signing_key(signing_secret_str)?; | ||||
|     let sig = signing_key.sign(msg.as_bytes()); | ||||
|     Ok(B64.encode(sig.to_bytes())) | ||||
| } | ||||
|  | ||||
| /// Verify detached signature (base64) for `msg` with pubkey. | ||||
| pub fn verify_b64(verify_pub_str: &str, msg: &str, sig_b64: &str) -> Result<bool, AgeWireError> { | ||||
|     let verifying_key = parse_ed25519_verifying_key(verify_pub_str)?; | ||||
|     let sig_bytes = B64.decode(sig_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?; | ||||
|     if sig_bytes.len() != 64 { | ||||
|         return Err(AgeWireError::SignatureLen); | ||||
|     } | ||||
|     let sig = Signature::from_bytes(sig_bytes[..].try_into().unwrap()); | ||||
|     Ok(verifying_key.verify(msg.as_bytes(), &sig).is_ok()) | ||||
| } | ||||
|  | ||||
| // ---------- Storage helpers ---------- | ||||
|  | ||||
| fn sget(server: &Server, key: &str) -> Result<Option<String>, AgeWireError> { | ||||
|     let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?; | ||||
|     st.get(key).map_err(|e| AgeWireError::Storage(e.0)) | ||||
| } | ||||
| fn sset(server: &Server, key: &str, val: &str) -> Result<(), AgeWireError> { | ||||
|     let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?; | ||||
|     st.set(key.to_string(), val.to_string()).map_err(|e| AgeWireError::Storage(e.0)) | ||||
| } | ||||
|  | ||||
| fn enc_pub_key_key(name: &str) -> String { format!("age:key:{name}") } | ||||
| fn enc_priv_key_key(name: &str) -> String { format!("age:privkey:{name}") } | ||||
| fn sign_pub_key_key(name: &str) -> String { format!("age:signpub:{name}") } | ||||
| fn sign_priv_key_key(name: &str) -> String { format!("age:signpriv:{name}") } | ||||
|  | ||||
| // ---------- Command handlers (RESP Protocol) ---------- | ||||
| // Basic (stateless) ones kept for completeness | ||||
|  | ||||
| pub async fn cmd_age_genenc() -> Protocol { | ||||
|     let (recip, ident) = gen_enc_keypair(); | ||||
|     Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_gensign() -> Protocol { | ||||
|     let (verify, secret) = gen_sign_keypair(); | ||||
|     Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_encrypt(recipient: &str, message: &str) -> Protocol { | ||||
|     match encrypt_b64(recipient, message) { | ||||
|         Ok(b64) => Protocol::BulkString(b64), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_decrypt(identity: &str, ct_b64: &str) -> Protocol { | ||||
|     match decrypt_b64(identity, ct_b64) { | ||||
|         Ok(pt) => Protocol::BulkString(pt), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_sign(secret: &str, message: &str) -> Protocol { | ||||
|     match sign_b64(secret, message) { | ||||
|         Ok(b64sig) => Protocol::BulkString(b64sig), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> Protocol { | ||||
|     match verify_b64(verify_pub, message, sig_b64) { | ||||
|         Ok(true) => Protocol::SimpleString("1".to_string()), | ||||
|         Ok(false) => Protocol::SimpleString("0".to_string()), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| // ---------- NEW: unified stateless generator (Ed25519 + derived X25519 raw) ---------- | ||||
| // | ||||
| // Returns 4-tuple: | ||||
| // [ verify_pub_b64 (32B), signpriv_b64 (32B), x25519_pub_b64 (32B), x25519_sec_b64 (32B) ] | ||||
| // No persistence (stateless). | ||||
| pub async fn cmd_age_genkey() -> Protocol { | ||||
|     use rand::RngCore; | ||||
|     use rand::rngs::OsRng; | ||||
|  | ||||
|     let mut secret_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut secret_bytes); | ||||
|  | ||||
|     let signing_key = SigningKey::from_bytes(&secret_bytes); | ||||
|     let verifying_key = signing_key.verifying_key(); | ||||
|  | ||||
|     let verify_b64 = B64.encode(verifying_key.to_bytes()); | ||||
|     let sign_b64 = B64.encode(signing_key.to_bytes()); | ||||
|  | ||||
|     let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key); | ||||
|  | ||||
|     Protocol::Array(vec![ | ||||
|         Protocol::BulkString(verify_b64), | ||||
|         Protocol::BulkString(sign_b64), | ||||
|         Protocol::BulkString(xpub_b64), | ||||
|         Protocol::BulkString(xsec_b64), | ||||
|     ]) | ||||
| } | ||||
|  | ||||
| // ---------- NEW: Persistent, named-key commands ---------- | ||||
|  | ||||
| pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol { | ||||
|     use rand::RngCore; | ||||
|     use rand::rngs::OsRng; | ||||
|  | ||||
|     // Generate Ed25519 keypair | ||||
|     let mut secret_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut secret_bytes); | ||||
|     let signing_key = SigningKey::from_bytes(&secret_bytes); | ||||
|     let verifying_key = signing_key.verifying_key(); | ||||
|  | ||||
|     // Encode Ed25519 as base64 (32 bytes) | ||||
|     let verify_b64 = B64.encode(verifying_key.to_bytes()); | ||||
|     let sign_b64 = B64.encode(signing_key.to_bytes()); | ||||
|  | ||||
|     // Derive X25519 raw (32-byte) keys and encode as base64 | ||||
|     let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key); | ||||
|  | ||||
|     // Decode to create age-formatted strings | ||||
|     let xpub_bytes = B64.decode(&xpub_b64).unwrap(); | ||||
|     let xsec_bytes = B64.decode(&xsec_b64).unwrap(); | ||||
|     let xpub_arr: [u8; 32] = xpub_bytes.as_slice().try_into().unwrap(); | ||||
|     let xsec_arr: [u8; 32] = xsec_bytes.as_slice().try_into().unwrap(); | ||||
|     let recip_str = format!("age1{}", B64.encode(xpub_arr)); | ||||
|     let ident_str = format!("AGE-SECRET-KEY-1{}", B64.encode(xsec_arr)); | ||||
|  | ||||
|     // Persist Ed25519 and derived X25519 (key-managed mode) | ||||
|     if let Err(e) = sset(server, &sign_pub_key_key(name), &verify_b64) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &sign_priv_key_key(name), &sign_b64) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); } | ||||
|  | ||||
|     // Return [recipient, identity] in age format | ||||
|     Protocol::Array(vec![ | ||||
|         Protocol::BulkString(recip_str), | ||||
|         Protocol::BulkString(ident_str), | ||||
|     ]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol { | ||||
|     let (verify, secret) = gen_sign_keypair(); | ||||
|     if let Err(e) = sset(server, &sign_pub_key_key(name), &verify) { return e.to_protocol(); } | ||||
|     if let Err(e) = sset(server, &sign_priv_key_key(name), &secret) { return e.to_protocol(); } | ||||
|     Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)]) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol { | ||||
|     // Load stored recipient (could be raw b64 32-byte or "age1..." from legacy) | ||||
|     let recip_or_b64 = match sget(server, &enc_pub_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => { | ||||
|             // Derive from stored Ed25519 if present, then persist | ||||
|             match sget(server, &sign_priv_key_key(name)) { | ||||
|                 Ok(Some(sign_b64)) => { | ||||
|                     let sk = match parse_ed25519_signing_key(&sign_b64) { | ||||
|                         Ok(k) => k, | ||||
|                         Err(e) => return e.to_protocol(), | ||||
|                     }; | ||||
|                     let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk); | ||||
|                     if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); } | ||||
|                     if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); } | ||||
|                     xpub_b64 | ||||
|                 } | ||||
|                 Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(), | ||||
|                 Err(e) => return e.to_protocol(), | ||||
|             } | ||||
|         } | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|  | ||||
|     if looks_like_age_format(&recip_or_b64) { | ||||
|         match encrypt_b64(&recip_or_b64, message) { | ||||
|             Ok(ct) => Protocol::BulkString(ct), | ||||
|             Err(e) => e.to_protocol(), | ||||
|         } | ||||
|     } else { | ||||
|         match encrypt_b64_with_x25519_raw(&recip_or_b64, message) { | ||||
|             Ok(ct) => Protocol::BulkString(ct), | ||||
|             Err(e) => e.to_protocol(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol { | ||||
|     // Load stored identity (could be raw b64 32-byte or "AGE-SECRET-KEY-1..." from legacy) | ||||
|     let ident_or_b64 = match sget(server, &enc_priv_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => { | ||||
|             // Derive from stored Ed25519 if present, then persist | ||||
|             match sget(server, &sign_priv_key_key(name)) { | ||||
|                 Ok(Some(sign_b64)) => { | ||||
|                     let sk = match parse_ed25519_signing_key(&sign_b64) { | ||||
|                         Ok(k) => k, | ||||
|                         Err(e) => return e.to_protocol(), | ||||
|                     }; | ||||
|                     let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk); | ||||
|                     if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); } | ||||
|                     if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); } | ||||
|                     xsec_b64 | ||||
|                 } | ||||
|                 Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(), | ||||
|                 Err(e) => return e.to_protocol(), | ||||
|             } | ||||
|         } | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|  | ||||
|     if looks_like_age_format(&ident_or_b64) { | ||||
|         match decrypt_b64(&ident_or_b64, ct_b64) { | ||||
|             Ok(pt) => Protocol::BulkString(pt), | ||||
|             Err(e) => e.to_protocol(), | ||||
|         } | ||||
|     } else { | ||||
|         match decrypt_b64_with_x25519_raw(&ident_or_b64, ct_b64) { | ||||
|             Ok(pt) => Protocol::BulkString(pt), | ||||
|             Err(e) => e.to_protocol(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_sign_name(server: &Server, name: &str, message: &str) -> Protocol { | ||||
|     let sec = match sget(server, &sign_priv_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("signing secret (age:signpriv:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match sign_b64(&sec, message) { | ||||
|         Ok(sig) => Protocol::BulkString(sig), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig_b64: &str) -> Protocol { | ||||
|     let pubk = match sget(server, &sign_pub_key_key(name)) { | ||||
|         Ok(Some(v)) => v, | ||||
|         Ok(None) => return AgeWireError::NotFound("verify pubkey (age:signpub:{name})").to_protocol(), | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match verify_b64(&pubk, message, sig_b64) { | ||||
|         Ok(true) => Protocol::SimpleString("1".to_string()), | ||||
|         Ok(false) => Protocol::SimpleString("0".to_string()), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_age_list(server: &Server) -> Protocol { | ||||
|     // Return a flat, deduplicated, sorted list of managed key names (no labels) | ||||
|     let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) }; | ||||
|  | ||||
|     let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> { | ||||
|         let keys = st.keys(pat)?; | ||||
|         let mut names: Vec<String> = keys | ||||
|             .into_iter() | ||||
|             .filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string())) | ||||
|             .collect(); | ||||
|         names.sort(); | ||||
|         Ok(names) | ||||
|     }; | ||||
|  | ||||
|     let encpub   = match pull("age:key:*",      "age:key:")      { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let encpriv  = match pull("age:privkey:*",  "age:privkey:")  { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let signpub  = match pull("age:signpub:*",  "age:signpub:")  { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|     let signpriv = match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) }; | ||||
|  | ||||
|     let mut set: HashSet<String> = HashSet::new(); | ||||
|     for n in encpub.into_iter().chain(encpriv).chain(signpub).chain(signpriv) { | ||||
|         set.insert(n); | ||||
|     } | ||||
|  | ||||
|     let mut names: Vec<String> = set.into_iter().collect(); | ||||
|     names.sort(); | ||||
|  | ||||
|     Protocol::Array(names.into_iter().map(Protocol::BulkString).collect()) | ||||
| } | ||||
										
											
												File diff suppressed because it is too large
											
										
									
								
							| @@ -1,8 +1,8 @@ | ||||
| use chacha20poly1305::{ | ||||
|     aead::{Aead, KeyInit, OsRng}, | ||||
|     aead::{Aead, KeyInit}, | ||||
|     XChaCha20Poly1305, XNonce, | ||||
| }; | ||||
| use rand::RngCore; | ||||
| use rand::{rngs::OsRng, RngCore}; | ||||
| use sha2::{Digest, Sha256}; | ||||
|  | ||||
| const VERSION: u8 = 1; | ||||
| @@ -31,7 +31,7 @@ pub struct CryptoFactory { | ||||
| impl CryptoFactory { | ||||
|     /// Accepts any secret bytes; turns them into a 32-byte key (SHA-256). | ||||
|     pub fn new<S: AsRef<[u8]>>(secret: S) -> Self { | ||||
|         let mut h = Sha256::new(); | ||||
|         let mut h = Sha256::default(); | ||||
|         h.update(b"xchacha20poly1305-factory:v1"); // domain separation | ||||
|         h.update(secret.as_ref()); | ||||
|         let digest = h.finalize(); // 32 bytes | ||||
							
								
								
									
353  src/embedding.rs  (new file)
							| @@ -0,0 +1,353 @@ | ||||
| // Embedding abstraction with a single external provider (OpenAI-compatible) and local test providers. | ||||
|  | ||||
| use std::collections::HashMap; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::error::DBError; | ||||
|  | ||||
| // Networking for OpenAI-compatible endpoints | ||||
| use std::time::Duration; | ||||
| use ureq::{Agent, AgentBuilder}; | ||||
| use serde_json::json; | ||||
|  | ||||
| /// Provider identifiers (minimal set). | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] | ||||
| #[serde(rename_all = "snake_case")] | ||||
| pub enum EmbeddingProvider { | ||||
|     /// External HTTP provider compatible with OpenAI's embeddings API. | ||||
|     openai, | ||||
|     /// Deterministic, local-only embedder for CI and offline development (text). | ||||
|     test, | ||||
|     /// Deterministic, local-only embedder for CI and offline development (image). | ||||
|     image_test, | ||||
| } | ||||
|  | ||||
| /// Serializable embedding configuration. | ||||
| /// - provider: "openai" | "test" | "image_test" | ||||
| /// - model: provider/model id (e.g., "text-embedding-3-small"), may be ignored by local gateways | ||||
| /// - dim: required output dimension (used to create Lance datasets and validate outputs) | ||||
| /// - endpoint: optional HTTP endpoint (defaults to OpenAI API when provider == openai) | ||||
| /// - headers: optional HTTP headers (e.g., Authorization). If no Authorization header is supplied and OPENAI_API_KEY is set, it is added automatically. | ||||
| /// - timeout_ms: optional HTTP timeout in milliseconds (for both read and write) | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub struct EmbeddingConfig { | ||||
|     pub provider: EmbeddingProvider, | ||||
|     pub model: String, | ||||
|     pub dim: usize, | ||||
|     #[serde(default)] | ||||
|     pub endpoint: Option<String>, | ||||
|     #[serde(default)] | ||||
|     pub headers: HashMap<String, String>, | ||||
|     #[serde(default)] | ||||
|     pub timeout_ms: Option<u64>, | ||||
| } | ||||
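|  | ||||
| // Illustrative JSON form of this config (placeholder values, shown only to make the field | ||||
| // mapping concrete; nothing here is a default enforced by this module): | ||||
| // | ||||
| // { | ||||
| //   "provider": "openai", | ||||
| //   "model": "text-embedding-3-small", | ||||
| //   "dim": 1536, | ||||
| //   "endpoint": "https://api.openai.com/v1/embeddings", | ||||
| //   "headers": { "Authorization": "Bearer <api key>" }, | ||||
| //   "timeout_ms": 30000 | ||||
| // } | ||||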
|  | ||||
| /// A provider-agnostic text embedding interface. | ||||
| pub trait Embedder: Send + Sync { | ||||
|     /// Human-readable provider/model name | ||||
|     fn name(&self) -> String; | ||||
|     /// Embedding dimension | ||||
|     fn dim(&self) -> usize; | ||||
|     /// Embed a single text string into a fixed-length vector | ||||
|     fn embed(&self, text: &str) -> Result<Vec<f32>, DBError>; | ||||
|     /// Embed many texts; default maps embed() over inputs | ||||
|     fn embed_many(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         texts.iter().map(|t| self.embed(t)).collect() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Image embedding interface (separate from text to keep modality-specific inputs). | ||||
| pub trait ImageEmbedder: Send + Sync { | ||||
|     /// Human-readable provider/model name | ||||
|     fn name(&self) -> String; | ||||
|     /// Embedding dimension | ||||
|     fn dim(&self) -> usize; | ||||
|     /// Embed a single image (raw bytes) | ||||
|     fn embed_image(&self, bytes: &[u8]) -> Result<Vec<f32>, DBError>; | ||||
|     /// Embed many images; default maps embed_image() over inputs | ||||
|     fn embed_many_images(&self, images: &[Vec<u8>]) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         images.iter().map(|b| self.embed_image(b)).collect() | ||||
|     } | ||||
| } | ||||
|  | ||||
| //// ----------------------------- TEXT: deterministic test embedder ----------------------------- | ||||
|  | ||||
| /// Deterministic, no-deps, no-network embedder for CI and offline dev. | ||||
| /// Algorithm: | ||||
| /// - Fold bytes of UTF-8 into 'dim' buckets with a simple rolling hash | ||||
| /// - Apply tanh-like scaling and L2-normalize to unit length | ||||
| pub struct TestHashEmbedder { | ||||
|     dim: usize, | ||||
|     model_name: String, | ||||
| } | ||||
|  | ||||
| impl TestHashEmbedder { | ||||
|     pub fn new(dim: usize, model_name: impl Into<String>) -> Self { | ||||
|         Self { dim, model_name: model_name.into() } | ||||
|     } | ||||
|  | ||||
|     fn l2_normalize(mut v: Vec<f32>) -> Vec<f32> { | ||||
|         let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt(); | ||||
|         if norm > 0.0 { | ||||
|             for x in &mut v { | ||||
|                 *x /= norm; | ||||
|             } | ||||
|         } | ||||
|         v | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Embedder for TestHashEmbedder { | ||||
|     fn name(&self) -> String { | ||||
|         format!("test:{}", self.model_name) | ||||
|     } | ||||
|  | ||||
|     fn dim(&self) -> usize { | ||||
|         self.dim | ||||
|     } | ||||
|  | ||||
|     fn embed(&self, text: &str) -> Result<Vec<f32>, DBError> { | ||||
|         let mut acc = vec![0f32; self.dim]; | ||||
|         // A simple, deterministic folding hash over bytes | ||||
|         let mut h1: u32 = 2166136261u32; // FNV-like seed | ||||
|         let mut h2: u32 = 0x9e3779b9u32; // golden ratio | ||||
|         for (i, b) in text.as_bytes().iter().enumerate() { | ||||
|             h1 ^= *b as u32; | ||||
|             h1 = h1.wrapping_mul(16777619u32); | ||||
|             h2 = h2.wrapping_add(((*b as u32) << (i % 13)) ^ (h1.rotate_left((i % 7) as u32))); | ||||
|             let idx = (h1 ^ h2) as usize % self.dim; | ||||
|             // Map byte to [-1, 1] and accumulate with mild decay by position | ||||
|             let val = ((*b as f32) / 127.5 - 1.0) * (1.0 / (1.0 + (i as f32 / 32.0))); | ||||
|             acc[idx] += val; | ||||
|         } | ||||
|         // Non-linear squashing to stabilize + normalize | ||||
|         for x in &mut acc { | ||||
|             *x = x.tanh(); | ||||
|         } | ||||
|         Ok(Self::l2_normalize(acc)) | ||||
|     } | ||||
| } | ||||
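|  | ||||
| // Minimal property sketch (illustrative; not part of the original change): the test embedder | ||||
| // is deterministic and produces L2-normalized vectors of the requested dimension. | ||||
| #[cfg(test)] | ||||
| mod test_hash_embedder_sketch { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn deterministic_and_unit_norm() { | ||||
|         let e = TestHashEmbedder::new(8, "unit-test"); | ||||
|         let a = e.embed("same input").unwrap(); | ||||
|         let b = e.embed("same input").unwrap(); | ||||
|         assert_eq!(a, b); | ||||
|         assert_eq!(a.len(), 8); | ||||
|         let norm: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt(); | ||||
|         assert!((norm - 1.0).abs() < 1e-5); | ||||
|     } | ||||
| } | ||||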
|  | ||||
| //// ----------------------------- IMAGE: deterministic test embedder ----------------------------- | ||||
|  | ||||
| /// Deterministic image embedder that folds bytes into buckets, applies tanh-like nonlinearity, | ||||
| /// and L2-normalizes. Suitable for CI and offline development. | ||||
| /// NOTE: This is NOT semantic; it is a stable hash-like representation. | ||||
| pub struct TestImageHashEmbedder { | ||||
|     dim: usize, | ||||
|     model_name: String, | ||||
| } | ||||
|  | ||||
| impl TestImageHashEmbedder { | ||||
|     pub fn new(dim: usize, model_name: impl Into<String>) -> Self { | ||||
|         Self { dim, model_name: model_name.into() } | ||||
|     } | ||||
|  | ||||
|     fn l2_normalize(mut v: Vec<f32>) -> Vec<f32> { | ||||
|         let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt(); | ||||
|         if norm > 0.0 { | ||||
|             for x in &mut v { | ||||
|                 *x /= norm; | ||||
|             } | ||||
|         } | ||||
|         v | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl ImageEmbedder for TestImageHashEmbedder { | ||||
|     fn name(&self) -> String { | ||||
|         format!("image_test:{}", self.model_name) | ||||
|     } | ||||
|  | ||||
|     fn dim(&self) -> usize { | ||||
|         self.dim | ||||
|     } | ||||
|  | ||||
|     fn embed_image(&self, bytes: &[u8]) -> Result<Vec<f32>, DBError> { | ||||
|         // Deterministic fold across bytes with two rolling accumulators. | ||||
|         let mut acc = vec![0f32; self.dim]; | ||||
|         let mut h1: u32 = 0x811C9DC5; // FNV-like | ||||
|         let mut h2: u32 = 0x9E3779B9; // golden ratio | ||||
|         for (i, b) in bytes.iter().enumerate() { | ||||
|             h1 ^= *b as u32; | ||||
|             h1 = h1.wrapping_mul(16777619u32); | ||||
|             // combine with position and h2 | ||||
|             h2 = h2.wrapping_add(((i as u32).rotate_left((i % 13) as u32)) ^ h1.rotate_left((i % 7) as u32)); | ||||
|             let idx = (h1 ^ h2) as usize % self.dim; | ||||
|             // Map to [-1,1] and decay with position | ||||
|             let val = ((*b as f32) / 127.5 - 1.0) * (1.0 / (1.0 + (i as f32 / 128.0))); | ||||
|             acc[idx] += val; | ||||
|         } | ||||
|         for x in &mut acc { | ||||
|             *x = x.tanh(); | ||||
|         } | ||||
|         Ok(Self::l2_normalize(acc)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| //// ----------------------------- OpenAI-compatible HTTP embedder ----------------------------- | ||||
|  | ||||
| struct OpenAIEmbedder { | ||||
|     model: String, | ||||
|     dim: usize, | ||||
|     agent: Agent, | ||||
|     endpoint: String, | ||||
|     headers: Vec<(String, String)>, | ||||
| } | ||||
|  | ||||
| impl OpenAIEmbedder { | ||||
|     fn new_from_config(cfg: &EmbeddingConfig) -> Result<Self, DBError> { | ||||
|         // Resolve endpoint | ||||
|         let endpoint = cfg.endpoint.clone().unwrap_or_else(|| { | ||||
|             "https://api.openai.com/v1/embeddings".to_string() | ||||
|         }); | ||||
|  | ||||
|         // Determine expected dimension (required by config) | ||||
|         let dim = cfg.dim; | ||||
|  | ||||
|         // Build an HTTP agent with timeouts (blocking; no tokio runtime involved) | ||||
|         let to_ms = cfg.timeout_ms.unwrap_or(30_000); | ||||
|         let agent = AgentBuilder::new() | ||||
|             .timeout_read(Duration::from_millis(to_ms)) | ||||
|             .timeout_write(Duration::from_millis(to_ms)) | ||||
|             .build(); | ||||
|  | ||||
|         // Headers: start from cfg.headers, and add Authorization from env if absent and available | ||||
|         let mut headers: Vec<(String, String)> = | ||||
|             cfg.headers.iter().map(|(k, v)| (k.clone(), v.clone())).collect(); | ||||
|  | ||||
|         if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("content-type")) { | ||||
|             headers.push(("Content-Type".to_string(), "application/json".to_string())); | ||||
|         } | ||||
|  | ||||
|         if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("authorization")) { | ||||
|             if let Ok(key) = std::env::var("OPENAI_API_KEY") { | ||||
|                 headers.push(("Authorization".to_string(), format!("Bearer {}", key))); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(Self { | ||||
|             model: cfg.model.clone(), | ||||
|             dim, | ||||
|             agent, | ||||
|             endpoint, | ||||
|             headers, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn request_many(&self, inputs: &[String]) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         // Compose request body (OpenAI-compatible) | ||||
|         let mut body = json!({ "model": self.model, "input": inputs }); | ||||
|         if self.dim > 0 { | ||||
|             body.as_object_mut() | ||||
|                 .unwrap() | ||||
|                 .insert("dimensions".to_string(), json!(self.dim)); | ||||
|         } | ||||
|  | ||||
|         // Build request | ||||
|         let mut req = self.agent.post(&self.endpoint); | ||||
|         for (k, v) in &self.headers { | ||||
|             req = req.set(k, v); | ||||
|         } | ||||
|  | ||||
|         // Send and handle errors | ||||
|         let resp = req.send_json(body); | ||||
|         let text = match resp { | ||||
|             Ok(r) => r | ||||
|                 .into_string() | ||||
|                 .map_err(|e| DBError(format!("Failed to read embeddings response: {}", e)))?, | ||||
|             Err(ureq::Error::Status(code, r)) => { | ||||
|                 let body = r.into_string().unwrap_or_default(); | ||||
|                 return Err(DBError(format!("Embeddings API error {}: {}", code, body))); | ||||
|             } | ||||
|             Err(e) => return Err(DBError(format!("HTTP request failed: {}", e))), | ||||
|         }; | ||||
|  | ||||
|         let val: serde_json::Value = serde_json::from_str(&text) | ||||
|             .map_err(|e| DBError(format!("Invalid JSON from embeddings API: {}", e)))?; | ||||
|  | ||||
|         let data = val | ||||
|             .get("data") | ||||
|             .and_then(|d| d.as_array()) | ||||
|             .ok_or_else(|| DBError("Embeddings API response missing 'data' array".into()))?; | ||||
|  | ||||
|         let mut out: Vec<Vec<f32>> = Vec::with_capacity(data.len()); | ||||
|         for item in data { | ||||
|             let emb = item | ||||
|                 .get("embedding") | ||||
|                 .and_then(|e| e.as_array()) | ||||
|                 .ok_or_else(|| DBError("Embeddings API item missing 'embedding'".into()))?; | ||||
|             let mut v: Vec<f32> = Vec::with_capacity(emb.len()); | ||||
|             for n in emb { | ||||
|                 let f = n | ||||
|                     .as_f64() | ||||
|                     .ok_or_else(|| DBError("Embedding element is not a number".into()))?; | ||||
|                 v.push(f as f32); | ||||
|             } | ||||
|             if self.dim > 0 && v.len() != self.dim { | ||||
|                 return Err(DBError(format!( | ||||
|                     "Embedding dimension mismatch: expected {}, got {}. Configure 'dim' to match output.", | ||||
|                     self.dim, v.len() | ||||
|                 ))); | ||||
|             } | ||||
|             out.push(v); | ||||
|         } | ||||
|         Ok(out) | ||||
|     } | ||||
| } | ||||
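|  | ||||
| // For reference (mirrors the parsing in request_many above): the assumed OpenAI-compatible | ||||
| // wire shape is | ||||
| //   request:  { "model": "...", "input": ["...", ...], "dimensions": <dim> }  // "dimensions" only when dim > 0 | ||||
| //   response: { "data": [ { "embedding": [<number>, ...] }, ... ] } | ||||
| // Any deviation from this shape is surfaced as a DBError by request_many. | ||||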
|  | ||||
| impl Embedder for OpenAIEmbedder { | ||||
|     fn name(&self) -> String { | ||||
|         format!("openai:{}", self.model) | ||||
|     } | ||||
|  | ||||
|     fn dim(&self) -> usize { | ||||
|         self.dim | ||||
|     } | ||||
|  | ||||
|     fn embed(&self, text: &str) -> Result<Vec<f32>, DBError> { | ||||
|         let v = self.request_many(&[text.to_string()])?; | ||||
|         Ok(v.into_iter().next().unwrap_or_else(|| vec![0.0; self.dim])) | ||||
|     } | ||||
|  | ||||
|     fn embed_many(&self, texts: &[String]) -> Result<Vec<Vec<f32>>, DBError> { | ||||
|         if texts.is_empty() { | ||||
|             return Ok(vec![]); | ||||
|         } | ||||
|         self.request_many(texts) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Create an embedder instance from a config. | ||||
| /// - openai: uses OpenAI-compatible embeddings REST API (endpoint override supported) | ||||
| /// - test: deterministic local text embedder (no network) | ||||
| /// - image_test: not valid for text (use create_image_embedder) | ||||
| pub fn create_embedder(config: &EmbeddingConfig) -> Result<Arc<dyn Embedder>, DBError> { | ||||
|     match &config.provider { | ||||
|         EmbeddingProvider::openai => { | ||||
|             let inner = OpenAIEmbedder::new_from_config(config)?; | ||||
|             Ok(Arc::new(inner)) | ||||
|         } | ||||
|         EmbeddingProvider::test => { | ||||
|             Ok(Arc::new(TestHashEmbedder::new(config.dim, config.model.clone()))) | ||||
|         } | ||||
|         EmbeddingProvider::image_test => { | ||||
|             Err(DBError("Use create_image_embedder() for image providers".into())) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Create an image embedder instance from a config. | ||||
| /// - image_test: deterministic local image embedder | ||||
| pub fn create_image_embedder(config: &EmbeddingConfig) -> Result<Arc<dyn ImageEmbedder>, DBError> { | ||||
|     match &config.provider { | ||||
|         EmbeddingProvider::image_test => { | ||||
|             Ok(Arc::new(TestImageHashEmbedder::new(config.dim, config.model.clone()))) | ||||
|         } | ||||
|         EmbeddingProvider::test | EmbeddingProvider::openai => { | ||||
|             Err(DBError("Configured text provider; dataset expects image provider (e.g., 'image_test')".into())) | ||||
|         } | ||||
|     } | ||||
| } | ||||
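|  | ||||
| // Offline usage sketch (illustrative; uses only items defined in this file): build a config | ||||
| // for the local "test" provider and embed a couple of strings without any network access. | ||||
| #[cfg(test)] | ||||
| mod create_embedder_sketch { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn local_test_provider_end_to_end() { | ||||
|         let cfg = EmbeddingConfig { | ||||
|             provider: EmbeddingProvider::test, | ||||
|             model: "local-hash".to_string(), | ||||
|             dim: 16, | ||||
|             endpoint: None, | ||||
|             headers: Default::default(), | ||||
|             timeout_ms: None, | ||||
|         }; | ||||
|         let embedder = create_embedder(&cfg).expect("test provider should construct"); | ||||
|         let vecs = embedder | ||||
|             .embed_many(&["first".to_string(), "second".to_string()]) | ||||
|             .expect("local embedding should not fail"); | ||||
|         assert_eq!(vecs.len(), 2); | ||||
|         assert!(vecs.iter().all(|v| v.len() == 16)); | ||||
|     } | ||||
| } | ||||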
| @@ -9,6 +9,14 @@ use bincode; | ||||
| #[derive(Debug)] | ||||
| pub struct DBError(pub String); | ||||
|  | ||||
| impl std::fmt::Display for DBError { | ||||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||||
|         write!(f, "{}", self.0) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::error::Error for DBError {} | ||||
|  | ||||
| impl From<std::io::Error> for DBError { | ||||
|     fn from(item: std::io::Error) -> Self { | ||||
|         DBError(item.to_string().clone()) | ||||
							
								
								
									
663  src/lance_store.rs  (new file)
							| @@ -0,0 +1,663 @@ | ||||
| // LanceDB store abstraction (per database instance) | ||||
| // This module encapsulates all Lance/LanceDB operations for a given DB id. | ||||
| // Notes: | ||||
| // - We persist each dataset (aka "table") under <base_dir>/lance/<db_id>/<name>.lance | ||||
| // - Schema convention: id: Utf8 (non-null), vector: FixedSizeList<Float32, dim> (non-null), meta: Utf8 (nullable JSON string) | ||||
| // - We implement naive KNN (L2) scan in Rust for search to avoid tight coupling to lancedb search builder API. | ||||
| //   Index creation is currently a best-effort no-op (see create_index below); a future optimization can use lance's vector index for index-aware search. | ||||
|  | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::{BinaryHeap, HashMap}; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use crate::error::DBError; | ||||
|  | ||||
| use arrow_array::{Array, RecordBatch, RecordBatchIterator, StringArray}; | ||||
| use arrow_array::builder::{FixedSizeListBuilder, Float32Builder, StringBuilder}; | ||||
| use arrow_array::cast::AsArray; | ||||
| use arrow_schema::{DataType, Field, Schema}; | ||||
| use futures::StreamExt; | ||||
| use serde_json::Value as JsonValue; | ||||
|  | ||||
| // Low-level Lance core | ||||
| use lance::dataset::{WriteMode, WriteParams}; | ||||
| use lance::Dataset; | ||||
|  | ||||
| // Vector index (IVF_PQ etc.) | ||||
|  | ||||
| // High-level LanceDB (for deletes where available) | ||||
| use lancedb::connection::Connection; | ||||
| use arrow_array::types::Float32Type; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct LanceStore { | ||||
|     base_dir: PathBuf, | ||||
|     db_id: u64, | ||||
| } | ||||
|  | ||||
| impl LanceStore { | ||||
|     // Create a new LanceStore rooted at <base_dir>/lance/<db_id> | ||||
|     pub fn new(base_dir: &Path, db_id: u64) -> Result<Self, DBError> { | ||||
|         let p = base_dir.join("lance").join(db_id.to_string()); | ||||
|         std::fs::create_dir_all(&p) | ||||
|             .map_err(|e| DBError(format!("Failed to create Lance dir {}: {}", p.display(), e)))?; | ||||
|         Ok(Self { base_dir: p, db_id }) | ||||
|     } | ||||
|  | ||||
|     fn dataset_path(&self, name: &str) -> PathBuf { | ||||
|         // Store datasets as directories or files with .lance suffix | ||||
|         // We accept both "<name>" and "<name>.lance" as logical name; normalize on ".lance" | ||||
|         let has_ext = name.ends_with(".lance"); | ||||
|         if has_ext { | ||||
|             self.base_dir.join(name) | ||||
|         } else { | ||||
|             self.base_dir.join(format!("{name}.lance")) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn file_uri(path: &Path) -> String { | ||||
|         // lancedb can use filesystem path directly; keep it simple | ||||
|         // Avoid file:// scheme since local paths are supported. | ||||
|         path.to_string_lossy().to_string() | ||||
|     } | ||||
|  | ||||
|     async fn connect_db(&self) -> Result<Connection, DBError> { | ||||
|         let uri = Self::file_uri(&self.base_dir); | ||||
|         lancedb::connect(&uri) | ||||
|             .execute() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("LanceDB connect failed at {}: {}", uri, e))) | ||||
|     } | ||||
|  | ||||
|     fn vector_field(dim: i32) -> Field { | ||||
|         Field::new( | ||||
|             "vector", | ||||
|             DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), dim), | ||||
|             false, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     async fn read_existing_dim(&self, name: &str) -> Result<Option<i32>, DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         if !path.exists() { | ||||
|             return Ok(None); | ||||
|         } | ||||
|         let ds = Dataset::open(path.to_string_lossy().as_ref()) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Open dataset failed: {}: {}", path.display(), e)))?; | ||||
|         // Scan a single batch to infer vector dimension from the 'vector' column type | ||||
|         let mut scan = ds.scan(); | ||||
|         if let Err(e) = scan.project(&["vector"]) { | ||||
|             return Err(DBError(format!("Project failed while inferring dim: {}", e))); | ||||
|         } | ||||
|         let mut stream = scan | ||||
|             .try_into_stream() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Scan stream failed while inferring dim: {}", e)))?; | ||||
|         if let Some(batch_res) = stream.next().await { | ||||
|             let batch = batch_res.map_err(|e| DBError(format!("Batch error: {}", e)))?; | ||||
|             let vec_col = batch | ||||
|                 .column_by_name("vector") | ||||
|                 .ok_or_else(|| DBError("Column 'vector' missing".into()))?; | ||||
|             let fsl = vec_col.as_fixed_size_list(); | ||||
|             let dim = fsl.value_length(); | ||||
|             return Ok(Some(dim)); | ||||
|         } | ||||
|         Ok(None) | ||||
|     } | ||||
|  | ||||
|     fn build_schema(dim: i32) -> Arc<Schema> { | ||||
|         Arc::new(Schema::new(vec![ | ||||
|             Field::new("id", DataType::Utf8, false), | ||||
|             Self::vector_field(dim), | ||||
|             Field::new("text", DataType::Utf8, true), | ||||
|             Field::new("media_type", DataType::Utf8, true), | ||||
|             Field::new("media_uri", DataType::Utf8, true), | ||||
|             Field::new("meta", DataType::Utf8, true), | ||||
|         ])) | ||||
|     } | ||||
|  | ||||
|     fn build_one_row_batch( | ||||
|         id: &str, | ||||
|         vector: &[f32], | ||||
|         meta: &HashMap<String, String>, | ||||
|         text: Option<&str>, | ||||
|         media_type: Option<&str>, | ||||
|         media_uri: Option<&str>, | ||||
|         dim: i32, | ||||
|     ) -> Result<(Arc<Schema>, RecordBatch), DBError> { | ||||
|         if vector.len() as i32 != dim { | ||||
|             return Err(DBError(format!( | ||||
|                 "Vector length mismatch: expected {}, got {}", | ||||
|                 dim, | ||||
|                 vector.len() | ||||
|             ))); | ||||
|         } | ||||
|  | ||||
|         let schema = Self::build_schema(dim); | ||||
|  | ||||
|         // id column | ||||
|         let mut id_builder = StringBuilder::new(); | ||||
|         id_builder.append_value(id); | ||||
|         let id_arr = Arc::new(id_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         // vector column (FixedSizeList<Float32, dim>) | ||||
|         let v_builder = Float32Builder::with_capacity(vector.len()); | ||||
|         let mut list_builder = FixedSizeListBuilder::new(v_builder, dim); | ||||
|         for v in vector { | ||||
|             list_builder.values().append_value(*v); | ||||
|         } | ||||
|         list_builder.append(true); | ||||
|         let vec_arr = Arc::new(list_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         // text column (optional) | ||||
|         let mut text_builder = StringBuilder::new(); | ||||
|         if let Some(t) = text { | ||||
|             text_builder.append_value(t); | ||||
|         } else { | ||||
|             text_builder.append_null(); | ||||
|         } | ||||
|         let text_arr = Arc::new(text_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         // media_type column (optional) | ||||
|         let mut mt_builder = StringBuilder::new(); | ||||
|         if let Some(mt) = media_type { | ||||
|             mt_builder.append_value(mt); | ||||
|         } else { | ||||
|             mt_builder.append_null(); | ||||
|         } | ||||
|         let mt_arr = Arc::new(mt_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         // media_uri column (optional) | ||||
|         let mut mu_builder = StringBuilder::new(); | ||||
|         if let Some(mu) = media_uri { | ||||
|             mu_builder.append_value(mu); | ||||
|         } else { | ||||
|             mu_builder.append_null(); | ||||
|         } | ||||
|         let mu_arr = Arc::new(mu_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         // meta column (JSON string) | ||||
|         let meta_json = if meta.is_empty() { | ||||
|             None | ||||
|         } else { | ||||
|             Some(serde_json::to_string(meta).map_err(|e| DBError(format!("Serialize meta error: {e}")))?) | ||||
|         }; | ||||
|         let mut meta_builder = StringBuilder::new(); | ||||
|         if let Some(s) = meta_json { | ||||
|             meta_builder.append_value(&s); | ||||
|         } else { | ||||
|             meta_builder.append_null(); | ||||
|         } | ||||
|         let meta_arr = Arc::new(meta_builder.finish()) as Arc<dyn Array>; | ||||
|  | ||||
|         let batch = | ||||
|             RecordBatch::try_new(schema.clone(), vec![id_arr, vec_arr, text_arr, mt_arr, mu_arr, meta_arr]).map_err(|e| { | ||||
|                 DBError(format!("RecordBatch build failed: {e}")) | ||||
|             })?; | ||||
|  | ||||
|         Ok((schema, batch)) | ||||
|     } | ||||
|  | ||||
|     // Create a new dataset (vector collection) with dimension `dim`. | ||||
|     pub async fn create_dataset(&self, name: &str, dim: usize) -> Result<(), DBError> { | ||||
|         let dim_i32: i32 = dim | ||||
|             .try_into() | ||||
|             .map_err(|_| DBError("Dimension too large".into()))?; | ||||
|         let path = self.dataset_path(name); | ||||
|  | ||||
|         if path.exists() { | ||||
|             // Validate dimension if present | ||||
|             if let Some(existing_dim) = self.read_existing_dim(name).await? { | ||||
|                 if existing_dim != dim_i32 { | ||||
|                     return Err(DBError(format!( | ||||
|                         "Dataset '{}' already exists with dim {}, requested {}", | ||||
|                         name, existing_dim, dim_i32 | ||||
|                     ))); | ||||
|                 } | ||||
|                 // No-op | ||||
|                 return Ok(()); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Create an empty dataset by writing an empty batch | ||||
|         let schema = Self::build_schema(dim_i32); | ||||
|         let empty_id = Arc::new(StringArray::new_null(0)); | ||||
|         // Build an empty FixedSizeListArray | ||||
|         let v_builder = Float32Builder::new(); | ||||
|         let mut list_builder = FixedSizeListBuilder::new(v_builder, dim_i32); | ||||
|         let empty_vec = Arc::new(list_builder.finish()) as Arc<dyn Array>; | ||||
|         let empty_text = Arc::new(StringArray::new_null(0)); | ||||
|         let empty_media_type = Arc::new(StringArray::new_null(0)); | ||||
|         let empty_media_uri = Arc::new(StringArray::new_null(0)); | ||||
|         let empty_meta = Arc::new(StringArray::new_null(0)); | ||||
|  | ||||
|         let empty_batch = | ||||
|             RecordBatch::try_new(schema.clone(), vec![empty_id, empty_vec, empty_text, empty_media_type, empty_media_uri, empty_meta]) | ||||
|                 .map_err(|e| DBError(format!("Build empty batch failed: {e}")))?; | ||||
|  | ||||
|         let write_params = WriteParams { | ||||
|             mode: WriteMode::Create, | ||||
|             ..Default::default() | ||||
|         }; | ||||
|  | ||||
|         let reader = RecordBatchIterator::new([Ok(empty_batch)], schema.clone()); | ||||
|  | ||||
|         Dataset::write(reader, path.to_string_lossy().as_ref(), Some(write_params)) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Create dataset failed at {}: {}", path.display(), e)))?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     // Store/Upsert a single vector with ID and optional metadata (append; duplicate IDs are possible for now) | ||||
|     pub async fn store_vector( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         id: &str, | ||||
|         vector: Vec<f32>, | ||||
|         meta: HashMap<String, String>, | ||||
|         text: Option<String>, | ||||
|     ) -> Result<(), DBError> { | ||||
|         // Delegate to media-aware path with no media fields | ||||
|         self.store_vector_with_media(name, id, vector, meta, text, None, None).await | ||||
|     } | ||||
|  | ||||
|     /// Store/Upsert a single vector with optional text and media fields (media_type/media_uri). | ||||
|     pub async fn store_vector_with_media( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         id: &str, | ||||
|         vector: Vec<f32>, | ||||
|         meta: HashMap<String, String>, | ||||
|         text: Option<String>, | ||||
|         media_type: Option<String>, | ||||
|         media_uri: Option<String>, | ||||
|     ) -> Result<(), DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|  | ||||
|         // Determine dimension: use existing or infer from vector | ||||
|         let dim_i32 = if let Some(d) = self.read_existing_dim(name).await? { | ||||
|             d | ||||
|         } else { | ||||
|             vector | ||||
|                 .len() | ||||
|                 .try_into() | ||||
|                 .map_err(|_| DBError("Vector length too large".into()))? | ||||
|         }; | ||||
|  | ||||
|         let (schema, batch) = Self::build_one_row_batch( | ||||
|             id, | ||||
|             &vector, | ||||
|             &meta, | ||||
|             text.as_deref(), | ||||
|             media_type.as_deref(), | ||||
|             media_uri.as_deref(), | ||||
|             dim_i32, | ||||
|         )?; | ||||
|  | ||||
|         // If LanceDB table exists and provides delete, we can upsert by deleting same id | ||||
|         // Try best-effort delete; ignore errors to keep operation append-only on failure | ||||
|         if path.exists() { | ||||
|             if let Ok(conn) = self.connect_db().await { | ||||
|                 if let Ok(mut tbl) = conn.open_table(name).execute().await { | ||||
|                     let _ = tbl | ||||
|                         .delete(&format!("id = '{}'", id.replace('\'', "''"))) | ||||
|                         .await; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let write_params = WriteParams { | ||||
|             mode: if path.exists() { | ||||
|                 WriteMode::Append | ||||
|             } else { | ||||
|                 WriteMode::Create | ||||
|             }, | ||||
|             ..Default::default() | ||||
|         }; | ||||
|         let reader = RecordBatchIterator::new([Ok(batch)], schema.clone()); | ||||
|  | ||||
|         Dataset::write(reader, path.to_string_lossy().as_ref(), Some(write_params)) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Write (append/create) failed: {}", e)))?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     // Delete a record by ID (best-effort; returns true if delete likely removed rows) | ||||
|     pub async fn delete_by_id(&self, name: &str, id: &str) -> Result<bool, DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         if !path.exists() { | ||||
|             return Ok(false); | ||||
|         } | ||||
|         let conn = self.connect_db().await?; | ||||
|         let mut tbl = conn | ||||
|             .open_table(name) | ||||
|             .execute() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Open table '{}' failed: {}", name, e)))?; | ||||
|         // SQL-like predicate quoting | ||||
|         let pred = format!("id = '{}'", id.replace('\'', "''")); | ||||
|         // lancedb returns count or () depending on version; treat Ok as success | ||||
|         match tbl.delete(&pred).await { | ||||
|             Ok(_) => Ok(true), | ||||
|             Err(e) => Err(DBError(format!("Delete failed: {}", e))), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Drop the entire dataset | ||||
|     pub async fn drop_dataset(&self, name: &str) -> Result<bool, DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         // Try LanceDB drop first | ||||
|         // Best-effort logical drop via lancedb if available; ignore failures. | ||||
|         // Note: we rely on filesystem removal below for final cleanup. | ||||
|         if let Ok(conn) = self.connect_db().await { | ||||
|             if let Ok(mut t) = conn.open_table(name).execute().await { | ||||
|                 // Best-effort delete-all to reduce footprint prior to fs removal | ||||
|                 let _ = t.delete("true").await; | ||||
|             } | ||||
|         } | ||||
|         if path.exists() { | ||||
|             if path.is_dir() { | ||||
|                 std::fs::remove_dir_all(&path) | ||||
|                     .map_err(|e| DBError(format!("Failed to drop dataset '{}': {}", name, e)))?; | ||||
|             } else { | ||||
|                 std::fs::remove_file(&path) | ||||
|                     .map_err(|e| DBError(format!("Failed to drop dataset '{}': {}", name, e)))?; | ||||
|             } | ||||
|             return Ok(true); | ||||
|         } | ||||
|         Ok(false) | ||||
|     } | ||||
|  | ||||
|     // Search top-k nearest with optional filter; returns tuple of (id, score (lower=L2), meta) | ||||
|     pub async fn search_vectors( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         query: Vec<f32>, | ||||
|         k: usize, | ||||
|         filter: Option<String>, | ||||
|         return_fields: Option<Vec<String>>, | ||||
|     ) -> Result<Vec<(String, f32, HashMap<String, String>)>, DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         if !path.exists() { | ||||
|             return Err(DBError(format!("Dataset '{}' not found", name))); | ||||
|         } | ||||
|         // Determine dim and validate query length | ||||
|         let dim_i32 = self | ||||
|             .read_existing_dim(name) | ||||
|             .await? | ||||
|             .ok_or_else(|| DBError("Vector column not found".into()))?; | ||||
|         if query.len() as i32 != dim_i32 { | ||||
|             return Err(DBError(format!( | ||||
|                 "Query vector length mismatch: expected {}, got {}", | ||||
|                 dim_i32, | ||||
|                 query.len() | ||||
|             ))); | ||||
|         } | ||||
|  | ||||
|         let ds = Dataset::open(path.to_string_lossy().as_ref()) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Open dataset failed: {}", e)))?; | ||||
|  | ||||
|         // Build scanner with projection; we project needed fields and filter client-side to support meta keys | ||||
|         let mut scan = ds.scan(); | ||||
|         if let Err(e) = scan.project(&["id", "vector", "meta", "text", "media_type", "media_uri"]) { | ||||
|             return Err(DBError(format!("Project failed: {}", e))); | ||||
|         } | ||||
|         // Note: we no longer push down filter to Lance to allow filtering on meta fields client-side. | ||||
|  | ||||
|         let mut stream = scan | ||||
|             .try_into_stream() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Scan stream failed: {}", e)))?; | ||||
|          | ||||
|         // Parse simple equality clause from filter for client-side filtering (supports one `key = 'value'`) | ||||
|         let clause = filter.as_ref().and_then(|s| { | ||||
|             fn parse_eq(s: &str) -> Option<(String, String)> { | ||||
|                 let s = s.trim(); | ||||
|                 let pos = s.find('=').or_else(|| s.find(" = "))?; | ||||
|                 let (k, vraw) = s.split_at(pos); | ||||
|                 let mut v = vraw.trim_start_matches('=').trim(); | ||||
|                 if (v.starts_with('\'') && v.ends_with('\'')) || (v.starts_with('"') && v.ends_with('"')) { | ||||
|                     if v.len() >= 2 { | ||||
|                         v = &v[1..v.len()-1]; | ||||
|                     } | ||||
|                 } | ||||
|                 let key = k.trim().trim_matches('"').trim_matches('\'').to_string(); | ||||
|                 if key.is_empty() { return None; } | ||||
|                 Some((key, v.to_string())) | ||||
|             } | ||||
|             parse_eq(s) | ||||
|         }); | ||||
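|         // Examples the parser above accepts: id = 'doc-1', media_type = "image", or an | ||||
|         // unquoted lang = en. Compound filters (AND/OR, ranges) are not parsed; with such | ||||
|         // a filter the single-clause comparison below will generally match no rows. | ||||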
|  | ||||
|         // Maintain a max-heap keyed by distance (largest distance on top) so the current | ||||
|         // worst hit can be evicted when a closer one arrives; this keeps the k smallest distances. | ||||
|         #[derive(Debug)] | ||||
|         struct Hit { | ||||
|             dist: f32, | ||||
|             id: String, | ||||
|             meta: HashMap<String, String>, | ||||
|         } | ||||
|         impl PartialEq for Hit { | ||||
|             fn eq(&self, other: &Self) -> bool { | ||||
|                 self.dist.eq(&other.dist) | ||||
|             } | ||||
|         } | ||||
|         impl Eq for Hit {} | ||||
|         impl PartialOrd for Hit { | ||||
|             fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|                 // Natural ordering by distance: larger distance = "greater", so peek() | ||||
|                 // returns the current worst hit and the eviction check below stays correct. | ||||
|                 self.dist.partial_cmp(&other.dist) | ||||
|             } | ||||
|         } | ||||
|         impl Ord for Hit { | ||||
|             fn cmp(&self, other: &Self) -> Ordering { | ||||
|                 self.partial_cmp(other).unwrap_or(Ordering::Equal) | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut heap: BinaryHeap<Hit> = BinaryHeap::with_capacity(k); | ||||
|  | ||||
|         while let Some(batch_res) = stream.next().await { | ||||
|             let batch = batch_res.map_err(|e| DBError(format!("Stream batch error: {}", e)))?; | ||||
|  | ||||
|             let id_arr = batch | ||||
|                 .column_by_name("id") | ||||
|                 .ok_or_else(|| DBError("Column 'id' missing".into()))? | ||||
|                 .as_string::<i32>(); | ||||
|             let vec_arr = batch | ||||
|                 .column_by_name("vector") | ||||
|                 .ok_or_else(|| DBError("Column 'vector' missing".into()))? | ||||
|                 .as_fixed_size_list(); | ||||
|             let meta_arr = batch | ||||
|                 .column_by_name("meta") | ||||
|                 .map(|a| a.as_string::<i32>().clone()); | ||||
|             let text_arr = batch | ||||
|                 .column_by_name("text") | ||||
|                 .map(|a| a.as_string::<i32>().clone()); | ||||
|             let mt_arr = batch | ||||
|                 .column_by_name("media_type") | ||||
|                 .map(|a| a.as_string::<i32>().clone()); | ||||
|             let mu_arr = batch | ||||
|                 .column_by_name("media_uri") | ||||
|                 .map(|a| a.as_string::<i32>().clone()); | ||||
|  | ||||
|             for i in 0..batch.num_rows() { | ||||
|                 // Extract id | ||||
|                 let id_val = id_arr.value(i).to_string(); | ||||
|  | ||||
|                 // Parse meta JSON if present | ||||
|                 let mut meta: HashMap<String, String> = HashMap::new(); | ||||
|                 if let Some(meta_col) = &meta_arr { | ||||
|                     if !meta_col.is_null(i) { | ||||
|                         let s = meta_col.value(i); | ||||
|                         if let Ok(JsonValue::Object(map)) = serde_json::from_str::<JsonValue>(s) { | ||||
|                             for (k, v) in map { | ||||
|                                 if let Some(vs) = v.as_str() { | ||||
|                                     meta.insert(k, vs.to_string()); | ||||
|                                 } else if v.is_number() || v.is_boolean() { | ||||
|                                     meta.insert(k, v.to_string()); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Evaluate simple equality filter if provided (supports one clause) | ||||
|                 let passes = if let Some((ref key, ref val)) = clause { | ||||
|                     let candidate = match key.as_str() { | ||||
|                         "id" => Some(id_val.clone()), | ||||
|                         "text" => text_arr.as_ref().and_then(|col| if col.is_null(i) { None } else { Some(col.value(i).to_string()) }), | ||||
|                         "media_type" => mt_arr.as_ref().and_then(|col| if col.is_null(i) { None } else { Some(col.value(i).to_string()) }), | ||||
|                         "media_uri" => mu_arr.as_ref().and_then(|col| if col.is_null(i) { None } else { Some(col.value(i).to_string()) }), | ||||
|                         _ => meta.get(key).cloned(), | ||||
|                     }; | ||||
|                     match candidate { | ||||
|                         Some(cv) => cv == *val, | ||||
|                         None => false, | ||||
|                     } | ||||
|                 } else { true }; | ||||
|                 if !passes { | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 // Compute L2 distance | ||||
|                 let val = vec_arr.value(i); | ||||
|                 let prim = val.as_primitive::<Float32Type>(); | ||||
|                 let mut dist: f32 = 0.0; | ||||
|                 let plen = prim.len(); | ||||
|                 for j in 0..plen { | ||||
|                     let r = prim.value(j); | ||||
|                     let d = query[j] - r; | ||||
|                     dist += d * d; | ||||
|                 } | ||||
|  | ||||
|                 // Apply return_fields on meta | ||||
|                 let mut meta_out = meta; | ||||
|                 if let Some(fields) = &return_fields { | ||||
|                     let mut filtered = HashMap::new(); | ||||
|                     for f in fields { | ||||
|                         if let Some(val) = meta_out.get(f) { | ||||
|                             filtered.insert(f.clone(), val.clone()); | ||||
|                         } | ||||
|                     } | ||||
|                     meta_out = filtered; | ||||
|                 } | ||||
|  | ||||
|                 let hit = Hit { dist, id: id_val, meta: meta_out }; | ||||
|  | ||||
|                 if heap.len() < k { | ||||
|                     heap.push(hit); | ||||
|                 } else if let Some(top) = heap.peek() { | ||||
|                     if hit.dist < top.dist { | ||||
|                         heap.pop(); | ||||
|                         heap.push(hit); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Extract and sort ascending by distance | ||||
|         let mut hits: Vec<Hit> = heap.into_sorted_vec(); // already ascending by dist due to Ord | ||||
|         let out = hits | ||||
|             .drain(..) | ||||
|             .map(|h| (h.id, h.dist, h.meta)) | ||||
|             .collect::<Vec<_>>(); | ||||
|         Ok(out) | ||||
|     } | ||||
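|  | ||||
|     /// Illustrative end-to-end sketch (not part of the original change set): create a | ||||
|     /// dataset, insert one vector via store_vector(), then run the brute-force | ||||
|     /// search_vectors() scan defined above. Assumes `base_dir` is a writable directory. | ||||
|     #[allow(dead_code)] | ||||
|     pub async fn usage_sketch(base_dir: &Path) -> Result<(), DBError> { | ||||
|         let store = LanceStore::new(base_dir, 0)?; | ||||
|         store.create_dataset("notes", 4).await?; | ||||
|         store | ||||
|             .store_vector( | ||||
|                 "notes", | ||||
|                 "doc-1", | ||||
|                 vec![0.1, 0.2, 0.3, 0.4], | ||||
|                 HashMap::from([("lang".to_string(), "en".to_string())]), | ||||
|                 Some("hello world".to_string()), | ||||
|             ) | ||||
|             .await?; | ||||
|         let hits = store | ||||
|             .search_vectors("notes", vec![0.1, 0.2, 0.3, 0.4], 1, None, None) | ||||
|             .await?; | ||||
|         assert_eq!(hits.first().map(|(id, _, _)| id.as_str()), Some("doc-1")); | ||||
|         Ok(()) | ||||
|     } | ||||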
|  | ||||
|     // Create an ANN index on the vector column (IVF_PQ or similar) | ||||
|     pub async fn create_index( | ||||
|         &self, | ||||
|         name: &str, | ||||
|         index_type: &str, | ||||
|         params: HashMap<String, String>, | ||||
|     ) -> Result<(), DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         if !path.exists() { | ||||
|             return Err(DBError(format!("Dataset '{}' not found", name))); | ||||
|         } | ||||
|         // Attempt to create a vector index using lance low-level API if available. | ||||
|         // Some crate versions hide IndexType; to ensure build stability, we fall back to a no-op if the API is not accessible. | ||||
|         let _ = (index_type, params); // currently unused; reserved for future tuning | ||||
|         // TODO: Implement using lance::Dataset::create_index when public API is stable across versions. | ||||
|         // For now, succeed as a no-op to keep flows working; search will operate as brute-force scan. | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     // List datasets (tables) under this DB (show user-level logical names without .lance) | ||||
|     pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> { | ||||
|         let mut out = Vec::new(); | ||||
|         if self.base_dir.exists() { | ||||
|             if let Ok(rd) = std::fs::read_dir(&self.base_dir) { | ||||
|                 for entry in rd.flatten() { | ||||
|                     let p = entry.path(); | ||||
|                     if let Some(name) = p.file_name().and_then(|s| s.to_str()) { | ||||
|                         // Only list .lance datasets | ||||
|                         if name.ends_with(".lance") { | ||||
|                             out.push(name.trim_end_matches(".lance").to_string()); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         Ok(out) | ||||
|     } | ||||
|  | ||||
|     // Return basic dataset info map | ||||
|     pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> { | ||||
|         let path = self.dataset_path(name); | ||||
|         let mut m = HashMap::new(); | ||||
|         m.insert("name".to_string(), name.to_string()); | ||||
|         m.insert("path".to_string(), path.display().to_string()); | ||||
|         if !path.exists() { | ||||
|             return Err(DBError(format!("Dataset '{}' not found", name))); | ||||
|         } | ||||
|  | ||||
|         let ds = Dataset::open(path.to_string_lossy().as_ref()) | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Open dataset failed: {}", e)))?; | ||||
|  | ||||
|         // dim: infer by scanning first batch | ||||
|         let mut dim_str = "unknown".to_string(); | ||||
|         { | ||||
|             let mut scan = ds.scan(); | ||||
|             if scan.project(&["vector"]).is_ok() { | ||||
|                 if let Ok(mut stream) = scan.try_into_stream().await { | ||||
|                     if let Some(batch_res) = stream.next().await { | ||||
|                         if let Ok(batch) = batch_res { | ||||
|                             if let Some(col) = batch.column_by_name("vector") { | ||||
|                                 let fsl = col.as_fixed_size_list(); | ||||
|                                 dim_str = fsl.value_length().to_string(); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         m.insert("dimension".to_string(), dim_str); | ||||
|  | ||||
|         // row_count (computed by scanning all batches) | ||||
|         let mut scan = ds.scan(); | ||||
|         if let Err(e) = scan.project(&["id"]) { | ||||
|             return Err(DBError(format!("Project failed: {e}"))); | ||||
|         } | ||||
|         let mut stream = scan | ||||
|             .try_into_stream() | ||||
|             .await | ||||
|             .map_err(|e| DBError(format!("Scan failed: {e}")))?; | ||||
|         let mut rows: usize = 0; | ||||
|         while let Some(batch_res) = stream.next().await { | ||||
|             let batch = batch_res.map_err(|e| DBError(format!("Scan batch error: {}", e)))?; | ||||
|             rows += batch.num_rows(); | ||||
|         } | ||||
|         m.insert("row_count".to_string(), rows.to_string()); | ||||
|  | ||||
|         // indexes: we can’t easily enumerate; set to "unknown" (future: read index metadata) | ||||
|         m.insert("indexes".to_string(), "unknown".to_string()); | ||||
|  | ||||
|         Ok(m) | ||||
|     } | ||||
| } | ||||
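Since create_index above is currently a no-op, search_vectors relies entirely on the brute-force scan shown earlier: every row's L2 distance is computed and a bounded max-heap keeps only the k nearest hits. The sketch below is a minimal, self-contained illustration of that heap pattern using a hypothetical Candidate type; the real Hit struct and its Ord implementation live earlier in lance_store.rs and are not shown in this hunk.

use std::cmp::Ordering;
use std::collections::BinaryHeap;

// Hypothetical stand-in for the `Hit` type used above; only the distance matters here.
#[derive(PartialEq)]
struct Candidate {
    dist: f32,
}
impl Eq for Candidate {}
impl PartialOrd for Candidate {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Candidate {
    // Order by distance so that `peek()` yields the worst (largest) of the kept k.
    fn cmp(&self, other: &Self) -> Ordering {
        self.dist.partial_cmp(&other.dist).unwrap_or(Ordering::Equal)
    }
}

/// Keep only the k smallest distances while streaming over candidates.
fn top_k(dists: impl IntoIterator<Item = f32>, k: usize) -> Vec<f32> {
    let mut heap: BinaryHeap<Candidate> = BinaryHeap::new();
    for d in dists {
        let c = Candidate { dist: d };
        if heap.len() < k {
            heap.push(c);
        } else if let Some(top) = heap.peek() {
            // Replace the current worst only if the new candidate is closer.
            if c.dist < top.dist {
                heap.pop();
                heap.push(c);
            }
        }
    }
    // `into_sorted_vec` returns ascending order per `Ord`, i.e. nearest first.
    heap.into_sorted_vec().into_iter().map(|c| c.dist).collect()
}

Because Ord places the largest distance at the top of the heap, peek() always exposes the current worst of the best k, and into_sorted_vec() returns the survivors nearest-first, which is why the code above needs no extra sort.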
							
								
								
									
src/lib.rs (new file, 18 lines)
							| @@ -0,0 +1,18 @@ | ||||
| pub mod age; | ||||
| pub mod sym; | ||||
| pub mod cmd; | ||||
| pub mod crypto; | ||||
| pub mod error; | ||||
| pub mod options; | ||||
| pub mod protocol; | ||||
| pub mod rpc; | ||||
| pub mod rpc_server; | ||||
| pub mod server; | ||||
| pub mod storage; | ||||
| pub mod storage_trait; | ||||
| pub mod storage_sled; | ||||
| pub mod admin_meta; | ||||
| pub mod tantivy_search; | ||||
| pub mod search_cmd; | ||||
| pub mod lance_store; | ||||
| pub mod embedding; | ||||
							
								
								
									
src/main.rs (new file, 173 lines)
							| @@ -0,0 +1,173 @@ | ||||
| // #![allow(unused_imports)] | ||||
|  | ||||
| use std::path::PathBuf; | ||||
| use tokio::net::TcpListener; | ||||
|  | ||||
| use herodb::server; | ||||
| use herodb::rpc_server; | ||||
|  | ||||
| use clap::Parser; | ||||
|  | ||||
| /// HeroDB server (RESP-compatible) with full-text and vector search extensions | ||||
| #[derive(Parser, Debug)] | ||||
| #[command(version, about, long_about = None)] | ||||
| struct Args { | ||||
|     /// Directory where the database files are stored | ||||
|     #[arg(long)] | ||||
|     dir: PathBuf, | ||||
|  | ||||
|     /// Port for the server to listen on; defaults to 6379 if not specified | ||||
|     #[arg(long)] | ||||
|     port: Option<u16>, | ||||
|  | ||||
|     /// Enable debug mode | ||||
|     #[arg(long)] | ||||
|     debug: bool, | ||||
|  | ||||
|     /// Master encryption key for encrypted databases (deprecated; ignored for data DBs) | ||||
|     #[arg(long)] | ||||
|     encryption_key: Option<String>, | ||||
|  | ||||
|     /// Encrypt the database (deprecated; ignored for data DBs) | ||||
|     #[arg(long)] | ||||
|     encrypt: bool, | ||||
|  | ||||
|     /// Enable RPC management server | ||||
|     #[arg(long)] | ||||
|     enable_rpc: bool, | ||||
|  | ||||
|     /// RPC server port (default: 8080) | ||||
|     #[arg(long, default_value = "8080")] | ||||
|     rpc_port: u16, | ||||
|  | ||||
|     /// Enable RPC over Unix Domain Socket (IPC) | ||||
|     #[arg(long)] | ||||
|     enable_rpc_ipc: bool, | ||||
|  | ||||
|     /// RPC IPC socket path (Unix Domain Socket) | ||||
|     #[arg(long, default_value = "/tmp/herodb.ipc")] | ||||
|     rpc_ipc_path: String, | ||||
|  | ||||
|     /// Use the sled backend | ||||
|     #[arg(long)] | ||||
|     sled: bool, | ||||
|  | ||||
|     /// Admin secret used to encrypt DB 0 and authorize admin access (required) | ||||
|     #[arg(long)] | ||||
|     admin_secret: String, | ||||
| } | ||||
|  | ||||
| #[tokio::main] | ||||
| async fn main() { | ||||
|     // parse args | ||||
|     let args = Args::parse(); | ||||
|  | ||||
|     // bind port | ||||
|     let port = args.port.unwrap_or(6379); | ||||
|     println!("will listen on port: {}", port); | ||||
|     let listener = TcpListener::bind(format!("127.0.0.1:{}", port)) | ||||
|         .await | ||||
|         .unwrap(); | ||||
|  | ||||
|     // deprecation warnings for legacy flags | ||||
|     if args.encrypt || args.encryption_key.is_some() { | ||||
|         eprintln!("warning: --encrypt and --encryption-key are deprecated and ignored for data DBs. Admin DB 0 is always encrypted with --admin-secret."); | ||||
|     } | ||||
|     // basic validation for admin secret | ||||
|     if args.admin_secret.trim().is_empty() { | ||||
|         eprintln!("error: --admin-secret must not be empty"); | ||||
|         std::process::exit(2); | ||||
|     } | ||||
|  | ||||
|     // new DB option | ||||
|     let option = herodb::options::DBOption { | ||||
|         dir: args.dir.clone(), | ||||
|         port, | ||||
|         debug: args.debug, | ||||
|         encryption_key: args.encryption_key, | ||||
|         encrypt: args.encrypt, | ||||
|         backend: if args.sled { | ||||
|             herodb::options::BackendType::Sled | ||||
|         } else { | ||||
|             herodb::options::BackendType::Redb | ||||
|         }, | ||||
|         admin_secret: args.admin_secret.clone(), | ||||
|     }; | ||||
|  | ||||
|     let backend = option.backend.clone(); | ||||
|  | ||||
|     // Bootstrap admin DB 0 before opening any server storage | ||||
|     if let Err(e) = herodb::admin_meta::ensure_bootstrap(&args.dir, backend.clone(), &args.admin_secret) { | ||||
|         eprintln!("Failed to bootstrap admin DB 0: {}", e.0); | ||||
|         std::process::exit(2); | ||||
|     } | ||||
|  | ||||
|     // new server | ||||
|     let server = server::Server::new(option).await; | ||||
|  | ||||
|     // Add a small delay to ensure the port is ready | ||||
|     tokio::time::sleep(std::time::Duration::from_millis(100)).await; | ||||
|  | ||||
|     // Start RPC server if enabled | ||||
|     let _rpc_handle = if args.enable_rpc { | ||||
|         let rpc_addr = format!("127.0.0.1:{}", args.rpc_port).parse().unwrap(); | ||||
|         let base_dir = args.dir.clone(); | ||||
|  | ||||
|         match rpc_server::start_rpc_server(rpc_addr, base_dir, backend.clone(), args.admin_secret.clone()).await { | ||||
|             Ok(handle) => { | ||||
|                 println!("RPC management server started on port {}", args.rpc_port); | ||||
|                 Some(handle) | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 eprintln!("Failed to start RPC server: {}", e); | ||||
|                 None | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         None | ||||
|     }; | ||||
|  | ||||
|     // Start IPC (Unix socket) RPC server if enabled | ||||
|     let _rpc_ipc_handle = if args.enable_rpc_ipc { | ||||
|         let base_dir = args.dir.clone(); | ||||
|         let ipc_path = args.rpc_ipc_path.clone(); | ||||
|  | ||||
|         // Remove stale socket if present | ||||
|         if std::path::Path::new(&ipc_path).exists() { | ||||
|             let _ = std::fs::remove_file(&ipc_path); | ||||
|         } | ||||
|  | ||||
|         match rpc_server::start_rpc_ipc_server(ipc_path.clone(), base_dir, backend.clone(), args.admin_secret.clone()).await { | ||||
|             Ok(handle) => { | ||||
|                 println!("RPC IPC server started at {}", ipc_path); | ||||
|                 Some(handle) | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 eprintln!("Failed to start RPC IPC server: {}", e); | ||||
|                 None | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         None | ||||
|     }; | ||||
|  | ||||
|     // accept new connections | ||||
|     loop { | ||||
|         let stream = listener.accept().await; | ||||
|         match stream { | ||||
|             Ok((stream, _)) => { | ||||
|                 println!("accepted new connection"); | ||||
|  | ||||
|                 let mut sc = server.clone(); | ||||
|                 tokio::spawn(async move { | ||||
|                     if let Err(e) = sc.handle(stream).await { | ||||
|                         println!("error: {:?}, will close the connection. Bye", e); | ||||
|                     } | ||||
|                 }); | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 println!("error: {}", e); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
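main.rs wires everything together: parse flags, validate the admin secret, bootstrap admin DB 0, start the RESP listener, and optionally the HTTP and IPC RPC servers. The sketch below drives the same startup path from library code; it is illustrative only, uses placeholder paths and secrets, and assumes the crate is consumed as a dependency named `herodb` exposing the paths shown in the diff above.

use std::path::PathBuf;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder location and secret; a real deployment should supply its own.
    let dir = PathBuf::from("/tmp/herodb-data");
    let admin_secret = "change-me".to_string();

    let option = herodb::options::DBOption {
        dir: dir.clone(),
        port: 6379,
        debug: false,
        encrypt: false,
        encryption_key: None,
        backend: herodb::options::BackendType::Redb,
        admin_secret: admin_secret.clone(),
    };

    // Admin DB 0 must be bootstrapped before any storage is opened (same as main.rs).
    herodb::admin_meta::ensure_bootstrap(&dir, option.backend.clone(), &admin_secret)
        .map_err(|e| e.0)?;

    let listener = tokio::net::TcpListener::bind("127.0.0.1:6379").await?;
    let server = herodb::server::Server::new(option).await;

    loop {
        let (stream, _) = listener.accept().await?;
        let mut sc = server.clone();
        tokio::spawn(async move {
            let _ = sc.handle(stream).await;
        });
    }
}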
							
								
								
									
src/options.rs (new file, 23 lines)
							| @@ -0,0 +1,23 @@ | ||||
| use std::path::PathBuf; | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||||
| pub enum BackendType { | ||||
|     Redb, | ||||
|     Sled, | ||||
|     Tantivy, // Full-text search backend (no KV storage) | ||||
|     Lance,   // Vector database backend (no KV storage) | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct DBOption { | ||||
|     pub dir: PathBuf, | ||||
|     pub port: u16, | ||||
|     pub debug: bool, | ||||
|     // Deprecated for data DBs; retained for backward-compat on CLI parsing | ||||
|     pub encrypt: bool, | ||||
|     // Deprecated for data DBs; retained for backward-compat on CLI parsing | ||||
|     pub encryption_key: Option<String>, | ||||
|     pub backend: BackendType, | ||||
|     // New: required admin secret, used to encrypt DB 0 and authorize admin operations | ||||
|     pub admin_secret: String, | ||||
| } | ||||
							
								
								
									
src/rpc.rs (new file, 1346 lines; diff suppressed because it is too large)
src/rpc_server.rs (new file, 52 lines)
							| @@ -0,0 +1,52 @@ | ||||
| use std::net::SocketAddr; | ||||
| use std::path::PathBuf; | ||||
| use jsonrpsee::server::{ServerBuilder, ServerHandle}; | ||||
| use jsonrpsee::RpcModule; | ||||
| use reth_ipc::server::Builder as IpcServerBuilder; | ||||
|  | ||||
| use crate::rpc::{RpcServer, RpcServerImpl}; | ||||
|  | ||||
| /// Start the RPC server on the specified address | ||||
| pub async fn start_rpc_server(addr: SocketAddr, base_dir: PathBuf, backend: crate::options::BackendType, admin_secret: String) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> { | ||||
|     // Create the RPC server implementation | ||||
|     let rpc_impl = RpcServerImpl::new(base_dir, backend, admin_secret); | ||||
|  | ||||
|     // Create the RPC module | ||||
|     let mut module = RpcModule::new(()); | ||||
|     module.merge(RpcServer::into_rpc(rpc_impl))?; | ||||
|  | ||||
|     // Build the server with both HTTP and WebSocket support | ||||
|     let server = ServerBuilder::default() | ||||
|         .build(addr) | ||||
|         .await?; | ||||
|  | ||||
|     // Start the server | ||||
|     let handle = server.start(module); | ||||
|  | ||||
|     println!("RPC server started on {}", addr); | ||||
|  | ||||
|     Ok(handle) | ||||
| } | ||||
|  | ||||
| /// Start the JSON-RPC IPC server on the specified Unix socket endpoint | ||||
| pub async fn start_rpc_ipc_server( | ||||
|     endpoint: String, | ||||
|     base_dir: PathBuf, | ||||
|     backend: crate::options::BackendType, | ||||
|     admin_secret: String, | ||||
| ) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> { | ||||
|     // Create the RPC server implementation | ||||
|     let rpc_impl = RpcServerImpl::new(base_dir, backend, admin_secret); | ||||
|  | ||||
|     // Create the RPC module | ||||
|     let mut module = RpcModule::new(()); | ||||
|     module.merge(RpcServer::into_rpc(rpc_impl))?; | ||||
|  | ||||
|     // Build the IPC server and start it | ||||
|     let server = IpcServerBuilder::default().build(endpoint.clone()); | ||||
|     let handle = server.start(module).await?; | ||||
|  | ||||
|     println!("RPC IPC server started on {}", endpoint); | ||||
|  | ||||
|     Ok(handle) | ||||
| } | ||||
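For completeness, a management client could reach the HTTP endpoint started above with jsonrpsee's own client. The snippet is a sketch only: jsonrpsee's client feature must be enabled, the exact API differs slightly between jsonrpsee versions, and the method name hero_listDatabases is purely hypothetical, since the real RPC surface is defined in src/rpc.rs, whose diff is suppressed above.

use jsonrpsee::core::client::ClientT;
use jsonrpsee::http_client::HttpClientBuilder;
use jsonrpsee::rpc_params;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Talks to the server started by start_rpc_server on --rpc-port (default 8080).
    let client = HttpClientBuilder::default().build("http://127.0.0.1:8080")?;

    // Hypothetical method name; consult src/rpc.rs for the actual RPC surface.
    let dbs: serde_json::Value = client.request("hero_listDatabases", rpc_params![]).await?;
    println!("{dbs}");
    Ok(())
}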
							
								
								
									
src/search_cmd.rs (new file, 378 lines)
							| @@ -0,0 +1,378 @@ | ||||
| use crate::{ | ||||
|     error::DBError, | ||||
|     protocol::Protocol, | ||||
|     server::Server, | ||||
|     tantivy_search::{ | ||||
|         FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch, | ||||
|     }, | ||||
| }; | ||||
| use std::collections::HashMap; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| pub async fn ft_create_cmd( | ||||
|     server: &Server, | ||||
|     index_name: String, | ||||
|     schema: Vec<(String, String, Vec<String>)>, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|  | ||||
|     if !server.has_write_permission() { | ||||
|         return Ok(Protocol::err("ERR write permission denied")); | ||||
|     } | ||||
|  | ||||
|     // Parse schema into field definitions | ||||
|     let mut field_definitions = Vec::new(); | ||||
|     for (field_name, field_type, options) in schema { | ||||
|         let field_def = match field_type.to_uppercase().as_str() { | ||||
|             "TEXT" => { | ||||
|                 let mut sortable = false; | ||||
|                 let mut no_index = false; | ||||
|                 // Weight is not used in current implementation | ||||
|                 let mut _weight = 1.0f32; | ||||
|                 let mut i = 0; | ||||
|                 while i < options.len() { | ||||
|                     match options[i].to_uppercase().as_str() { | ||||
|                         "WEIGHT" => { | ||||
|                             if i + 1 < options.len() { | ||||
|                                 _weight = options[i + 1].parse::<f32>().unwrap_or(1.0); | ||||
|                                 i += 2; | ||||
|                                 continue; | ||||
|                             } | ||||
|                         } | ||||
|                         "SORTABLE" => { | ||||
|                             sortable = true; | ||||
|                         } | ||||
|                         "NOINDEX" => { | ||||
|                             no_index = true; | ||||
|                         } | ||||
|                         _ => {} | ||||
|                     } | ||||
|                     i += 1; | ||||
|                 } | ||||
|                 FieldDef::Text { | ||||
|                     stored: true, | ||||
|                     indexed: !no_index, | ||||
|                     tokenized: true, | ||||
|                     fast: sortable, | ||||
|                 } | ||||
|             } | ||||
|             "NUMERIC" => { | ||||
|                 // default to F64 | ||||
|                 let mut sortable = false; | ||||
|                 for opt in &options { | ||||
|                     if opt.to_uppercase() == "SORTABLE" { | ||||
|                         sortable = true; | ||||
|                     } | ||||
|                 } | ||||
|                 FieldDef::Numeric { | ||||
|                     stored: true, | ||||
|                     indexed: true, | ||||
|                     fast: sortable, | ||||
|                     precision: NumericType::F64, | ||||
|                 } | ||||
|             } | ||||
|             "TAG" => { | ||||
|                 let mut separator = ",".to_string(); | ||||
|                 let mut case_sensitive = false; | ||||
|                 let mut i = 0; | ||||
|                 while i < options.len() { | ||||
|                     match options[i].to_uppercase().as_str() { | ||||
|                         "SEPARATOR" => { | ||||
|                             if i + 1 < options.len() { | ||||
|                                 separator = options[i + 1].clone(); | ||||
|                                 i += 2; | ||||
|                                 continue; | ||||
|                             } | ||||
|                         } | ||||
|                         "CASESENSITIVE" => { | ||||
|                             case_sensitive = true; | ||||
|                         } | ||||
|                         _ => {} | ||||
|                     } | ||||
|                     i += 1; | ||||
|                 } | ||||
|                 FieldDef::Tag { | ||||
|                     stored: true, | ||||
|                     separator, | ||||
|                     case_sensitive, | ||||
|                 } | ||||
|             } | ||||
|             "GEO" => FieldDef::Geo { stored: true }, | ||||
|             _ => { | ||||
|                 return Err(DBError(format!("Unknown field type: {}", field_type))); | ||||
|             } | ||||
|         }; | ||||
|         field_definitions.push((field_name, field_def)); | ||||
|     } | ||||
|  | ||||
|     // Create the search index | ||||
|     let search_path = server.search_index_path(); | ||||
|     let config = IndexConfig::default(); | ||||
|     let search_index = TantivySearch::new_with_schema( | ||||
|         search_path, | ||||
|         index_name.clone(), | ||||
|         field_definitions, | ||||
|         Some(config), | ||||
|     )?; | ||||
|  | ||||
|     // Store in registry | ||||
|     let mut indexes = server.search_indexes.write().unwrap(); | ||||
|     indexes.insert(index_name, Arc::new(search_index)); | ||||
|  | ||||
|     Ok(Protocol::SimpleString("OK".to_string())) | ||||
| } | ||||
|  | ||||
| pub async fn ft_add_cmd( | ||||
|     server: &Server, | ||||
|     index_name: String, | ||||
|     doc_id: String, | ||||
|     _score: f64, | ||||
|     fields: HashMap<String, String>, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|     if !server.has_write_permission() { | ||||
|         return Ok(Protocol::err("ERR write permission denied")); | ||||
|     } | ||||
|     let indexes = server.search_indexes.read().unwrap(); | ||||
|     let search_index = indexes | ||||
|         .get(&index_name) | ||||
|         .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?; | ||||
|     search_index.add_document_with_fields(&doc_id, fields)?; | ||||
|     Ok(Protocol::SimpleString("OK".to_string())) | ||||
| } | ||||
|  | ||||
| pub async fn ft_search_cmd( | ||||
|     server: &Server, | ||||
|     index_name: String, | ||||
|     query: String, | ||||
|     filters: Vec<(String, String)>, | ||||
|     limit: Option<usize>, | ||||
|     offset: Option<usize>, | ||||
|     return_fields: Option<Vec<String>>, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|     if !server.has_read_permission() { | ||||
|         return Ok(Protocol::err("ERR read permission denied")); | ||||
|     } | ||||
|     let indexes = server.search_indexes.read().unwrap(); | ||||
|     let search_index = indexes | ||||
|         .get(&index_name) | ||||
|         .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?; | ||||
|  | ||||
|     let search_filters = filters | ||||
|         .into_iter() | ||||
|         .map(|(field, value)| Filter { | ||||
|             field, | ||||
|             filter_type: FilterType::Equals(value), | ||||
|         }) | ||||
|         .collect(); | ||||
|  | ||||
|     let options = SearchOptions { | ||||
|         limit: limit.unwrap_or(10), | ||||
|         offset: offset.unwrap_or(0), | ||||
|         filters: search_filters, | ||||
|         sort_by: None, | ||||
|         return_fields, | ||||
|         highlight: false, | ||||
|     }; | ||||
|  | ||||
|     let results = search_index.search_with_options(&query, options)?; | ||||
|   | ||||
|     // Format results as a flattened Redis protocol array to match client expectations: | ||||
|     // [ total, doc_id, score, field, value, field, value, ... , doc_id, score, ... ] | ||||
|     let mut response = Vec::new(); | ||||
|     // First element is the total count | ||||
|     response.push(Protocol::BulkString(results.total.to_string())); | ||||
|     // Then each document flattened | ||||
|     for mut doc in results.documents { | ||||
|         // Add document ID if it exists | ||||
|         if let Some(id) = doc.fields.get("_id") { | ||||
|             response.push(Protocol::BulkString(id.clone())); | ||||
|         } | ||||
|         // Add score | ||||
|         response.push(Protocol::BulkString(doc.score.to_string())); | ||||
|         // Add fields as key-value pairs | ||||
|         for (field_name, field_value) in std::mem::take(&mut doc.fields) { | ||||
|             if field_name != "_id" { | ||||
|                 response.push(Protocol::BulkString(field_name)); | ||||
|                 response.push(Protocol::BulkString(field_value)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|   | ||||
|     Ok(Protocol::Array(response)) | ||||
| } | ||||
|  | ||||
| pub async fn ft_del_cmd( | ||||
|     server: &Server, | ||||
|     index_name: String, | ||||
|     doc_id: String, | ||||
| ) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|     if !server.has_write_permission() { | ||||
|         return Ok(Protocol::err("ERR write permission denied")); | ||||
|     } | ||||
|     let indexes = server.search_indexes.read().unwrap(); | ||||
|     let search_index = indexes | ||||
|         .get(&index_name) | ||||
|         .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?; | ||||
|     let existed = search_index.delete_document_by_id(&doc_id)?; | ||||
|     Ok(Protocol::SimpleString(if existed { "1".to_string() } else { "0".to_string() })) | ||||
| } | ||||
|  | ||||
| pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|     if !server.has_read_permission() { | ||||
|         return Ok(Protocol::err("ERR read permission denied")); | ||||
|     } | ||||
|     let indexes = server.search_indexes.read().unwrap(); | ||||
|     let search_index = indexes | ||||
|         .get(&index_name) | ||||
|         .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?; | ||||
|     let info = search_index.get_info()?; | ||||
|  | ||||
|     // Format info as Redis protocol | ||||
|     let mut response = Vec::new(); | ||||
|     response.push(Protocol::BulkString("index_name".to_string())); | ||||
|     response.push(Protocol::BulkString(info.name)); | ||||
|     response.push(Protocol::BulkString("num_docs".to_string())); | ||||
|     response.push(Protocol::BulkString(info.num_docs.to_string())); | ||||
|     response.push(Protocol::BulkString("num_fields".to_string())); | ||||
|     response.push(Protocol::BulkString(info.fields.len().to_string())); | ||||
|     response.push(Protocol::BulkString("fields".to_string())); | ||||
|     let fields_str = info | ||||
|         .fields | ||||
|         .iter() | ||||
|         .map(|f| format!("{}:{}", f.name, f.field_type)) | ||||
|         .collect::<Vec<_>>() | ||||
|         .join(", "); | ||||
|     response.push(Protocol::BulkString(fields_str)); | ||||
|     Ok(Protocol::Array(response)) | ||||
| } | ||||
|  | ||||
| pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> { | ||||
|     if server.selected_db == 0 { | ||||
|         return Ok(Protocol::err("FT commands are not allowed on DB 0")); | ||||
|     } | ||||
|     // Enforce Tantivy backend for selected DB | ||||
|     let is_tantivy = crate::admin_meta::get_database_backend( | ||||
|         &server.option.dir, | ||||
|         server.option.backend.clone(), | ||||
|         &server.option.admin_secret, | ||||
|         server.selected_db, | ||||
|     ) | ||||
|     .ok() | ||||
|     .flatten() | ||||
|     .map(|b| matches!(b, crate::options::BackendType::Tantivy)) | ||||
|     .unwrap_or(false); | ||||
|     if !is_tantivy { | ||||
|         return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed")); | ||||
|     } | ||||
|  | ||||
|     if !server.has_write_permission() { | ||||
|         return Ok(Protocol::err("ERR write permission denied")); | ||||
|     } | ||||
|  | ||||
|     // Remove from registry and files; report error if nothing to drop | ||||
|     let mut existed = false; | ||||
|     { | ||||
|         let mut indexes = server.search_indexes.write().unwrap(); | ||||
|         if indexes.remove(&index_name).is_some() { | ||||
|             existed = true; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Remove the index files from disk | ||||
|     let index_path = server.search_index_path().join(&index_name); | ||||
|     if index_path.exists() { | ||||
|         std::fs::remove_dir_all(&index_path) | ||||
|             .map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?; | ||||
|         existed = true; | ||||
|     } | ||||
|  | ||||
|     if !existed { | ||||
|         return Ok(Protocol::err(&format!("ERR Index '{}' not found", index_name))); | ||||
|     } | ||||
|  | ||||
|     Ok(Protocol::SimpleString("OK".to_string())) | ||||
| } | ||||
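Two details of this module are easy to miss: FT.CREATE's schema reaches ft_create_cmd as (name, type, options) triples, and ft_search_cmd replies with a flattened array of [total, doc_id, score, field, value, field, value, ...]. The snippet below illustrates the kind of schema argument the parser above accepts, with hypothetical index and field names, assuming the command layer has already tokenized an FT.CREATE call.

// Hypothetical schema: a sortable text field, a sortable numeric field,
// and a comma-separated tag field, matching the option parser above.
let schema: Vec<(String, String, Vec<String>)> = vec![
    ("title".to_string(), "TEXT".to_string(), vec!["SORTABLE".to_string()]),
    ("year".to_string(), "NUMERIC".to_string(), vec!["SORTABLE".to_string()]),
    ("tags".to_string(), "TAG".to_string(), vec!["SEPARATOR".to_string(), ",".to_string()]),
];
// ft_create_cmd(server, "idx".to_string(), schema).await then builds the Tantivy
// index under <base_dir>/search_indexes/<db_id>/idx.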
							
								
								
									
src/server.rs (new file, 547 lines)
							| @@ -0,0 +1,547 @@ | ||||
| use core::str; | ||||
| use std::collections::HashMap; | ||||
| use std::sync::Arc; | ||||
| use tokio::io::AsyncReadExt; | ||||
| use tokio::io::AsyncWriteExt; | ||||
| use tokio::sync::{Mutex, oneshot}; | ||||
|  | ||||
| use std::sync::atomic::{AtomicU64, Ordering}; | ||||
|  | ||||
| use crate::cmd::Cmd; | ||||
| use crate::error::DBError; | ||||
| use crate::options; | ||||
| use crate::protocol::Protocol; | ||||
| use crate::storage_trait::StorageBackend; | ||||
| use crate::admin_meta; | ||||
|  | ||||
| // Embeddings: config and cache | ||||
| use crate::embedding::{EmbeddingConfig, create_embedder, Embedder, create_image_embedder, ImageEmbedder}; | ||||
| use serde_json; | ||||
| use ureq::{Agent, AgentBuilder}; | ||||
| use std::time::Duration; | ||||
| use std::io::Read; | ||||
|  | ||||
| const NO_DB_SELECTED: u64 = u64::MAX; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct Server { | ||||
|     pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>, | ||||
|     pub option: options::DBOption, | ||||
|     pub client_name: Option<String>, | ||||
|     pub selected_db: u64, // Changed from usize to u64 | ||||
|     pub queued_cmd: Option<Vec<(Cmd, Protocol)>>, | ||||
|     pub current_permissions: Option<crate::rpc::Permissions>, | ||||
|  | ||||
|     // In-memory registry of Tantivy search indexes for this server | ||||
|     pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>, | ||||
|  | ||||
|     // Per-DB Lance stores (vector DB), keyed by db_id | ||||
|     pub lance_stores: Arc<std::sync::RwLock<HashMap<u64, Arc<crate::lance_store::LanceStore>>>>, | ||||
|  | ||||
|     // Per-(db_id, dataset) embedder cache (text) | ||||
|     pub embedders: Arc<std::sync::RwLock<HashMap<(u64, String), Arc<dyn Embedder>>>>, | ||||
|  | ||||
|     // Per-(db_id, dataset) image embedder cache (image) | ||||
|     pub image_embedders: Arc<std::sync::RwLock<HashMap<(u64, String), Arc<dyn ImageEmbedder>>>>, | ||||
|      | ||||
|     // BLPOP waiter registry: per (db_index, key) FIFO of waiters | ||||
|     pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>, | ||||
|     pub waiter_seq: Arc<AtomicU64>, | ||||
| } | ||||
|  | ||||
| pub struct Waiter { | ||||
|     pub id: u64, | ||||
|     pub side: PopSide, | ||||
|     pub tx: oneshot::Sender<(String, String)>, // (key, element) | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||
| pub enum PopSide { | ||||
|     Left, | ||||
|     Right, | ||||
| } | ||||
|  | ||||
| impl Server { | ||||
|     pub async fn new(option: options::DBOption) -> Self { | ||||
|         Server { | ||||
|             db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             option, | ||||
|             client_name: None, | ||||
|             selected_db: NO_DB_SELECTED, | ||||
|             queued_cmd: None, | ||||
|             current_permissions: None, | ||||
|      | ||||
|             search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             lance_stores: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             embedders: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             image_embedders: Arc::new(std::sync::RwLock::new(HashMap::new())), | ||||
|             list_waiters: Arc::new(Mutex::new(HashMap::new())), | ||||
|             waiter_seq: Arc::new(AtomicU64::new(1)), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Path where search indexes are stored, namespaced per selected DB: | ||||
|     // <base_dir>/search_indexes/<db_id> | ||||
|     pub fn search_index_path(&self) -> std::path::PathBuf { | ||||
|         let base = std::path::PathBuf::from(&self.option.dir) | ||||
|             .join("search_indexes") | ||||
|             .join(self.selected_db.to_string()); | ||||
|         if !base.exists() { | ||||
|             let _ = std::fs::create_dir_all(&base); | ||||
|         } | ||||
|         base | ||||
|     } | ||||
|  | ||||
|     // Path where Lance datasets are stored, namespaced per selected DB: | ||||
|     // <base_dir>/lance/<db_id> | ||||
|     pub fn lance_data_path(&self) -> std::path::PathBuf { | ||||
|         let base = std::path::PathBuf::from(&self.option.dir) | ||||
|             .join("lance") | ||||
|             .join(self.selected_db.to_string()); | ||||
|         if !base.exists() { | ||||
|             let _ = std::fs::create_dir_all(&base); | ||||
|         } | ||||
|         base | ||||
|     } | ||||
|  | ||||
|     pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> { | ||||
|         // Require explicit SELECT before any storage access | ||||
|         if self.selected_db == NO_DB_SELECTED { | ||||
|             return Err(DBError("No database selected. Use SELECT <id> [KEY <key>] first".to_string())); | ||||
|         } | ||||
|         // Admin DB 0 access must be authenticated with SELECT 0 KEY <admin_secret> | ||||
|         if self.selected_db == 0 { | ||||
|             if !matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite)) { | ||||
|                 return Err(DBError("Admin DB 0 requires SELECT 0 KEY <admin_secret>".to_string())); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut cache = self.db_cache.write().unwrap(); | ||||
|  | ||||
|         if let Some(storage) = cache.get(&self.selected_db) { | ||||
|             return Ok(storage.clone()); | ||||
|         } | ||||
|  | ||||
|         // Use process-wide shared handles to avoid sled/redb double-open lock contention. | ||||
|         let storage = if self.selected_db == 0 { | ||||
|             // Admin DB 0: always via singleton | ||||
|             admin_meta::open_admin_storage( | ||||
|                 &self.option.dir, | ||||
|                 self.option.backend.clone(), | ||||
|                 &self.option.admin_secret, | ||||
|             )? | ||||
|         } else { | ||||
|             // Data DBs: via global registry keyed by id | ||||
|             admin_meta::open_data_storage( | ||||
|                 &self.option.dir, | ||||
|                 self.option.backend.clone(), | ||||
|                 &self.option.admin_secret, | ||||
|                 self.selected_db, | ||||
|             )? | ||||
|         }; | ||||
|  | ||||
|         cache.insert(self.selected_db, storage.clone()); | ||||
|         Ok(storage) | ||||
|     } | ||||
|  | ||||
|     /// Get or create the LanceStore for the currently selected DB. | ||||
|     /// Only valid for non-zero DBs and when the backend is Lance. | ||||
|     pub fn lance_store(&self) -> Result<Arc<crate::lance_store::LanceStore>, DBError> { | ||||
|         if self.selected_db == 0 { | ||||
|             return Err(DBError("Lance not available on admin DB 0".to_string())); | ||||
|         } | ||||
|         // Resolve backend for selected_db | ||||
|         let backend_opt = crate::admin_meta::get_database_backend( | ||||
|             &self.option.dir, | ||||
|             self.option.backend.clone(), | ||||
|             &self.option.admin_secret, | ||||
|             self.selected_db, | ||||
|         ) | ||||
|         .ok() | ||||
|         .flatten(); | ||||
|  | ||||
|         if !matches!(backend_opt, Some(crate::options::BackendType::Lance)) { | ||||
|             return Err(DBError("ERR DB backend is not Lance; LANCE.* commands are not allowed".to_string())); | ||||
|         } | ||||
|  | ||||
|         // Fast path: read lock | ||||
|         { | ||||
|             let map = self.lance_stores.read().unwrap(); | ||||
|             if let Some(store) = map.get(&self.selected_db) { | ||||
|                 return Ok(store.clone()); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Slow path: create and insert | ||||
|         let store = Arc::new(crate::lance_store::LanceStore::new(&self.option.dir, self.selected_db)?); | ||||
|         { | ||||
|             let mut map = self.lance_stores.write().unwrap(); | ||||
|             map.insert(self.selected_db, store.clone()); | ||||
|         } | ||||
|         Ok(store) | ||||
|     } | ||||
|  | ||||
|     // ----- Embedding configuration and resolution ----- | ||||
|  | ||||
|     // Sidecar embedding config path: <base_dir>/lance/<db_id>/<dataset>.lance.embedding.json | ||||
|     fn dataset_embedding_config_path(&self, dataset: &str) -> std::path::PathBuf { | ||||
|         let mut base = self.lance_data_path(); | ||||
|         // Ensure parent dir exists | ||||
|         if !base.exists() { | ||||
|             let _ = std::fs::create_dir_all(&base); | ||||
|         } | ||||
|         base.push(format!("{}.lance.embedding.json", dataset)); | ||||
|         base | ||||
|     } | ||||
|  | ||||
|     /// Persist per-dataset embedding config as JSON sidecar. | ||||
|     pub fn set_dataset_embedding_config(&self, dataset: &str, cfg: &EmbeddingConfig) -> Result<(), DBError> { | ||||
|         if self.selected_db == 0 { | ||||
|             return Err(DBError("Lance not available on admin DB 0".to_string())); | ||||
|         } | ||||
|         let p = self.dataset_embedding_config_path(dataset); | ||||
|         let data = serde_json::to_vec_pretty(cfg) | ||||
|             .map_err(|e| DBError(format!("Failed to serialize embedding config: {}", e)))?; | ||||
|         std::fs::write(&p, data) | ||||
|             .map_err(|e| DBError(format!("Failed to write embedding config {}: {}", p.display(), e)))?; | ||||
|         // Invalidate embedder cache entry for this dataset | ||||
|         { | ||||
|             let mut map = self.embedders.write().unwrap(); | ||||
|             map.remove(&(self.selected_db, dataset.to_string())); | ||||
|         } | ||||
|         { | ||||
|             let mut map_img = self.image_embedders.write().unwrap(); | ||||
|             map_img.remove(&(self.selected_db, dataset.to_string())); | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Load per-dataset embedding config. | ||||
|     pub fn get_dataset_embedding_config(&self, dataset: &str) -> Result<EmbeddingConfig, DBError> { | ||||
|         if self.selected_db == 0 { | ||||
|             return Err(DBError("Lance not available on admin DB 0".to_string())); | ||||
|         } | ||||
|         let p = self.dataset_embedding_config_path(dataset); | ||||
|         if !p.exists() { | ||||
|             return Err(DBError(format!( | ||||
|                 "Embedding config not set for dataset '{}'. Use LANCE.EMBEDDING CONFIG SET ... or RPC to configure.", | ||||
|                 dataset | ||||
|             ))); | ||||
|         } | ||||
|         let data = std::fs::read(&p) | ||||
|             .map_err(|e| DBError(format!("Failed to read embedding config {}: {}", p.display(), e)))?; | ||||
|         let cfg: EmbeddingConfig = serde_json::from_slice(&data) | ||||
|             .map_err(|e| DBError(format!("Failed to parse embedding config {}: {}", p.display(), e)))?; | ||||
|         Ok(cfg) | ||||
|     } | ||||
|  | ||||
|     /// Resolve or build an embedder for (db_id, dataset). Caches instance. | ||||
|     pub fn get_embedder_for(&self, dataset: &str) -> Result<Arc<dyn Embedder>, DBError> { | ||||
|         if self.selected_db == 0 { | ||||
|             return Err(DBError("Lance not available on admin DB 0".to_string())); | ||||
|         } | ||||
|         // Fast path | ||||
|         { | ||||
|             let map = self.embedders.read().unwrap(); | ||||
|             if let Some(e) = map.get(&(self.selected_db, dataset.to_string())) { | ||||
|                 return Ok(e.clone()); | ||||
|             } | ||||
|         } | ||||
|         // Load config and instantiate | ||||
|         let cfg = self.get_dataset_embedding_config(dataset)?; | ||||
|         let emb = create_embedder(&cfg)?; | ||||
|         { | ||||
|             let mut map = self.embedders.write().unwrap(); | ||||
|             map.insert((self.selected_db, dataset.to_string()), emb.clone()); | ||||
|         } | ||||
|         Ok(emb) | ||||
|     } | ||||
|  | ||||
|     /// Resolve or build an IMAGE embedder for (db_id, dataset). Caches instance. | ||||
|     pub fn get_image_embedder_for(&self, dataset: &str) -> Result<Arc<dyn ImageEmbedder>, DBError> { | ||||
|         if self.selected_db == 0 { | ||||
|             return Err(DBError("Lance not available on admin DB 0".to_string())); | ||||
|         } | ||||
|         // Fast path | ||||
|         { | ||||
|             let map = self.image_embedders.read().unwrap(); | ||||
|             if let Some(e) = map.get(&(self.selected_db, dataset.to_string())) { | ||||
|                 return Ok(e.clone()); | ||||
|             } | ||||
|         } | ||||
|         // Load config and instantiate | ||||
|         let cfg = self.get_dataset_embedding_config(dataset)?; | ||||
|         let emb = create_image_embedder(&cfg)?; | ||||
|         { | ||||
|             let mut map = self.image_embedders.write().unwrap(); | ||||
|             map.insert((self.selected_db, dataset.to_string()), emb.clone()); | ||||
|         } | ||||
|         Ok(emb) | ||||
|     } | ||||
|  | ||||
|     /// Download image bytes from a URI with safety checks (size, timeout, content-type, optional host allowlist). | ||||
|     /// Env overrides: | ||||
|     /// - HERODB_IMAGE_MAX_BYTES (u64, default 10485760) | ||||
|     /// - HERODB_IMAGE_FETCH_TIMEOUT_SECS (u64, default 30) | ||||
|     /// - HERODB_IMAGE_ALLOWED_HOSTS (comma-separated, optional) | ||||
|     pub fn fetch_image_bytes_from_uri(&self, uri: &str) -> Result<Vec<u8>, DBError> { | ||||
|         // Basic scheme validation | ||||
|         if !(uri.starts_with("http://") || uri.starts_with("https://")) { | ||||
|             return Err(DBError("Only http(s) URIs are supported for image fetch".into())); | ||||
|         } | ||||
|         // Parse host (naive) for allowlist check | ||||
|         let host = { | ||||
|             let after_scheme = match uri.find("://") { | ||||
|                 Some(i) => &uri[i + 3..], | ||||
|                 None => uri, | ||||
|             }; | ||||
|             let end = after_scheme.find('/').unwrap_or(after_scheme.len()); | ||||
|             let host_port = &after_scheme[..end]; | ||||
|             host_port.split('@').last().unwrap_or(host_port).split(':').next().unwrap_or(host_port).to_string() | ||||
|         }; | ||||
|  | ||||
|         let max_bytes: u64 = std::env::var("HERODB_IMAGE_MAX_BYTES").ok().and_then(|s| s.parse::<u64>().ok()).unwrap_or(10 * 1024 * 1024); | ||||
|         let timeout_secs: u64 = std::env::var("HERODB_IMAGE_FETCH_TIMEOUT_SECS").ok().and_then(|s| s.parse::<u64>().ok()).unwrap_or(30); | ||||
|         let allowed_hosts_env = std::env::var("HERODB_IMAGE_ALLOWED_HOSTS").ok(); | ||||
|         if let Some(allow) = allowed_hosts_env { | ||||
|             if !allow.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()).any(|h| h.eq_ignore_ascii_case(&host)) { | ||||
|                 return Err(DBError(format!("Host '{}' not allowed for image fetch (HERODB_IMAGE_ALLOWED_HOSTS)", host))); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let agent: Agent = AgentBuilder::new() | ||||
|             .timeout_read(Duration::from_secs(timeout_secs)) | ||||
|             .timeout_write(Duration::from_secs(timeout_secs)) | ||||
|             .build(); | ||||
|  | ||||
|         let resp = agent.get(uri).call().map_err(|e| DBError(format!("HTTP GET failed: {}", e)))?; | ||||
|         // Validate content-type | ||||
|         let ctype = resp.header("Content-Type").unwrap_or(""); | ||||
|         let ctype_main = ctype.split(';').next().unwrap_or("").trim().to_ascii_lowercase(); | ||||
|         if !ctype_main.starts_with("image/") { | ||||
|             return Err(DBError(format!("Remote content-type '{}' is not image/*", ctype))); | ||||
|         } | ||||
|  | ||||
|         // Read with cap | ||||
|         let mut reader = resp.into_reader(); | ||||
|         let mut buf: Vec<u8> = Vec::with_capacity(8192); | ||||
|         let mut tmp = [0u8; 8192]; | ||||
|         let mut total: u64 = 0; | ||||
|         loop { | ||||
|             let n = reader.read(&mut tmp).map_err(|e| DBError(format!("Read error: {}", e)))?; | ||||
|             if n == 0 { break; } | ||||
|             total += n as u64; | ||||
|             if total > max_bytes { | ||||
|                 return Err(DBError(format!("Image exceeds max allowed bytes {}", max_bytes))); | ||||
|             } | ||||
|             buf.extend_from_slice(&tmp[..n]); | ||||
|         } | ||||
|         Ok(buf) | ||||
|     } | ||||
|  | ||||
|     /// Check if current permissions allow read operations | ||||
|     pub fn has_read_permission(&self) -> bool { | ||||
|         // No DB selected -> no permissions | ||||
|         if self.selected_db == NO_DB_SELECTED { | ||||
|             return false; | ||||
|         } | ||||
|         // If an explicit permission is set for this connection, honor it. | ||||
|         if let Some(perms) = self.current_permissions.as_ref() { | ||||
|             return matches!(*perms, crate::rpc::Permissions::Read | crate::rpc::Permissions::ReadWrite); | ||||
|         } | ||||
|         // Fallback ONLY when no explicit permission context (e.g., JSON-RPC flows without SELECT). | ||||
|         match crate::admin_meta::verify_access( | ||||
|             &self.option.dir, | ||||
|             self.option.backend.clone(), | ||||
|             &self.option.admin_secret, | ||||
|             self.selected_db, | ||||
|             None, | ||||
|         ) { | ||||
|             Ok(Some(crate::rpc::Permissions::Read)) | Ok(Some(crate::rpc::Permissions::ReadWrite)) => true, | ||||
|             _ => false, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Check if current permissions allow write operations | ||||
|     pub fn has_write_permission(&self) -> bool { | ||||
|         // No DB selected -> no permissions | ||||
|         if self.selected_db == NO_DB_SELECTED { | ||||
|             return false; | ||||
|         } | ||||
|         // If an explicit permission is set for this connection, honor it. | ||||
|         if let Some(perms) = self.current_permissions.as_ref() { | ||||
|             return matches!(*perms, crate::rpc::Permissions::ReadWrite); | ||||
|         } | ||||
|         // Fallback ONLY when no explicit permission context (e.g., JSON-RPC flows without SELECT). | ||||
|         match crate::admin_meta::verify_access( | ||||
|             &self.option.dir, | ||||
|             self.option.backend.clone(), | ||||
|             &self.option.admin_secret, | ||||
|             self.selected_db, | ||||
|             None, | ||||
|         ) { | ||||
|             Ok(Some(crate::rpc::Permissions::ReadWrite)) => true, | ||||
|             _ => false, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // ----- BLPOP waiter helpers ----- | ||||
|  | ||||
|     pub async fn register_waiter(&self, db_index: u64, key: &str, side: PopSide) -> (u64, oneshot::Receiver<(String, String)>) { | ||||
|         let id = self.waiter_seq.fetch_add(1, Ordering::Relaxed); | ||||
|         let (tx, rx) = oneshot::channel::<(String, String)>(); | ||||
|  | ||||
|         let mut guard = self.list_waiters.lock().await; | ||||
|         let per_db = guard.entry(db_index).or_insert_with(HashMap::new); | ||||
|         let q = per_db.entry(key.to_string()).or_insert_with(Vec::new); | ||||
|         q.push(Waiter { id, side, tx }); | ||||
|         (id, rx) | ||||
|     } | ||||
|  | ||||
|     pub async fn unregister_waiter(&self, db_index: u64, key: &str, id: u64) { | ||||
|         let mut guard = self.list_waiters.lock().await; | ||||
|         if let Some(per_db) = guard.get_mut(&db_index) { | ||||
|             if let Some(q) = per_db.get_mut(key) { | ||||
|                 q.retain(|w| w.id != id); | ||||
|                 if q.is_empty() { | ||||
|                     per_db.remove(key); | ||||
|                 } | ||||
|             } | ||||
|             if per_db.is_empty() { | ||||
|                 guard.remove(&db_index); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Called after LPUSH/RPUSH to deliver to blocked BLPOP waiters. | ||||
|     pub async fn drain_waiters_after_push(&self, key: &str) -> Result<(), DBError> { | ||||
|         let db_index = self.selected_db; | ||||
|  | ||||
|         loop { | ||||
|             // Check if any waiter exists | ||||
|             let maybe_waiter = { | ||||
|                 let mut guard = self.list_waiters.lock().await; | ||||
|                 if let Some(per_db) = guard.get_mut(&db_index) { | ||||
|                     if let Some(q) = per_db.get_mut(key) { | ||||
|                         if !q.is_empty() { | ||||
|                             // Pop FIFO | ||||
|                             Some(q.remove(0)) | ||||
|                         } else { | ||||
|                             None | ||||
|                         } | ||||
|                     } else { | ||||
|                         None | ||||
|                     } | ||||
|                 } else { | ||||
|                     None | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             let waiter = if let Some(w) = maybe_waiter { w } else { break }; | ||||
|  | ||||
|             // Pop one element depending on waiter side | ||||
|             let elems = match waiter.side { | ||||
|                 PopSide::Left => self.current_storage()?.lpop(key, 1)?, | ||||
|                 PopSide::Right => self.current_storage()?.rpop(key, 1)?, | ||||
|             }; | ||||
|             if elems.is_empty() { | ||||
|                 // Nothing to deliver; re-register waiter at the front to preserve order | ||||
|                 let mut guard = self.list_waiters.lock().await; | ||||
|                 let per_db = guard.entry(db_index).or_insert_with(HashMap::new); | ||||
|                 let q = per_db.entry(key.to_string()).or_insert_with(Vec::new); | ||||
|                 q.insert(0, waiter); | ||||
|                 break; | ||||
|             } else { | ||||
|                 let elem = elems[0].clone(); | ||||
|                 // Send to waiter; if receiver dropped, just continue | ||||
|                 let _ = waiter.tx.send((key.to_string(), elem)); | ||||
|                 // Loop to try to satisfy more waiters if more elements remain | ||||
|                 continue; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub async fn handle( | ||||
|         &mut self, | ||||
|         mut stream: tokio::net::TcpStream, | ||||
|     ) -> Result<(), DBError> { | ||||
|         // Accumulate incoming bytes to handle partial RESP frames | ||||
|         let mut acc = String::new(); | ||||
|         let mut buf = vec![0u8; 8192]; | ||||
|  | ||||
|         loop { | ||||
|             let n = match stream.read(&mut buf).await { | ||||
|                 Ok(0) => { | ||||
|                     println!("[handle] connection closed"); | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|                 Ok(n) => n, | ||||
|                 Err(e) => { | ||||
|                     println!("[handle] read error: {:?}", e); | ||||
|                     return Err(e.into()); | ||||
|                 } | ||||
|             }; | ||||
|  | ||||
|             // Append to accumulator. RESP for our usage is ASCII-safe. | ||||
|             acc.push_str(str::from_utf8(&buf[..n])?); | ||||
|  | ||||
|             // Try to parse as many complete commands as are available in 'acc'. | ||||
|             loop { | ||||
|                 let parsed = Cmd::from(&acc); | ||||
|                 let (cmd, protocol, remaining) = match parsed { | ||||
|                     Ok((cmd, protocol, remaining)) => (cmd, protocol, remaining), | ||||
|                     Err(_e) => { | ||||
|                         // Incomplete or invalid frame; assume incomplete and wait for more data. | ||||
|                         // This avoids emitting spurious protocol_error for split frames. | ||||
|                         break; | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 // Advance the accumulator to the unparsed remainder | ||||
|                 acc = remaining.to_string(); | ||||
|  | ||||
|                 if self.option.debug { | ||||
|                     println!("\x1b[34;1mgot command: {:?}, protocol: {:?}\x1b[0m", cmd, protocol); | ||||
|                 } else { | ||||
|                     println!("got command: {:?}, protocol: {:?}", cmd, protocol); | ||||
|                 } | ||||
|  | ||||
|                 // Check if this is a QUIT command before processing | ||||
|                 let is_quit = matches!(cmd, Cmd::Quit); | ||||
|  | ||||
|                 let res = match cmd.run(self).await { | ||||
|                     Ok(p) => p, | ||||
|                     Err(e) => { | ||||
|                         if self.option.debug { | ||||
|                             eprintln!("[run error] {:?}", e); | ||||
|                         } | ||||
|                         Protocol::err(&format!("ERR {}", e.0)) | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 if self.option.debug { | ||||
|                     println!("\x1b[34;1mqueued cmd {:?}\x1b[0m", self.queued_cmd); | ||||
|                     println!("\x1b[32;1mgoing to send response {}\x1b[0m", res.encode()); | ||||
|                 } else { | ||||
|                     println!("queued cmd {:?}", self.queued_cmd); | ||||
|                     println!("going to send response {}", res.encode()); | ||||
|                 } | ||||
|  | ||||
|                 _ = stream.write(res.encode().as_bytes()).await?; | ||||
|  | ||||
|                 // If this was a QUIT command, close the connection | ||||
|                 if is_quit { | ||||
|                     println!("[handle] QUIT command received, closing connection"); | ||||
|                     return Ok(()); | ||||
|                 } | ||||
|  | ||||
|                 // Continue parsing any further complete commands already in 'acc' | ||||
|                 if acc.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
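The two methods above cooperate to implement blocking list pops: a BLPOP handler (elsewhere in this file) registers a waiter keyed by database index and list key, and drain_waiters_after_push hands freshly pushed elements to the oldest waiter, re-queuing it at the front if the pop raced with another consumer. A minimal sketch of exercising that path over raw RESP, in the spirit of the tests further down in this diff; the address, port, key name and timeout are illustrative assumptions, and a real connection may first need the SELECT ... KEY handshake shown in those tests:

    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    use tokio::net::TcpStream;

    async fn blpop_lpush_sketch() -> std::io::Result<()> {
        // One connection blocks on BLPOP while a second connection pushes.
        let mut blocker = TcpStream::connect("127.0.0.1:6379").await?;
        let mut pusher = TcpStream::connect("127.0.0.1:6379").await?;

        // BLPOP myq 5 -> the server parks this connection as a waiter for up to 5s.
        blocker
            .write_all(b"*3\r\n$5\r\nBLPOP\r\n$3\r\nmyq\r\n$1\r\n5\r\n")
            .await?;

        // LPUSH myq hello -> triggers drain_waiters_after_push on the server side.
        pusher
            .write_all(b"*3\r\n$5\r\nLPUSH\r\n$3\r\nmyq\r\n$5\r\nhello\r\n")
            .await?;

        // The blocked connection should now receive the array ["myq", "hello"].
        let mut buf = [0u8; 256];
        let n = blocker.read(&mut buf).await?;
        println!("BLPOP reply: {}", String::from_utf8_lossy(&buf[..n]));
        Ok(())
    }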
src/sym.rs (new file, 123 lines)
							| @@ -0,0 +1,123 @@ | ||||
| //! sym.rs — Stateless symmetric encryption (Phase 1) | ||||
| //! | ||||
| //! Commands implemented (RESP): | ||||
| //! - SYM KEYGEN | ||||
| //! - SYM ENCRYPT <key_b64> <message> | ||||
| //! - SYM DECRYPT <key_b64> <ciphertext_b64> | ||||
| //! | ||||
| //! Notes: | ||||
| //! - Raw key: exactly 32 bytes, provided as Base64 in commands. | ||||
| //! - Cipher: XChaCha20-Poly1305 (AEAD) without AAD in Phase 1 | ||||
| //! - Ciphertext binary layout: [version:1][nonce:24][ciphertext||tag] | ||||
| //! - Encoding for wire I/O: Base64 | ||||
|  | ||||
| use base64::{engine::general_purpose::STANDARD as B64, Engine as _}; | ||||
| use chacha20poly1305::{ | ||||
|     aead::{Aead, KeyInit, OsRng}, | ||||
|     XChaCha20Poly1305, XNonce, | ||||
| }; | ||||
| use rand::RngCore; | ||||
|  | ||||
| use crate::protocol::Protocol; | ||||
|  | ||||
| const VERSION: u8 = 1; | ||||
| const NONCE_LEN: usize = 24; | ||||
| const TAG_LEN: usize = 16; | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum SymWireError { | ||||
|     InvalidKey, | ||||
|     BadEncoding, | ||||
|     BadFormat, | ||||
|     BadVersion(u8), | ||||
|     Crypto, | ||||
| } | ||||
|  | ||||
| impl SymWireError { | ||||
|     fn to_protocol(self) -> Protocol { | ||||
|         match self { | ||||
|             SymWireError::InvalidKey => Protocol::err("ERR sym: invalid key"), | ||||
|             SymWireError::BadEncoding => Protocol::err("ERR sym: bad encoding"), | ||||
|             SymWireError::BadFormat => Protocol::err("ERR sym: bad format"), | ||||
|             SymWireError::BadVersion(v) => Protocol::err(&format!("ERR sym: unsupported version {}", v)), | ||||
|             SymWireError::Crypto => Protocol::err("ERR sym: auth failed"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn decode_key_b64(s: &str) -> Result<chacha20poly1305::Key, SymWireError> { | ||||
|     let bytes = B64.decode(s.as_bytes()).map_err(|_| SymWireError::BadEncoding)?; | ||||
|     if bytes.len() != 32 { | ||||
|         return Err(SymWireError::InvalidKey); | ||||
|     } | ||||
|     Ok(chacha20poly1305::Key::from_slice(&bytes).to_owned()) | ||||
| } | ||||
|  | ||||
| fn encrypt_blob(key: &chacha20poly1305::Key, plaintext: &[u8]) -> Result<Vec<u8>, SymWireError> { | ||||
|     let cipher = XChaCha20Poly1305::new(key); | ||||
|  | ||||
|     let mut nonce_bytes = [0u8; NONCE_LEN]; | ||||
|     OsRng.fill_bytes(&mut nonce_bytes); | ||||
|     let nonce = XNonce::from_slice(&nonce_bytes); | ||||
|  | ||||
|     let mut out = Vec::with_capacity(1 + NONCE_LEN + plaintext.len() + TAG_LEN); | ||||
|     out.push(VERSION); | ||||
|     out.extend_from_slice(&nonce_bytes); | ||||
|  | ||||
|     let ct = cipher.encrypt(nonce, plaintext).map_err(|_| SymWireError::Crypto)?; | ||||
|     out.extend_from_slice(&ct); | ||||
|     Ok(out) | ||||
| } | ||||
|  | ||||
| fn decrypt_blob(key: &chacha20poly1305::Key, blob: &[u8]) -> Result<Vec<u8>, SymWireError> { | ||||
|     if blob.len() < 1 + NONCE_LEN + TAG_LEN { | ||||
|         return Err(SymWireError::BadFormat); | ||||
|     } | ||||
|     let ver = blob[0]; | ||||
|     if ver != VERSION { | ||||
|         return Err(SymWireError::BadVersion(ver)); | ||||
|     } | ||||
|     let nonce = XNonce::from_slice(&blob[1..1 + NONCE_LEN]); | ||||
|     let ct = &blob[1 + NONCE_LEN..]; | ||||
|  | ||||
|     let cipher = XChaCha20Poly1305::new(key); | ||||
|     cipher.decrypt(nonce, ct).map_err(|_| SymWireError::Crypto) | ||||
| } | ||||
|  | ||||
| // ---------- Command handlers (RESP) ---------- | ||||
|  | ||||
| pub async fn cmd_sym_keygen() -> Protocol { | ||||
|     let mut key_bytes = [0u8; 32]; | ||||
|     OsRng.fill_bytes(&mut key_bytes); | ||||
|     let key_b64 = B64.encode(key_bytes); | ||||
|     Protocol::BulkString(key_b64) | ||||
| } | ||||
|  | ||||
| pub async fn cmd_sym_encrypt(key_b64: &str, message: &str) -> Protocol { | ||||
|     let key = match decode_key_b64(key_b64) { | ||||
|         Ok(k) => k, | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     match encrypt_blob(&key, message.as_bytes()) { | ||||
|         Ok(blob) => Protocol::BulkString(B64.encode(blob)), | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub async fn cmd_sym_decrypt(key_b64: &str, ct_b64: &str) -> Protocol { | ||||
|     let key = match decode_key_b64(key_b64) { | ||||
|         Ok(k) => k, | ||||
|         Err(e) => return e.to_protocol(), | ||||
|     }; | ||||
|     let blob = match B64.decode(ct_b64.as_bytes()) { | ||||
|         Ok(b) => b, | ||||
|         Err(_) => return SymWireError::BadEncoding.to_protocol(), | ||||
|     }; | ||||
|     match decrypt_blob(&key, &blob) { | ||||
|         Ok(pt) => match String::from_utf8(pt) { | ||||
|             Ok(s) => Protocol::BulkString(s), | ||||
|             Err(_) => Protocol::err("ERR sym: invalid UTF-8 plaintext"), | ||||
|         }, | ||||
|         Err(e) => e.to_protocol(), | ||||
|     } | ||||
| } | ||||
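Taken together, these handlers form a stateless round trip: SYM KEYGEN returns a Base64-encoded 32-byte key, SYM ENCRYPT returns Base64 of [version:1][nonce:24][ciphertext||tag:16], and SYM DECRYPT authenticates the blob and restores the plaintext. A small sketch driving the public handlers directly; it assumes a #[cfg(test)] module inside the crate so the crate:: paths resolve, and is not part of this diff:

    #[tokio::test]
    async fn sym_roundtrip_sketch() {
        use crate::protocol::Protocol;
        use crate::sym::{cmd_sym_decrypt, cmd_sym_encrypt, cmd_sym_keygen};

        // KEYGEN -> Base64 of a fresh random 32-byte key.
        let key_b64 = match cmd_sym_keygen().await {
            Protocol::BulkString(s) => s,
            _ => panic!("expected a bulk string key"),
        };

        // ENCRYPT -> Base64 of [version][24-byte nonce][ciphertext || 16-byte tag].
        let ct_b64 = match cmd_sym_encrypt(&key_b64, "hello world").await {
            Protocol::BulkString(s) => s,
            _ => panic!("expected a bulk string ciphertext"),
        };

        // DECRYPT with the same key restores the original plaintext.
        match cmd_sym_decrypt(&key_b64, &ct_b64).await {
            Protocol::BulkString(s) => assert_eq!(s, "hello world"),
            _ => panic!("decrypt should succeed with the matching key"),
        }
    }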
src/tantivy_search.rs (new file, 709 lines)
							| @@ -0,0 +1,709 @@ | ||||
| use crate::error::DBError; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use std::collections::HashMap; | ||||
| use std::path::PathBuf; | ||||
| use std::sync::{Arc, RwLock}; | ||||
| use tantivy::{ | ||||
|     collector::TopDocs, | ||||
|     directory::MmapDirectory, | ||||
|     query::{BooleanQuery, Occur, Query, QueryParser, TermQuery}, | ||||
|     schema::{ | ||||
|         DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING, | ||||
|     }, | ||||
|     tokenizer::TokenizerManager, | ||||
|     DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term, | ||||
| }; | ||||
| use tantivy::schema::Value; | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum FieldDef { | ||||
|     Text { | ||||
|         stored: bool, | ||||
|         indexed: bool, | ||||
|         tokenized: bool, | ||||
|         fast: bool, | ||||
|     }, | ||||
|     Numeric { | ||||
|         stored: bool, | ||||
|         indexed: bool, | ||||
|         fast: bool, | ||||
|         precision: NumericType, | ||||
|     }, | ||||
|     Tag { | ||||
|         stored: bool, | ||||
|         separator: String, | ||||
|         case_sensitive: bool, | ||||
|     }, | ||||
|     Geo { | ||||
|         stored: bool, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum NumericType { | ||||
|     I64, | ||||
|     U64, | ||||
|     F64, | ||||
|     Date, | ||||
| } | ||||
|  | ||||
| pub struct IndexSchema { | ||||
|     schema: Schema, | ||||
|     fields: HashMap<String, (Field, FieldDef)>, | ||||
|     default_search_fields: Vec<Field>, | ||||
| } | ||||
|  | ||||
| pub struct TantivySearch { | ||||
|     index: Index, | ||||
|     writer: Arc<RwLock<IndexWriter>>, | ||||
|     reader: IndexReader, | ||||
|     index_schema: IndexSchema, | ||||
|     name: String, | ||||
|     config: IndexConfig, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub struct IndexConfig { | ||||
|     pub language: String, | ||||
|     pub stopwords: Vec<String>, | ||||
|     pub stemming: bool, | ||||
|     pub max_doc_count: Option<usize>, | ||||
|     pub default_score: f64, | ||||
| } | ||||
|  | ||||
| impl Default for IndexConfig { | ||||
|     fn default() -> Self { | ||||
|         IndexConfig { | ||||
|             language: "english".to_string(), | ||||
|             stopwords: vec![], | ||||
|             stemming: true, | ||||
|             max_doc_count: None, | ||||
|             default_score: 1.0, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TantivySearch { | ||||
|     pub fn new_with_schema( | ||||
|         base_path: PathBuf, | ||||
|         name: String, | ||||
|         field_definitions: Vec<(String, FieldDef)>, | ||||
|         config: Option<IndexConfig>, | ||||
|     ) -> Result<Self, DBError> { | ||||
|         let index_path = base_path.join(&name); | ||||
|         std::fs::create_dir_all(&index_path) | ||||
|             .map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?; | ||||
|  | ||||
|         // Build schema from field definitions | ||||
|         let mut schema_builder = Schema::builder(); | ||||
|         let mut fields = HashMap::new(); | ||||
|         let mut default_search_fields = Vec::new(); | ||||
|  | ||||
|         // Always add a document ID field | ||||
|         let id_field = schema_builder.add_text_field("_id", STRING | STORED); | ||||
|         fields.insert( | ||||
|             "_id".to_string(), | ||||
|             ( | ||||
|                 id_field, | ||||
|                 FieldDef::Text { | ||||
|                     stored: true, | ||||
|                     indexed: true, | ||||
|                     tokenized: false, | ||||
|                     fast: false, | ||||
|                 }, | ||||
|             ), | ||||
|         ); | ||||
|  | ||||
|         // Add user-defined fields | ||||
|         for (field_name, field_def) in field_definitions { | ||||
|             let field = match &field_def { | ||||
|                 FieldDef::Text { | ||||
|                     stored, | ||||
|                     indexed, | ||||
|                     tokenized, | ||||
|                     fast: _fast, | ||||
|                 } => { | ||||
|                     let mut text_options = TextOptions::default(); | ||||
|                     if *stored { | ||||
|                         text_options = text_options.set_stored(); | ||||
|                     } | ||||
|                     if *indexed { | ||||
|                         let indexing_options = if *tokenized { | ||||
|                             TextFieldIndexing::default() | ||||
|                                 .set_tokenizer("default") | ||||
|                                 .set_index_option(IndexRecordOption::WithFreqsAndPositions) | ||||
|                         } else { | ||||
|                             TextFieldIndexing::default() | ||||
|                                 .set_tokenizer("raw") | ||||
|                                 .set_index_option(IndexRecordOption::Basic) | ||||
|                         }; | ||||
|                         text_options = text_options.set_indexing_options(indexing_options); | ||||
|                         let f = schema_builder.add_text_field(&field_name, text_options); | ||||
|                         if *tokenized { | ||||
|                             default_search_fields.push(f); | ||||
|                         } | ||||
|                         f | ||||
|                     } else { | ||||
|                         schema_builder.add_text_field(&field_name, text_options) | ||||
|                     } | ||||
|                 } | ||||
|                 FieldDef::Numeric { | ||||
|                     stored, | ||||
|                     indexed, | ||||
|                     fast, | ||||
|                     precision, | ||||
|                 } => match precision { | ||||
|                     NumericType::I64 => { | ||||
|                         let mut opts = NumericOptions::default(); | ||||
|                         if *stored { | ||||
|                             opts = opts.set_stored(); | ||||
|                         } | ||||
|                         if *indexed { | ||||
|                             opts = opts.set_indexed(); | ||||
|                         } | ||||
|                         if *fast { | ||||
|                             opts = opts.set_fast(); | ||||
|                         } | ||||
|                         schema_builder.add_i64_field(&field_name, opts) | ||||
|                     } | ||||
|                     NumericType::U64 => { | ||||
|                         let mut opts = NumericOptions::default(); | ||||
|                         if *stored { | ||||
|                             opts = opts.set_stored(); | ||||
|                         } | ||||
|                         if *indexed { | ||||
|                             opts = opts.set_indexed(); | ||||
|                         } | ||||
|                         if *fast { | ||||
|                             opts = opts.set_fast(); | ||||
|                         } | ||||
|                         schema_builder.add_u64_field(&field_name, opts) | ||||
|                     } | ||||
|                     NumericType::F64 => { | ||||
|                         let mut opts = NumericOptions::default(); | ||||
|                         if *stored { | ||||
|                             opts = opts.set_stored(); | ||||
|                         } | ||||
|                         if *indexed { | ||||
|                             opts = opts.set_indexed(); | ||||
|                         } | ||||
|                         if *fast { | ||||
|                             opts = opts.set_fast(); | ||||
|                         } | ||||
|                         schema_builder.add_f64_field(&field_name, opts) | ||||
|                     } | ||||
|                     NumericType::Date => { | ||||
|                         let mut opts = DateOptions::default(); | ||||
|                         if *stored { | ||||
|                             opts = opts.set_stored(); | ||||
|                         } | ||||
|                         if *indexed { | ||||
|                             opts = opts.set_indexed(); | ||||
|                         } | ||||
|                         if *fast { | ||||
|                             opts = opts.set_fast(); | ||||
|                         } | ||||
|                         schema_builder.add_date_field(&field_name, opts) | ||||
|                     } | ||||
|                 }, | ||||
|                 FieldDef::Tag { | ||||
|                     stored, | ||||
|                     separator: _, | ||||
|                     case_sensitive: _, | ||||
|                 } => { | ||||
|                     let mut text_options = TextOptions::default(); | ||||
|                     if *stored { | ||||
|                         text_options = text_options.set_stored(); | ||||
|                     } | ||||
|                     text_options = text_options.set_indexing_options( | ||||
|                         TextFieldIndexing::default() | ||||
|                             .set_tokenizer("raw") | ||||
|                             .set_index_option(IndexRecordOption::Basic), | ||||
|                     ); | ||||
|                     schema_builder.add_text_field(&field_name, text_options) | ||||
|                 } | ||||
|                 FieldDef::Geo { stored } => { | ||||
|                     // For now, store as two f64 fields for lat/lon | ||||
|                     let mut opts = NumericOptions::default(); | ||||
|                     if *stored { | ||||
|                         opts = opts.set_stored(); | ||||
|                     } | ||||
|                     opts = opts.set_indexed().set_fast(); | ||||
|                     let lat_field = | ||||
|                         schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone()); | ||||
|                     let lon_field = | ||||
|                         schema_builder.add_f64_field(&format!("{}_lon", field_name), opts); | ||||
|                     fields.insert( | ||||
|                         format!("{}_lat", field_name), | ||||
|                         ( | ||||
|                             lat_field, | ||||
|                             FieldDef::Numeric { | ||||
|                                 stored: *stored, | ||||
|                                 indexed: true, | ||||
|                                 fast: true, | ||||
|                                 precision: NumericType::F64, | ||||
|                             }, | ||||
|                         ), | ||||
|                     ); | ||||
|                     fields.insert( | ||||
|                         format!("{}_lon", field_name), | ||||
|                         ( | ||||
|                             lon_field, | ||||
|                             FieldDef::Numeric { | ||||
|                                 stored: *stored, | ||||
|                                 indexed: true, | ||||
|                                 fast: true, | ||||
|                                 precision: NumericType::F64, | ||||
|                             }, | ||||
|                         ), | ||||
|                     ); | ||||
|                     continue; // Skip adding the geo field itself | ||||
|                 } | ||||
|             }; | ||||
|             fields.insert(field_name.clone(), (field, field_def)); | ||||
|         } | ||||
|  | ||||
|         let schema = schema_builder.build(); | ||||
|         let index_schema = IndexSchema { | ||||
|             schema: schema.clone(), | ||||
|             fields, | ||||
|             default_search_fields, | ||||
|         }; | ||||
|  | ||||
|         // Create or open index | ||||
|         let dir = MmapDirectory::open(&index_path) | ||||
|             .map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?; | ||||
|         let mut index = | ||||
|             Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?; | ||||
|  | ||||
|         // Configure tokenizers | ||||
|         let tokenizer_manager = TokenizerManager::default(); | ||||
|         index.set_tokenizers(tokenizer_manager); | ||||
|  | ||||
|         let writer = index | ||||
|             .writer(15_000_000) | ||||
|             .map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?; | ||||
|         let reader = index | ||||
|             .reader() | ||||
|             .map_err(|e| DBError(format!("Failed to create reader: {}", e)))?; | ||||
|  | ||||
|         let config = config.unwrap_or_default(); | ||||
|  | ||||
|         Ok(TantivySearch { | ||||
|             index, | ||||
|             writer: Arc::new(RwLock::new(writer)), | ||||
|             reader, | ||||
|             index_schema, | ||||
|             name, | ||||
|             config, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn add_document_with_fields( | ||||
|         &self, | ||||
|         doc_id: &str, | ||||
|         fields: HashMap<String, String>, | ||||
|     ) -> Result<(), DBError> { | ||||
|         let mut writer = self | ||||
|             .writer | ||||
|             .write() | ||||
|             .map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?; | ||||
|  | ||||
|         // Delete existing document with same ID | ||||
|         if let Some((id_field, _)) = self.index_schema.fields.get("_id") { | ||||
|             writer.delete_term(Term::from_field_text(*id_field, doc_id)); | ||||
|         } | ||||
|  | ||||
|         // Create new document | ||||
|         let mut doc = tantivy::doc!(); | ||||
|  | ||||
|         // Add document ID | ||||
|         if let Some((id_field, _)) = self.index_schema.fields.get("_id") { | ||||
|             doc.add_text(*id_field, doc_id); | ||||
|         } | ||||
|  | ||||
|         // Add other fields based on schema | ||||
|         for (field_name, field_value) in fields { | ||||
|             if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) { | ||||
|                 match field_def { | ||||
|                     FieldDef::Text { .. } => { | ||||
|                         doc.add_text(*field, &field_value); | ||||
|                     } | ||||
|                     FieldDef::Numeric { precision, .. } => match precision { | ||||
|                         NumericType::I64 => { | ||||
|                             if let Ok(v) = field_value.parse::<i64>() { | ||||
|                                 doc.add_i64(*field, v); | ||||
|                             } | ||||
|                         } | ||||
|                         NumericType::U64 => { | ||||
|                             if let Ok(v) = field_value.parse::<u64>() { | ||||
|                                 doc.add_u64(*field, v); | ||||
|                             } | ||||
|                         } | ||||
|                         NumericType::F64 => { | ||||
|                             if let Ok(v) = field_value.parse::<f64>() { | ||||
|                                 doc.add_f64(*field, v); | ||||
|                             } | ||||
|                         } | ||||
|                         NumericType::Date => { | ||||
|                             if let Ok(v) = field_value.parse::<i64>() { | ||||
|                                 doc.add_date(*field, DateTime::from_timestamp_millis(v)); | ||||
|                             } | ||||
|                         } | ||||
|                     }, | ||||
|                     FieldDef::Tag { | ||||
|                         separator, | ||||
|                         case_sensitive, | ||||
|                         .. | ||||
|                     } => { | ||||
|                         let tags = if !case_sensitive { | ||||
|                             field_value.to_lowercase() | ||||
|                         } else { | ||||
|                             field_value.clone() | ||||
|                         }; | ||||
|                         for tag in tags.split(separator.as_str()) { | ||||
|                             doc.add_text(*field, tag.trim()); | ||||
|                         } | ||||
|                     } | ||||
|                     FieldDef::Geo { .. } => { | ||||
|                         let parts: Vec<&str> = field_value.split(',').collect(); | ||||
|                         if parts.len() == 2 { | ||||
|                             if let (Ok(lat), Ok(lon)) = | ||||
|                                 (parts[0].parse::<f64>(), parts[1].parse::<f64>()) | ||||
|                             { | ||||
|                                 if let Some((lat_field, _)) = | ||||
|                                     self.index_schema.fields.get(&format!("{}_lat", field_name)) | ||||
|                                 { | ||||
|                                     doc.add_f64(*lat_field, lat); | ||||
|                                 } | ||||
|                                 if let Some((lon_field, _)) = | ||||
|                                     self.index_schema.fields.get(&format!("{}_lon", field_name)) | ||||
|                                 { | ||||
|                                     doc.add_f64(*lon_field, lon); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         writer | ||||
|             .add_document(doc) | ||||
|             .map_err(|e| DBError(format!("Failed to add document: {}", e)))?; | ||||
|         writer | ||||
|             .commit() | ||||
|             .map_err(|e| DBError(format!("Failed to commit: {}", e)))?; | ||||
|         // Make new documents visible to searches | ||||
|         self.reader | ||||
|             .reload() | ||||
|             .map_err(|e| DBError(format!("Failed to reload reader: {}", e)))?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn search_with_options( | ||||
|         &self, | ||||
|         query_str: &str, | ||||
|         options: SearchOptions, | ||||
|     ) -> Result<SearchResults, DBError> { | ||||
|         // Ensure reader is up to date with latest commits | ||||
|         self.reader | ||||
|             .reload() | ||||
|             .map_err(|e| DBError(format!("Failed to reload reader: {}", e)))?; | ||||
|         let searcher = self.reader.searcher(); | ||||
|  | ||||
|         // Ensure we have searchable fields | ||||
|         if self.index_schema.default_search_fields.is_empty() { | ||||
|             return Err(DBError("No searchable fields defined in schema".to_string())); | ||||
|         } | ||||
|  | ||||
|         // Parse query based on search fields | ||||
|         let query_parser = QueryParser::for_index( | ||||
|             &self.index, | ||||
|             self.index_schema.default_search_fields.clone(), | ||||
|         ); | ||||
|         let parsed_query = query_parser | ||||
|             .parse_query(query_str) | ||||
|             .map_err(|e| DBError(format!("Failed to parse query: {}", e)))?; | ||||
|         let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)]; | ||||
|  | ||||
|         // Apply filters if any | ||||
|         for filter in options.filters { | ||||
|             if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) { | ||||
|                 match filter.filter_type { | ||||
|                     FilterType::Equals(value) => { | ||||
|                         match field_def { | ||||
|                             FieldDef::Text { .. } | FieldDef::Tag { .. } => { | ||||
|                                 let term_query = | ||||
|                                     TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic); | ||||
|                                 clauses.push((Occur::Must, Box::new(term_query))); | ||||
|                             } | ||||
|                             FieldDef::Numeric { precision, .. } => { | ||||
|                                 // Equals on numeric fields: parse to the right numeric type and use term query | ||||
|                                 match precision { | ||||
|                                     NumericType::I64 => { | ||||
|                                         if let Ok(v) = value.parse::<i64>() { | ||||
|                                             let term = Term::from_field_i64(*field, v); | ||||
|                                             let tq = TermQuery::new(term, IndexRecordOption::Basic); | ||||
|                                             clauses.push((Occur::Must, Box::new(tq))); | ||||
|                                         } | ||||
|                                     } | ||||
|                                     NumericType::U64 => { | ||||
|                                         if let Ok(v) = value.parse::<u64>() { | ||||
|                                             let term = Term::from_field_u64(*field, v); | ||||
|                                             let tq = TermQuery::new(term, IndexRecordOption::Basic); | ||||
|                                             clauses.push((Occur::Must, Box::new(tq))); | ||||
|                                         } | ||||
|                                     } | ||||
|                                     NumericType::F64 => { | ||||
|                                         if let Ok(v) = value.parse::<f64>() { | ||||
|                                             let term = Term::from_field_f64(*field, v); | ||||
|                                             let tq = TermQuery::new(term, IndexRecordOption::Basic); | ||||
|                                             clauses.push((Occur::Must, Box::new(tq))); | ||||
|                                         } | ||||
|                                     } | ||||
|                                     NumericType::Date => { | ||||
|                                         if let Ok(v) = value.parse::<i64>() { | ||||
|                                             let dt = DateTime::from_timestamp_millis(v); | ||||
|                                             let term = Term::from_field_date(*field, dt); | ||||
|                                             let tq = TermQuery::new(term, IndexRecordOption::Basic); | ||||
|                                             clauses.push((Occur::Must, Box::new(tq))); | ||||
|                                         } | ||||
|                                     } | ||||
|                                 } | ||||
|                             } | ||||
|                             FieldDef::Geo { .. } => { | ||||
|                                 // Geo equals isn't supported in this simplified version | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     FilterType::Range { .. } => { | ||||
|                         // TODO: Implement numeric range queries by building a RangeQuery per type | ||||
|                     } | ||||
|                     FilterType::InSet(values) => { | ||||
|                         // OR across values | ||||
|                         let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![]; | ||||
|                         for value in values { | ||||
|                             let term_query = TermQuery::new( | ||||
|                                 Term::from_field_text(*field, &value), | ||||
|                                 IndexRecordOption::Basic, | ||||
|                             ); | ||||
|                             sub_clauses.push((Occur::Should, Box::new(term_query))); | ||||
|                         } | ||||
|                         clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses)))); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let final_query: Box<dyn Query> = if clauses.len() == 1 { | ||||
|             clauses.pop().unwrap().1 | ||||
|         } else { | ||||
|             Box::new(BooleanQuery::new(clauses)) | ||||
|         }; | ||||
|  | ||||
|         // Execute search | ||||
|         let top_docs = searcher | ||||
|             .search(&*final_query, &TopDocs::with_limit(options.limit + options.offset)) | ||||
|             .map_err(|e| DBError(format!("Search failed: {}", e)))?; | ||||
|         let total_hits = top_docs.len(); | ||||
|         let mut documents = Vec::new(); | ||||
|  | ||||
|         for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) { | ||||
|             let retrieved_doc: TantivyDocument = searcher | ||||
|                 .doc(doc_address) | ||||
|                 .map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?; | ||||
|  | ||||
|             let mut doc_fields = HashMap::new(); | ||||
|  | ||||
|             // Extract stored fields (or synthesize) | ||||
|             for (field_name, (field, field_def)) in &self.index_schema.fields { | ||||
|                 match field_def { | ||||
|                     FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => { | ||||
|                         if *stored { | ||||
|                             if let Some(value) = retrieved_doc.get_first(*field) { | ||||
|                                 if let Some(text) = value.as_str() { | ||||
|                                     doc_fields.insert(field_name.clone(), text.to_string()); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     FieldDef::Numeric { | ||||
|                         stored, precision, .. | ||||
|                     } => { | ||||
|                         if *stored { | ||||
|                             let value_str = match precision { | ||||
|                                 NumericType::I64 => retrieved_doc | ||||
|                                     .get_first(*field) | ||||
|                                     .and_then(|v| v.as_i64()) | ||||
|                                     .map(|v| v.to_string()), | ||||
|                                 NumericType::U64 => retrieved_doc | ||||
|                                     .get_first(*field) | ||||
|                                     .and_then(|v| v.as_u64()) | ||||
|                                     .map(|v| v.to_string()), | ||||
|                                 NumericType::F64 => retrieved_doc | ||||
|                                     .get_first(*field) | ||||
|                                     .and_then(|v| v.as_f64()) | ||||
|                                     .map(|v| v.to_string()), | ||||
|                                 NumericType::Date => retrieved_doc | ||||
|                                     .get_first(*field) | ||||
|                                     .and_then(|v| v.as_datetime()) | ||||
|                                     .map(|v| v.into_timestamp_millis().to_string()), | ||||
|                             }; | ||||
|                             if let Some(v) = value_str { | ||||
|                                 doc_fields.insert(field_name.clone(), v); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     FieldDef::Geo { stored } => { | ||||
|                         if *stored { | ||||
|                             let lat_field = self | ||||
|                                 .index_schema | ||||
|                                 .fields | ||||
|                                 .get(&format!("{}_lat", field_name)) | ||||
|                                 .unwrap() | ||||
|                                 .0; | ||||
|                             let lon_field = self | ||||
|                                 .index_schema | ||||
|                                 .fields | ||||
|                                 .get(&format!("{}_lon", field_name)) | ||||
|                                 .unwrap() | ||||
|                                 .0; | ||||
|                             let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64()); | ||||
|                             let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64()); | ||||
|                             if let (Some(lat), Some(lon)) = (lat, lon) { | ||||
|                                 doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon)); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             documents.push(SearchDocument { | ||||
|                 fields: doc_fields, | ||||
|                 score, | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         Ok(SearchResults { | ||||
|             total: total_hits, | ||||
|             documents, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn get_info(&self) -> Result<IndexInfo, DBError> { | ||||
|         let searcher = self.reader.searcher(); | ||||
|         let num_docs = searcher.num_docs(); | ||||
|         let fields_info: Vec<FieldInfo> = self | ||||
|             .index_schema | ||||
|             .fields | ||||
|             .iter() | ||||
|             .map(|(name, (_, def))| FieldInfo { | ||||
|                 name: name.clone(), | ||||
|                 field_type: format!("{:?}", def), | ||||
|             }) | ||||
|             .collect(); | ||||
|         Ok(IndexInfo { | ||||
|             name: self.name.clone(), | ||||
|             num_docs, | ||||
|             fields: fields_info, | ||||
|             config: self.config.clone(), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Delete a document by its _id term. Returns true if the document existed before deletion. | ||||
|     pub fn delete_document_by_id(&self, doc_id: &str) -> Result<bool, DBError> { | ||||
|         // Determine existence by running a tiny term query | ||||
|         let existed = if let Some((id_field, _)) = self.index_schema.fields.get("_id") { | ||||
|             let term = Term::from_field_text(*id_field, doc_id); | ||||
|             let searcher = self.reader.searcher(); | ||||
|             let tq = TermQuery::new(term.clone(), IndexRecordOption::Basic); | ||||
|             let hits = searcher | ||||
|                 .search(&tq, &TopDocs::with_limit(1)) | ||||
|                 .map_err(|e| DBError(format!("Failed to search for existing doc: {}", e)))?; | ||||
|             !hits.is_empty() | ||||
|         } else { | ||||
|             false | ||||
|         }; | ||||
|  | ||||
|         // Perform deletion and commit | ||||
|         let mut writer = self | ||||
|             .writer | ||||
|             .write() | ||||
|             .map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?; | ||||
|         if let Some((id_field, _)) = self.index_schema.fields.get("_id") { | ||||
|             writer.delete_term(Term::from_field_text(*id_field, doc_id)); | ||||
|         } | ||||
|         writer | ||||
|             .commit() | ||||
|             .map_err(|e| DBError(format!("Failed to commit delete: {}", e)))?; | ||||
|         // Refresh reader to observe deletion | ||||
|         self.reader | ||||
|             .reload() | ||||
|             .map_err(|e| DBError(format!("Failed to reload reader: {}", e)))?; | ||||
|  | ||||
|         Ok(existed) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct SearchOptions { | ||||
|     pub limit: usize, | ||||
|     pub offset: usize, | ||||
|     pub filters: Vec<Filter>, | ||||
|     pub sort_by: Option<String>, | ||||
|     pub return_fields: Option<Vec<String>>, | ||||
|     pub highlight: bool, | ||||
| } | ||||
|  | ||||
| impl Default for SearchOptions { | ||||
|     fn default() -> Self { | ||||
|         SearchOptions { | ||||
|             limit: 10, | ||||
|             offset: 0, | ||||
|             filters: vec![], | ||||
|             sort_by: None, | ||||
|             return_fields: None, | ||||
|             highlight: false, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct Filter { | ||||
|     pub field: String, | ||||
|     pub filter_type: FilterType, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum FilterType { | ||||
|     Equals(String), | ||||
|     Range { min: String, max: String }, | ||||
|     InSet(Vec<String>), | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct SearchResults { | ||||
|     pub total: usize, | ||||
|     pub documents: Vec<SearchDocument>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct SearchDocument { | ||||
|     pub fields: HashMap<String, String>, | ||||
|     pub score: f32, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| pub struct IndexInfo { | ||||
|     pub name: String, | ||||
|     pub num_docs: u64, | ||||
|     pub fields: Vec<FieldInfo>, | ||||
|     pub config: IndexConfig, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| pub struct FieldInfo { | ||||
|     pub name: String, | ||||
|     pub field_type: String, | ||||
| } | ||||
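The search API above is schema-first: fields are declared once as FieldDef values, documents are added as plain string maps keyed by field name (numeric values are parsed from their string form), and queries go through search_with_options. A brief usage sketch using only the types defined in this file; the index path, field names, and query text are illustrative, and it assumes the code sits in the same module so the types are in scope:

    use std::collections::HashMap;
    use std::path::PathBuf;

    fn tantivy_search_sketch() -> Result<(), crate::error::DBError> {
        // Declare a tokenized text field and a fast numeric field.
        let fields = vec![
            ("title".to_string(), FieldDef::Text { stored: true, indexed: true, tokenized: true, fast: false }),
            ("year".to_string(), FieldDef::Numeric { stored: true, indexed: true, fast: true, precision: NumericType::U64 }),
        ];
        let idx = TantivySearch::new_with_schema(PathBuf::from("/tmp/hero_indexes"), "books".to_string(), fields, None)?;

        // Upsert a document; an existing doc with the same _id is deleted first.
        let mut doc = HashMap::new();
        doc.insert("title".to_string(), "The quick brown fox".to_string());
        doc.insert("year".to_string(), "2024".to_string());
        idx.add_document_with_fields("doc-1", doc)?;

        // Defaults: limit 10, offset 0, no filters.
        let results = idx.search_with_options("fox", SearchOptions::default())?;
        println!("total hits: {}", results.total);
        Ok(())
    }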
| @@ -1,10 +1,11 @@ | ||||
| #!/bin/bash | ||||
| set -euo pipefail | ||||
| SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | ||||
| cd "$SCRIPT_DIR" | ||||
|  | ||||
| # Test script for HeroDB - Redis-compatible database with redb backend | ||||
| # This script starts the server and runs comprehensive tests | ||||
|  | ||||
| set -e | ||||
|  | ||||
| # Colors for output | ||||
| RED='\033[0;31m' | ||||
| GREEN='\033[0;32m' | ||||
| @@ -298,7 +299,7 @@ main() { | ||||
|      | ||||
|     # Start the server | ||||
|     print_status "Starting HeroDB server..." | ||||
|     ./target/release/herodb --dir "$DB_DIR" --port $PORT & | ||||
|     ../target/release/herodb --dir "$DB_DIR" --port $PORT & | ||||
|     SERVER_PID=$! | ||||
|      | ||||
|     # Wait for server to start | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{server::Server, options::DBOption}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Duration; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| use tokio::net::TcpStream; | ||||
| @@ -22,11 +23,13 @@ async fn debug_hset_simple() { | ||||
|     
 | ||||
|     let port = 16500; | ||||
|     let option = DBOption { | ||||
|         dir: test_dir.to_string(), | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port, | ||||
|         debug: false, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|     
 | ||||
|     let mut server = Server::new(option).await; | ||||
| @@ -47,6 +50,12 @@ async fn debug_hset_simple() { | ||||
|     sleep(Duration::from_millis(200)).await; | ||||
|     
 | ||||
|     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap(); | ||||
|     // Acquire ReadWrite permissions on this connection | ||||
|     let resp = send_command( | ||||
|         &mut stream, | ||||
|         "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n", | ||||
|     ).await; | ||||
|     assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp); | ||||
|     
 | ||||
|     // Test simple HSET | ||||
|     println!("Testing HSET..."); | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{server::Server, options::DBOption}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Duration; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| use tokio::net::TcpStream; | ||||
| @@ -13,11 +14,13 @@ async fn debug_hset_return_value() { | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|     
 | ||||
|     let option = DBOption { | ||||
|         dir: test_dir.to_string(), | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port: 16390, | ||||
|         debug: false, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|     
 | ||||
|     let mut server = Server::new(option).await; | ||||
| @@ -40,11 +43,18 @@ async fn debug_hset_return_value() { | ||||
|     // Connect and test HSET | ||||
|     let mut stream = TcpStream::connect("127.0.0.1:16390").await.unwrap(); | ||||
| 
 | ||||
|     // Acquire ReadWrite permissions for this new connection | ||||
|     let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n"; | ||||
|     stream.write_all(handshake.as_bytes()).await.unwrap(); | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let n = stream.read(&mut buffer).await.unwrap(); | ||||
|     let resp = String::from_utf8_lossy(&buffer[..n]); | ||||
|     assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp); | ||||
|     
 | ||||
|     // Send HSET command | ||||
|     let cmd = "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n"; | ||||
|     stream.write_all(cmd.as_bytes()).await.unwrap(); | ||||
|     
 | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let n = stream.read(&mut buffer).await.unwrap(); | ||||
|     let response = String::from_utf8_lossy(&buffer[..n]); | ||||
|     
 | ||||
tests/lance_integration_tests.rs (new file, 484 lines)
							| @@ -0,0 +1,484 @@ | ||||
| use redis::{Client, Connection, RedisResult, Value}; | ||||
| use std::process::{Child, Command}; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use jsonrpsee::http_client::{HttpClient, HttpClientBuilder}; | ||||
| use herodb::rpc::{BackendType, DatabaseConfig, RpcClient}; | ||||
| use base64::Engine; | ||||
| use tokio::time::sleep; | ||||
|  | ||||
| // ------------------------ | ||||
| // Helpers | ||||
| // ------------------------ | ||||
|  | ||||
| fn get_redis_connection(port: u16) -> Connection { | ||||
|     let connection_info = format!("redis://127.0.0.1:{}", port); | ||||
|     let client = Client::open(connection_info).unwrap(); | ||||
|     let mut attempts = 0; | ||||
|     loop { | ||||
|         match client.get_connection() { | ||||
|             Ok(mut conn) => { | ||||
|                 if redis::cmd("PING").query::<String>(&mut conn).is_ok() { | ||||
|                     return conn; | ||||
|                 } | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 if attempts >= 3600 { | ||||
|                     panic!("Failed to connect to Redis server after 3600 attempts: {}", e); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         attempts += 1; | ||||
|         std::thread::sleep(Duration::from_millis(500)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| async fn get_rpc_client(port: u16) -> HttpClient { | ||||
|     let url = format!("http://127.0.0.1:{}", port + 1); // RPC port = Redis port + 1 | ||||
|     HttpClientBuilder::default().build(url).unwrap() | ||||
| } | ||||
|  | ||||
| /// Wait until RPC server is responsive (getServerStats succeeds) or panic after retries. | ||||
| async fn wait_for_rpc_ready(client: &HttpClient, max_attempts: u32, delay: Duration) { | ||||
|     for _ in 0..max_attempts { | ||||
|         match client.get_server_stats().await { | ||||
|             Ok(_) => return, | ||||
|             Err(_) => { | ||||
|                 sleep(delay).await; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     panic!("RPC server did not become ready in time"); | ||||
| } | ||||
|  | ||||
| // A guard to ensure the server process is killed when it goes out of scope and test dir cleaned. | ||||
| struct ServerProcessGuard { | ||||
|     process: Child, | ||||
|     test_dir: String, | ||||
| } | ||||
|  | ||||
| impl Drop for ServerProcessGuard { | ||||
|     fn drop(&mut self) { | ||||
|         eprintln!("Killing server process (pid: {})...", self.process.id()); | ||||
|         if let Err(e) = self.process.kill() { | ||||
|             eprintln!("Failed to kill server process: {}", e); | ||||
|         } | ||||
|         match self.process.wait() { | ||||
|             Ok(status) => eprintln!("Server process exited with: {}", status), | ||||
|             Err(e) => eprintln!("Failed to wait on server process: {}", e), | ||||
|         } | ||||
|  | ||||
|         // Clean up the specific test directory | ||||
|         eprintln!("Cleaning up test directory: {}", self.test_dir); | ||||
|         if let Err(e) = std::fs::remove_dir_all(&self.test_dir) { | ||||
|             eprintln!("Failed to clean up test directory: {}", e); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Helper to set up the server and return guard + ports | ||||
| async fn setup_server() -> (ServerProcessGuard, u16) { | ||||
|     use std::sync::atomic::{AtomicU16, Ordering}; | ||||
|     static PORT_COUNTER: AtomicU16 = AtomicU16::new(17500); | ||||
|     let port = PORT_COUNTER.fetch_add(1, Ordering::SeqCst); | ||||
|  | ||||
|     let test_dir = format!("/tmp/herodb_lance_test_{}", port); | ||||
|  | ||||
|     // Clean up previous test data | ||||
|     if std::path::Path::new(&test_dir).exists() { | ||||
|         let _ = std::fs::remove_dir_all(&test_dir); | ||||
|     } | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|  | ||||
|     // Start the server in a subprocess with RPC enabled (follows tantivy test pattern) | ||||
|     let child = Command::new("cargo") | ||||
|         .args(&[ | ||||
|             "run", | ||||
|             "--", | ||||
|             "--dir", | ||||
|             &test_dir, | ||||
|             "--port", | ||||
|             &port.to_string(), | ||||
|             "--rpc-port", | ||||
|             &(port + 1).to_string(), | ||||
|             "--enable-rpc", | ||||
|             "--debug", | ||||
|             "--admin-secret", | ||||
|             "test-admin", | ||||
|         ]) | ||||
|         .spawn() | ||||
|         .expect("Failed to start server process"); | ||||
|  | ||||
|     let guard = ServerProcessGuard { | ||||
|         process: child, | ||||
|         test_dir, | ||||
|     }; | ||||
|  | ||||
|     // Give the server time to build and start (cargo run may compile first) | ||||
|     // Increase significantly to accommodate first-time dependency compilation in CI. | ||||
|     std::thread::sleep(Duration::from_millis(5000)); | ||||
|  | ||||
|     (guard, port) | ||||
| } | ||||
|  | ||||
| // Convenient helpers for assertions on redis::Value | ||||
| fn value_is_ok(v: &Value) -> bool { | ||||
|     match v { | ||||
|         Value::Okay => true, | ||||
|         Value::Status(s) if s == "OK" => true, | ||||
|         Value::Data(d) if d == b"OK" => true, | ||||
|         _ => false, | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn value_is_int_eq(v: &Value, expected: i64) -> bool { | ||||
|     matches!(v, Value::Int(n) if *n == expected) | ||||
| } | ||||
|  | ||||
| fn value_is_str_eq(v: &Value, expected: &str) -> bool { | ||||
|     match v { | ||||
|         Value::Status(s) => s == expected, | ||||
|         Value::Data(d) => String::from_utf8_lossy(d) == expected, | ||||
|         _ => false, | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn to_string_lossy(v: &Value) -> String { | ||||
|     match v { | ||||
|         Value::Nil => "Nil".to_string(), | ||||
|         Value::Int(n) => n.to_string(), | ||||
|         Value::Status(s) => s.clone(), | ||||
|         Value::Okay => "OK".to_string(), | ||||
|         Value::Data(d) => String::from_utf8_lossy(d).to_string(), | ||||
|         Value::Bulk(items) => { | ||||
|             let inner: Vec<String> = items.iter().map(to_string_lossy).collect(); | ||||
|             format!("[{}]", inner.join(", ")) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Extract ids from LANCE.SEARCH / LANCE.SEARCHIMAGE reply which is: | ||||
| // Array of elements: [ [id, score, [k,v,...]], [id, score, ...], ... ] | ||||
| fn extract_hit_ids(v: &Value) -> Vec<String> { | ||||
|     let mut ids = Vec::new(); | ||||
|     if let Value::Bulk(items) = v { | ||||
|         for item in items { | ||||
|             if let Value::Bulk(row) = item { | ||||
|                 if !row.is_empty() { | ||||
|                     // first element is id (Data or Status) | ||||
|                     let id = match &row[0] { | ||||
|                         Value::Data(d) => String::from_utf8_lossy(d).to_string(), | ||||
|                         Value::Status(s) => s.clone(), | ||||
|                         Value::Int(n) => n.to_string(), | ||||
|                         _ => continue, | ||||
|                     }; | ||||
|                     ids.push(id); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     ids | ||||
| } | ||||
|  | ||||
| // Check whether a Bulk array (RESP array) contains a given string element. | ||||
| fn bulk_contains_string(v: &Value, needle: &str) -> bool { | ||||
|     match v { | ||||
|         Value::Bulk(items) => items.iter().any(|it| match it { | ||||
|             Value::Data(d) => String::from_utf8_lossy(d).contains(needle), | ||||
|             Value::Status(s) => s.contains(needle), | ||||
|             Value::Bulk(_) => bulk_contains_string(it, needle), | ||||
|             _ => false, | ||||
|         }), | ||||
|         _ => false, | ||||
|     } | ||||
| } | ||||
|  | ||||
| // ------------------------ | ||||
| // Test: Lance end-to-end (RESP) using only local embedders | ||||
| // ------------------------ | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_lance_end_to_end() { | ||||
|     let (_guard, port) = setup_server().await; | ||||
|  | ||||
|     // First, wait for RESP to be available; this also gives cargo-run child ample time to finish building. | ||||
|     // Reuse the helper that retries PING until success. | ||||
|     { | ||||
|         let _conn_ready = get_redis_connection(port); | ||||
|         // Drop immediately; we only needed readiness. | ||||
|     } | ||||
|  | ||||
|     // Build RPC client and create a Lance DB | ||||
|     let rpc_client = get_rpc_client(port).await; | ||||
|     // Ensure RPC server is listening before we issue createDatabase (allow longer warm-up to accommodate first-build costs) | ||||
|     wait_for_rpc_ready(&rpc_client, 3600, Duration::from_millis(250)).await; | ||||
|  | ||||
|     let db_config = DatabaseConfig { | ||||
|         name: Some("media-db".to_string()), | ||||
|         storage_path: None, | ||||
|         max_size: None, | ||||
|         redis_version: None, | ||||
|     }; | ||||
|  | ||||
|     let db_id = rpc_client | ||||
|         .create_database(BackendType::Lance, db_config, None) | ||||
|         .await | ||||
|         .expect("create_database Lance failed"); | ||||
|  | ||||
|     assert_eq!(db_id, 1, "Expected first Lance DB id to be 1"); | ||||
|  | ||||
|     // Add access keys | ||||
|     let _ = rpc_client | ||||
|         .add_access_key(db_id, "readwrite_key".to_string(), "readwrite".to_string()) | ||||
|         .await | ||||
|         .expect("add_access_key readwrite failed"); | ||||
|  | ||||
|     let _ = rpc_client | ||||
|         .add_access_key(db_id, "read_key".to_string(), "read".to_string()) | ||||
|         .await | ||||
|         .expect("add_access_key read failed"); | ||||
|  | ||||
|     // Connect to Redis and SELECT DB with readwrite key | ||||
|     let mut conn = get_redis_connection(port); | ||||
|  | ||||
|     let sel_ok: RedisResult<String> = redis::cmd("SELECT") | ||||
|         .arg(db_id) | ||||
|         .arg("KEY") | ||||
|         .arg("readwrite_key") | ||||
|         .query(&mut conn); | ||||
|     assert!(sel_ok.is_ok(), "SELECT db with key failed: {:?}", sel_ok); | ||||
|     assert_eq!(sel_ok.unwrap(), "OK"); | ||||
|  | ||||
|     // 1) Configure embedding providers: textset -> testhash dim 64, imageset -> testimagehash dim 512 | ||||
|     let v = redis::cmd("LANCE.EMBEDDING") | ||||
|         .arg("CONFIG") | ||||
|         .arg("SET") | ||||
|         .arg("textset") | ||||
|         .arg("PROVIDER") | ||||
|         .arg("testhash") | ||||
|         .arg("MODEL") | ||||
|         .arg("any") | ||||
|         .arg("PARAM") | ||||
|         .arg("dim") | ||||
|         .arg("64") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "Embedding config set (text) not OK: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.EMBEDDING") | ||||
|         .arg("CONFIG") | ||||
|         .arg("SET") | ||||
|         .arg("imageset") | ||||
|         .arg("PROVIDER") | ||||
|         .arg("testimagehash") | ||||
|         .arg("MODEL") | ||||
|         .arg("any") | ||||
|         .arg("PARAM") | ||||
|         .arg("dim") | ||||
|         .arg("512") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "Embedding config set (image) not OK: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     // 2) Create datasets | ||||
|     let v = redis::cmd("LANCE.CREATE") | ||||
|         .arg("textset") | ||||
|         .arg("DIM") | ||||
|         .arg(64) | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.CREATE textset failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.CREATE") | ||||
|         .arg("imageset") | ||||
|         .arg("DIM") | ||||
|         .arg(512) | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.CREATE imageset failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     // 3) Store two text documents | ||||
|     let v = redis::cmd("LANCE.STORE") | ||||
|         .arg("textset") | ||||
|         .arg("ID") | ||||
|         .arg("doc-1") | ||||
|         .arg("TEXT") | ||||
|         .arg("The quick brown fox jumps over the lazy dog") | ||||
|         .arg("META") | ||||
|         .arg("title") | ||||
|         .arg("Fox") | ||||
|         .arg("category") | ||||
|         .arg("animal") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.STORE doc-1 failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.STORE") | ||||
|         .arg("textset") | ||||
|         .arg("ID") | ||||
|         .arg("doc-2") | ||||
|         .arg("TEXT") | ||||
|         .arg("A fast auburn fox vaulted a sleepy canine") | ||||
|         .arg("META") | ||||
|         .arg("title") | ||||
|         .arg("Paraphrase") | ||||
|         .arg("category") | ||||
|         .arg("animal") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.STORE doc-2 failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     // 4) Store two images via BYTES (local fake bytes; the test embedder only hashes the bytes and never decodes them) | ||||
|     let img1: Vec<u8> = b"local-image-bytes-1-abcdefghijklmnopqrstuvwxyz".to_vec(); | ||||
|     let img2: Vec<u8> = b"local-image-bytes-2-ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_vec(); | ||||
|     let img1_b64 = base64::engine::general_purpose::STANDARD.encode(&img1); | ||||
|     let img2_b64 = base64::engine::general_purpose::STANDARD.encode(&img2); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.STOREIMAGE") | ||||
|         .arg("imageset") | ||||
|         .arg("ID") | ||||
|         .arg("img-1") | ||||
|         .arg("BYTES") | ||||
|         .arg(&img1_b64) | ||||
|         .arg("META") | ||||
|         .arg("title") | ||||
|         .arg("Local1") | ||||
|         .arg("group") | ||||
|         .arg("demo") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-1 failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.STOREIMAGE") | ||||
|         .arg("imageset") | ||||
|         .arg("ID") | ||||
|         .arg("img-2") | ||||
|         .arg("BYTES") | ||||
|         .arg(&img2_b64) | ||||
|         .arg("META") | ||||
|         .arg("title") | ||||
|         .arg("Local2") | ||||
|         .arg("group") | ||||
|         .arg("demo") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-2 failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     // 5) Search text: K 2 QUERY "quick brown fox" RETURN 1 title | ||||
|     let v = redis::cmd("LANCE.SEARCH") | ||||
|         .arg("textset") | ||||
|         .arg("K") | ||||
|         .arg(2) | ||||
|         .arg("QUERY") | ||||
|         .arg("quick brown fox") | ||||
|         .arg("RETURN") | ||||
|         .arg(1) | ||||
|         .arg("title") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|  | ||||
|     // Should be an array of hits | ||||
|     let ids = extract_hit_ids(&v); | ||||
|     assert!( | ||||
|         ids.contains(&"doc-1".to_string()) || ids.contains(&"doc-2".to_string()), | ||||
|         "LANCE.SEARCH should return doc-1/doc-2; got: {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|  | ||||
|     // With FILTER on category | ||||
|     let v = redis::cmd("LANCE.SEARCH") | ||||
|         .arg("textset") | ||||
|         .arg("K") | ||||
|         .arg(2) | ||||
|         .arg("QUERY") | ||||
|         .arg("fox jumps") | ||||
|         .arg("FILTER") | ||||
|         .arg("category = 'animal'") | ||||
|         .arg("RETURN") | ||||
|         .arg(1) | ||||
|         .arg("title") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|  | ||||
|     let ids_f = extract_hit_ids(&v); | ||||
|     assert!( | ||||
|         !ids_f.is_empty(), | ||||
|         "Filtered LANCE.SEARCH should return at least one document; got: {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|  | ||||
|     // 6) Search images with QUERYBYTES | ||||
|     let query_img: Vec<u8> = b"local-image-query-3-1234567890".to_vec(); | ||||
|     let query_img_b64 = base64::engine::general_purpose::STANDARD.encode(&query_img); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.SEARCHIMAGE") | ||||
|         .arg("imageset") | ||||
|         .arg("K") | ||||
|         .arg(2) | ||||
|         .arg("QUERYBYTES") | ||||
|         .arg(&query_img_b64) | ||||
|         .arg("RETURN") | ||||
|         .arg(1) | ||||
|         .arg("title") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|  | ||||
|     // Should get 2 hits (img-1 and img-2) in some order; assert array non-empty | ||||
|     let img_ids = extract_hit_ids(&v); | ||||
|     assert!( | ||||
|         !img_ids.is_empty(), | ||||
|         "LANCE.SEARCHIMAGE should return non-empty results; got: {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|  | ||||
|     // 7) Inspect datasets | ||||
|     let v = redis::cmd("LANCE.LIST").query::<Value>(&mut conn).unwrap(); | ||||
|     assert!( | ||||
|         bulk_contains_string(&v, "textset"), | ||||
|         "LANCE.LIST missing textset: {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|     assert!( | ||||
|         bulk_contains_string(&v, "imageset"), | ||||
|         "LANCE.LIST missing imageset: {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|  | ||||
|     // INFO textset | ||||
|     let info_text = redis::cmd("LANCE.INFO") | ||||
|         .arg("textset") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     // INFO returns Array [k,v,k,v,...] including "dimension" "64" and "row_count" "...". | ||||
|     let info_str = to_string_lossy(&info_text); | ||||
|     assert!( | ||||
|         info_str.contains("dimension") && info_str.contains("64"), | ||||
|         "LANCE.INFO textset should include dimension 64; got: {}", | ||||
|         info_str | ||||
|     ); | ||||
|  | ||||
|     // 8) Delete by id and drop datasets | ||||
|     let v = redis::cmd("LANCE.DEL") | ||||
|         .arg("textset") | ||||
|         .arg("doc-2") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     // Returns SimpleString "1" or Int 1 depending on encoding path; accept either | ||||
|     assert!( | ||||
|         value_is_int_eq(&v, 1) || value_is_str_eq(&v, "1"), | ||||
|         "LANCE.DEL doc-2 expected 1; got {}", | ||||
|         to_string_lossy(&v) | ||||
|     ); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.DROP") | ||||
|         .arg("textset") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.DROP textset failed: {}", to_string_lossy(&v)); | ||||
|  | ||||
|     let v = redis::cmd("LANCE.DROP") | ||||
|         .arg("imageset") | ||||
|         .query::<Value>(&mut conn) | ||||
|         .unwrap(); | ||||
|     assert!(value_is_ok(&v), "LANCE.DROP imageset failed: {}", to_string_lossy(&v)); | ||||
| } | ||||
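The flow above, distilled: a minimal sketch (not part of the diff) of the shortest Lance round-trip using the same helpers. The dataset name "miniset" and the document values are hypothetical, and the embedding provider is assumed to have been configured as in step 1.

    fn lance_roundtrip_sketch(conn: &mut redis::Connection) {
        // Assumes LANCE.EMBEDDING CONFIG SET miniset PROVIDER testhash MODEL any PARAM dim 64 ran first.
        let v = redis::cmd("LANCE.CREATE")
            .arg("miniset").arg("DIM").arg(64)
            .query::<redis::Value>(conn).unwrap();
        assert!(value_is_ok(&v));

        let v = redis::cmd("LANCE.STORE")
            .arg("miniset").arg("ID").arg("d1").arg("TEXT").arg("hello world")
            .query::<redis::Value>(conn).unwrap();
        assert!(value_is_ok(&v));

        let hits = redis::cmd("LANCE.SEARCH")
            .arg("miniset").arg("K").arg(1).arg("QUERY").arg("hello")
            .query::<redis::Value>(conn).unwrap();
        assert!(extract_hit_ids(&hits).contains(&"d1".to_string()));
    }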
| @@ -12,9 +12,17 @@ fn get_redis_connection(port: u16) -> Connection { | ||||
|         match client.get_connection() { | ||||
|             Ok(mut conn) => { | ||||
|                 if redis::cmd("PING").query::<String>(&mut conn).is_ok() { | ||||
|                     // Acquire ReadWrite permissions on this connection | ||||
|                     let sel: RedisResult<String> = redis::cmd("SELECT") | ||||
|                         .arg(0) | ||||
|                         .arg("KEY") | ||||
|                         .arg("test-admin") | ||||
|                         .query(&mut conn); | ||||
|                     if sel.is_ok() { | ||||
|                         return conn; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 if attempts >= 120 { | ||||
|                     panic!( | ||||
| @@ -78,6 +86,8 @@ fn setup_server() -> (ServerProcessGuard, u16) { | ||||
|             "--port", | ||||
|             &port.to_string(), | ||||
|             "--debug", | ||||
|             "--admin-secret", | ||||
|             "test-admin", | ||||
|         ]) | ||||
|         .spawn() | ||||
|         .expect("Failed to start server process"); | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{server::Server, options::DBOption}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Duration; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| use tokio::net::TcpStream; | ||||
| @@ -17,11 +18,13 @@ async fn start_test_server(test_name: &str) -> (Server, u16) { | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|      | ||||
|     let option = DBOption { | ||||
|         dir: test_dir, | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port, | ||||
|         debug: true, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|      | ||||
|     let server = Server::new(option).await; | ||||
| @@ -33,7 +36,17 @@ async fn connect_to_server(port: u16) -> TcpStream { | ||||
|     let mut attempts = 0; | ||||
|     loop { | ||||
|         match TcpStream::connect(format!("127.0.0.1:{}", port)).await { | ||||
|             Ok(stream) => return stream, | ||||
|             Ok(mut stream) => { | ||||
|                 // Obtain ReadWrite permissions for this connection by selecting DB 0 with admin key | ||||
|                 let resp = send_command( | ||||
|                     &mut stream, | ||||
|                     "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n", | ||||
|                 ).await; | ||||
|                 if !resp.contains("OK") { | ||||
|                     panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp); | ||||
|                 } | ||||
|                 return stream; | ||||
|             } | ||||
|             Err(_) if attempts < 10 => { | ||||
|                 attempts += 1; | ||||
|                 sleep(Duration::from_millis(100)).await; | ||||
tests/rpc_tests.rs (new file, 86 lines)
| @@ -0,0 +1,86 @@ | ||||
| use herodb::rpc::{BackendType, DatabaseConfig}; | ||||
| use herodb::admin_meta; | ||||
| use herodb::options::BackendType as OptionsBackendType; | ||||
| use std::path::Path; | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_rpc_server_basic() { | ||||
|     // This test would require starting the RPC server in a separate thread | ||||
|     // For now, we'll just test that the types compile correctly | ||||
|  | ||||
|     // Test serialization of types | ||||
|     let backend = BackendType::Redb; | ||||
|     let config = DatabaseConfig { | ||||
|         name: Some("test_db".to_string()), | ||||
|         storage_path: Some("/tmp/test".to_string()), | ||||
|         max_size: Some(1024 * 1024), | ||||
|         redis_version: Some("7.0".to_string()), | ||||
|     }; | ||||
|  | ||||
|     let backend_json = serde_json::to_string(&backend).unwrap(); | ||||
|     let config_json = serde_json::to_string(&config).unwrap(); | ||||
|  | ||||
|     assert_eq!(backend_json, "\"Redb\""); | ||||
|     assert!(config_json.contains("test_db")); | ||||
| } | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_database_config_serialization() { | ||||
|     let config = DatabaseConfig { | ||||
|         name: Some("my_db".to_string()), | ||||
|         storage_path: None, | ||||
|         max_size: Some(1000000), | ||||
|         redis_version: Some("7.0".to_string()), | ||||
|     }; | ||||
|  | ||||
|     let json = serde_json::to_value(&config).unwrap(); | ||||
|     assert_eq!(json["name"], "my_db"); | ||||
|     assert_eq!(json["max_size"], 1000000); | ||||
|     assert_eq!(json["redis_version"], "7.0"); | ||||
| } | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_backend_type_serialization() { | ||||
|     // Test that both Redb and Sled backends serialize correctly | ||||
|     let redb_backend = BackendType::Redb; | ||||
|     let sled_backend = BackendType::Sled; | ||||
|  | ||||
|     let redb_json = serde_json::to_string(&redb_backend).unwrap(); | ||||
|     let sled_json = serde_json::to_string(&sled_backend).unwrap(); | ||||
|  | ||||
|     assert_eq!(redb_json, "\"Redb\""); | ||||
|     assert_eq!(sled_json, "\"Sled\""); | ||||
|  | ||||
|     // Test deserialization | ||||
|     let redb_deserialized: BackendType = serde_json::from_str(&redb_json).unwrap(); | ||||
|     let sled_deserialized: BackendType = serde_json::from_str(&sled_json).unwrap(); | ||||
|  | ||||
|     assert!(matches!(redb_deserialized, BackendType::Redb)); | ||||
|     assert!(matches!(sled_deserialized, BackendType::Sled)); | ||||
| } | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_database_name_persistence() { | ||||
|     let base_dir = "/tmp/test_db_name_persistence"; | ||||
|     let admin_secret = "test-admin-secret"; | ||||
|     let backend = OptionsBackendType::Redb; | ||||
|     let db_id = 1; | ||||
|     let test_name = "test-database-name"; | ||||
|  | ||||
|     // Clean up any existing test data | ||||
|     let _ = std::fs::remove_dir_all(base_dir); | ||||
|  | ||||
|     // Set the database name | ||||
|     admin_meta::set_database_name(Path::new(base_dir), backend.clone(), admin_secret, db_id, test_name) | ||||
|         .expect("Failed to set database name"); | ||||
|  | ||||
|     // Retrieve the database name | ||||
|     let retrieved_name = admin_meta::get_database_name(Path::new(base_dir), backend, admin_secret, db_id) | ||||
|         .expect("Failed to get database name"); | ||||
|  | ||||
|     // Verify the name matches | ||||
|     assert_eq!(retrieved_name, Some(test_name.to_string())); | ||||
|  | ||||
|     // Clean up | ||||
|     let _ = std::fs::remove_dir_all(base_dir); | ||||
| } | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{server::Server, options::DBOption}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Duration; | ||||
| use tokio::time::sleep; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| @@ -19,11 +20,13 @@ async fn start_test_server(test_name: &str) -> (Server, u16) { | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|      | ||||
|     let option = DBOption { | ||||
|         dir: test_dir, | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port, | ||||
|         debug: true, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|      | ||||
|     let server = Server::new(option).await; | ||||
| @@ -33,9 +36,16 @@ async fn start_test_server(test_name: &str) -> (Server, u16) { | ||||
| // Helper function to send Redis command and get response | ||||
| async fn send_redis_command(port: u16, command: &str) -> String { | ||||
|     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap(); | ||||
|      | ||||
|     // Acquire ReadWrite permissions on this new connection | ||||
|     let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n"; | ||||
|     stream.write_all(handshake.as_bytes()).await.unwrap(); | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let _ = stream.read(&mut buffer).await.unwrap(); // Read and ignore the OK for handshake | ||||
|      | ||||
|     // Now send the intended command | ||||
|     stream.write_all(command.as_bytes()).await.unwrap(); | ||||
|      | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let n = stream.read(&mut buffer).await.unwrap(); | ||||
|     String::from_utf8_lossy(&buffer[..n]).to_string() | ||||
| } | ||||
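For reference, a sketch (not part of this change) of how the hand-written handshake string is framed as a RESP array; the helper name is hypothetical.

    fn resp_array(parts: &[&str]) -> String {
        // *<count>\r\n followed by, per element, $<byte-length>\r\n<element>\r\n
        let mut out = format!("*{}\r\n", parts.len());
        for p in parts {
            out.push_str(&format!("${}\r\n{}\r\n", p.len(), p));
        }
        out
    }

    // resp_array(&["SELECT", "0", "KEY", "test-admin"]) reproduces the handshake above:
    // "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n"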
| @@ -186,9 +196,16 @@ async fn test_transaction_operations() { | ||||
|      // Use a single connection for the transaction | ||||
|     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap(); | ||||
|      | ||||
|     // Acquire write permissions for this connection | ||||
|     let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n"; | ||||
|     stream.write_all(handshake.as_bytes()).await.unwrap(); | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let n = stream.read(&mut buffer).await.unwrap(); | ||||
|     let resp = String::from_utf8_lossy(&buffer[..n]); | ||||
|     assert!(resp.contains("OK")); | ||||
|      | ||||
|     // Test MULTI | ||||
|     stream.write_all("*1\r\n$5\r\nMULTI\r\n".as_bytes()).await.unwrap(); | ||||
|     let mut buffer = [0; 1024]; | ||||
|     let n = stream.read(&mut buffer).await.unwrap(); | ||||
|     let response = String::from_utf8_lossy(&buffer[..n]); | ||||
|     assert!(response.contains("OK")); | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{server::Server, options::DBOption}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Duration; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| use tokio::net::TcpStream; | ||||
| @@ -17,11 +18,13 @@ async fn start_test_server(test_name: &str) -> (Server, u16) { | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|      | ||||
|     let option = DBOption { | ||||
|         dir: test_dir, | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port, | ||||
|         debug: false, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|      | ||||
|     let server = Server::new(option).await; | ||||
| @@ -42,7 +45,17 @@ async fn connect_to_server(port: u16) -> TcpStream { | ||||
|     let mut attempts = 0; | ||||
|     loop { | ||||
|         match TcpStream::connect(format!("127.0.0.1:{}", port)).await { | ||||
|             Ok(stream) => return stream, | ||||
|             Ok(mut stream) => { | ||||
|                 // Acquire ReadWrite permissions for this connection | ||||
|                 let resp = send_command( | ||||
|                     &mut stream, | ||||
|                     "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n", | ||||
|                 ).await; | ||||
|                 if !resp.contains("OK") { | ||||
|                     panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp); | ||||
|                 } | ||||
|                 return stream; | ||||
|             } | ||||
|             Err(_) if attempts < 10 => { | ||||
|                 attempts += 1; | ||||
|                 sleep(Duration::from_millis(100)).await; | ||||
| @@ -97,13 +110,20 @@ async fn test_hset_clean_db() { | ||||
|      | ||||
|     let mut stream = connect_to_server(port).await; | ||||
|  | ||||
|     // Test HSET - should return 1 for new field | ||||
|     let response = send_command(&mut stream, "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n").await; | ||||
|     // Ensure clean DB state (admin DB 0 may be shared due to global singleton) | ||||
|     let flush = send_command(&mut stream, "*1\r\n$7\r\nFLUSHDB\r\n").await; | ||||
|     assert!(flush.contains("OK"), "Failed to FLUSHDB: {}", flush); | ||||
|  | ||||
|     // Test HSET - should return 1 for new field (use a unique key name to avoid collisions) | ||||
|     let key = "hash_clean"; | ||||
|     let hset_cmd = format!("*4\r\n$4\r\nHSET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n", key.len(), key); | ||||
|     let response = send_command(&mut stream, &hset_cmd).await; | ||||
|     println!("HSET response: {}", response); | ||||
|     assert!(response.contains("1"), "Expected HSET to return 1, got: {}", response); | ||||
|      | ||||
|     // Test HGET | ||||
|     let response = send_command(&mut stream, "*3\r\n$4\r\nHGET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n").await; | ||||
|     let hget_cmd = format!("*3\r\n$4\r\nHGET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n", key.len(), key); | ||||
|     let response = send_command(&mut stream, &hget_cmd).await; | ||||
|     println!("HGET response: {}", response); | ||||
|     assert!(response.contains("value1")); | ||||
| } | ||||
tests/tantivy_integration_tests.rs (new file, 294 lines)
| @@ -0,0 +1,294 @@ | ||||
| use redis::{Client, Connection, RedisResult}; | ||||
| use std::process::{Child, Command}; | ||||
| use std::time::Duration; | ||||
| use jsonrpsee::http_client::{HttpClientBuilder, HttpClient}; | ||||
| use herodb::rpc::{RpcClient, BackendType, DatabaseConfig}; | ||||
|  | ||||
| // Helper function to get Redis connection, retrying until successful | ||||
| fn get_redis_connection(port: u16) -> Connection { | ||||
|     let connection_info = format!("redis://127.0.0.1:{}", port); | ||||
|     let client = Client::open(connection_info).unwrap(); | ||||
|     let mut attempts = 0; | ||||
|     loop { | ||||
|         match client.get_connection() { | ||||
|             Ok(mut conn) => { | ||||
|                 if redis::cmd("PING").query::<String>(&mut conn).is_ok() { | ||||
|                     return conn; | ||||
|                 } | ||||
|             } | ||||
|             Err(e) => { | ||||
|                 if attempts >= 120 { | ||||
|                     panic!( | ||||
|                         "Failed to connect to Redis server after 120 attempts: {}", | ||||
|                         e | ||||
|                     ); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         attempts += 1; | ||||
|         std::thread::sleep(Duration::from_millis(100)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Helper function to get RPC client | ||||
| async fn get_rpc_client(port: u16) -> HttpClient { | ||||
|     let url = format!("http://127.0.0.1:{}", port + 1); // RPC port is Redis port + 1 | ||||
|     let client = HttpClientBuilder::default().build(url).unwrap(); | ||||
|     client | ||||
| } | ||||
|  | ||||
| // A guard to ensure the server process is killed when it goes out of scope | ||||
| struct ServerProcessGuard { | ||||
|     process: Child, | ||||
|     test_dir: String, | ||||
| } | ||||
|  | ||||
| impl Drop for ServerProcessGuard { | ||||
|     fn drop(&mut self) { | ||||
|         println!("Killing server process (pid: {})...", self.process.id()); | ||||
|         if let Err(e) = self.process.kill() { | ||||
|             eprintln!("Failed to kill server process: {}", e); | ||||
|         } | ||||
|         match self.process.wait() { | ||||
|             Ok(status) => println!("Server process exited with: {}", status), | ||||
|             Err(e) => eprintln!("Failed to wait on server process: {}", e), | ||||
|         } | ||||
|  | ||||
|         // Clean up the specific test directory | ||||
|         println!("Cleaning up test directory: {}", self.test_dir); | ||||
|         if let Err(e) = std::fs::remove_dir_all(&self.test_dir) { | ||||
|             eprintln!("Failed to clean up test directory: {}", e); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Helper to set up the server and return connections | ||||
| async fn setup_server() -> (ServerProcessGuard, u16, Connection, HttpClient) { | ||||
|     use std::sync::atomic::{AtomicU16, Ordering}; | ||||
|     static PORT_COUNTER: AtomicU16 = AtomicU16::new(16500); | ||||
|     let port = PORT_COUNTER.fetch_add(1, Ordering::SeqCst); | ||||
|  | ||||
|     let test_dir = format!("/tmp/herodb_tantivy_test_{}", port); | ||||
|  | ||||
|     // Clean up previous test data | ||||
|     if std::path::Path::new(&test_dir).exists() { | ||||
|         let _ = std::fs::remove_dir_all(&test_dir); | ||||
|     } | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|  | ||||
|     // Start the server in a subprocess | ||||
|     let child = Command::new("cargo") | ||||
|         .args(&[ | ||||
|             "run", | ||||
|             "--", | ||||
|             "--dir", | ||||
|             &test_dir, | ||||
|             "--port", | ||||
|             &port.to_string(), | ||||
|             "--rpc-port", | ||||
|             &(port + 1).to_string(), | ||||
|             "--enable-rpc", | ||||
|             "--debug", | ||||
|             "--admin-secret", | ||||
|             "test-admin", | ||||
|         ]) | ||||
|         .spawn() | ||||
|         .expect("Failed to start server process"); | ||||
|  | ||||
|     // Create a new guard that also owns the test directory path | ||||
|     let guard = ServerProcessGuard { | ||||
|         process: child, | ||||
|         test_dir, | ||||
|     }; | ||||
|  | ||||
|     // Give the server time to build and start (cargo run may compile first) | ||||
|     std::thread::sleep(Duration::from_millis(3000)); | ||||
|  | ||||
|     let conn = get_redis_connection(port); | ||||
|     let rpc_client = get_rpc_client(port).await; | ||||
|  | ||||
|     (guard, port, conn, rpc_client) | ||||
| } | ||||
|  | ||||
|  | ||||
| #[tokio::test] | ||||
| async fn test_tantivy_full_text_search() { | ||||
|     let (_server_guard, _port, mut conn, rpc_client) = setup_server().await; | ||||
|  | ||||
|     // Create a Tantivy database via RPC | ||||
|     let db_config = DatabaseConfig { | ||||
|         name: Some("test_tantivy_db".to_string()), | ||||
|         storage_path: None, | ||||
|         max_size: None, | ||||
|         redis_version: None, | ||||
|     }; | ||||
|  | ||||
|     let db_id = rpc_client.create_database(BackendType::Tantivy, db_config, None).await.unwrap(); | ||||
|     assert_eq!(db_id, 1); | ||||
|  | ||||
|     // Add readwrite access key | ||||
|     let _ = rpc_client.add_access_key(db_id, "readwrite_key".to_string(), "readwrite".to_string()).await.unwrap(); | ||||
|  | ||||
|     // Add read-only access key | ||||
|     let _ = rpc_client.add_access_key(db_id, "read_key".to_string(), "read".to_string()).await.unwrap(); | ||||
|  | ||||
|     // Test with readwrite permissions | ||||
|     test_tantivy_with_readwrite_permissions(&mut conn, db_id).await; | ||||
|  | ||||
|     // Test with read-only permissions | ||||
|     test_tantivy_with_read_permissions(&mut conn, db_id).await; | ||||
|  | ||||
|     // Test access denied for invalid key | ||||
|     test_tantivy_access_denied(&mut conn, db_id).await; | ||||
| } | ||||
|  | ||||
| async fn test_tantivy_with_readwrite_permissions(conn: &mut Connection, db_id: u64) { | ||||
|     // Select database with readwrite key | ||||
|     let result: RedisResult<String> = redis::cmd("SELECT") | ||||
|         .arg(db_id) | ||||
|         .arg("KEY") | ||||
|         .arg("readwrite_key") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Test FT.CREATE | ||||
|     let result: RedisResult<String> = redis::cmd("FT.CREATE") | ||||
|         .arg("test_index") | ||||
|         .arg("SCHEMA") | ||||
|         .arg("title") | ||||
|         .arg("TEXT") | ||||
|         .arg("content") | ||||
|         .arg("TEXT") | ||||
|         .arg("tags") | ||||
|         .arg("TAG") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Test FT.ADD | ||||
|     let result: RedisResult<String> = redis::cmd("FT.ADD") | ||||
|         .arg("test_index") | ||||
|         .arg("doc1") | ||||
|         .arg("1.0") | ||||
|         .arg("title") | ||||
|         .arg("Hello World") | ||||
|         .arg("content") | ||||
|         .arg("This is a test document") | ||||
|         .arg("tags") | ||||
|         .arg("test,example") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Add another document | ||||
|     let result: RedisResult<String> = redis::cmd("FT.ADD") | ||||
|         .arg("test_index") | ||||
|         .arg("doc2") | ||||
|         .arg("1.0") | ||||
|         .arg("title") | ||||
|         .arg("Goodbye World") | ||||
|         .arg("content") | ||||
|         .arg("Another test document") | ||||
|         .arg("tags") | ||||
|         .arg("test,another") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Test FT.SEARCH | ||||
|     let result: RedisResult<Vec<String>> = redis::cmd("FT.SEARCH") | ||||
|         .arg("test_index") | ||||
|         .arg("test") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     let results = result.unwrap(); | ||||
|     assert!(results.len() >= 3); // At least total count + 2 documents | ||||
|     assert_eq!(results[0], "2"); // Total matches | ||||
|  | ||||
|     // Test FT.INFO | ||||
|     let result: RedisResult<Vec<String>> = redis::cmd("FT.INFO") | ||||
|         .arg("test_index") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     let info = result.unwrap(); | ||||
|     assert!(info.contains(&"index_name".to_string())); | ||||
|     assert!(info.contains(&"test_index".to_string())); | ||||
|  | ||||
|     // Test FT.DEL | ||||
|     let result: RedisResult<String> = redis::cmd("FT.DEL") | ||||
|         .arg("test_index") | ||||
|         .arg("doc1") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "1"); | ||||
|  | ||||
|     // Verify document was deleted | ||||
|     let result: RedisResult<Vec<String>> = redis::cmd("FT.SEARCH") | ||||
|         .arg("test_index") | ||||
|         .arg("Hello") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     let results = result.unwrap(); | ||||
|     assert_eq!(results[0], "0"); // No matches | ||||
|  | ||||
|     // Test FT.DROP | ||||
|     let result: RedisResult<String> = redis::cmd("FT.DROP") | ||||
|         .arg("test_index") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Verify index was dropped | ||||
|     let result: RedisResult<String> = redis::cmd("FT.INFO") | ||||
|         .arg("test_index") | ||||
|         .query(conn); | ||||
|     assert!(result.is_err()); // Should fail | ||||
| } | ||||
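For reference, the flat reply convention asserted above (first element is the total match count, the remaining elements are documents) could be split with a small helper like the following; the function is hypothetical and not part of the test file.

    fn split_search_reply(raw: Vec<String>) -> (usize, Vec<String>) {
        // FT.SEARCH replies start with the total hit count, followed by the documents.
        let total = raw.first().and_then(|s| s.parse().ok()).unwrap_or(0);
        let docs = raw.into_iter().skip(1).collect();
        (total, docs)
    }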
|  | ||||
| async fn test_tantivy_with_read_permissions(conn: &mut Connection, db_id: u64) { | ||||
|     // Select database with read-only key | ||||
|     let result: RedisResult<String> = redis::cmd("SELECT") | ||||
|         .arg(db_id) | ||||
|         .arg("KEY") | ||||
|         .arg("read_key") | ||||
|         .query(conn); | ||||
|     assert!(result.is_ok()); | ||||
|     assert_eq!(result.unwrap(), "OK"); | ||||
|  | ||||
|     // Recreate index for testing | ||||
|     let result: RedisResult<String> = redis::cmd("FT.CREATE") | ||||
|         .arg("test_index_read") | ||||
|         .arg("SCHEMA") | ||||
|         .arg("title") | ||||
|         .arg("TEXT") | ||||
|         .query(conn); | ||||
|     assert!(result.is_err()); // Should fail due to read-only permissions | ||||
|     assert!(result.unwrap_err().to_string().contains("write permission denied")); | ||||
|  | ||||
|     // Add document should fail | ||||
|     let result: RedisResult<String> = redis::cmd("FT.ADD") | ||||
|         .arg("test_index_read") | ||||
|         .arg("doc1") | ||||
|         .arg("1.0") | ||||
|         .arg("title") | ||||
|         .arg("Test") | ||||
|         .query(conn); | ||||
|     assert!(result.is_err()); | ||||
|     assert!(result.unwrap_err().to_string().contains("write permission denied")); | ||||
|  | ||||
|     // Searching with a read-only key would be allowed, but only against an existing index; | ||||
|     // creating one first would need write permissions, so this test leaves the index absent | ||||
|     // and does not assert on search behaviour here. | ||||
| } | ||||
|  | ||||
| async fn test_tantivy_access_denied(conn: &mut Connection, db_id: u64) { | ||||
|     // Try to select with invalid key | ||||
|     let result: RedisResult<String> = redis::cmd("SELECT") | ||||
|         .arg(db_id) | ||||
|         .arg("KEY") | ||||
|         .arg("invalid_key") | ||||
|         .query(conn); | ||||
|     assert!(result.is_err()); | ||||
|     assert!(result.unwrap_err().to_string().contains("invalid access key")); | ||||
| } | ||||
| @@ -1,4 +1,5 @@ | ||||
| use herodb::{options::DBOption, server::Server}; | ||||
| use std::path::PathBuf; | ||||
| use tokio::io::{AsyncReadExt, AsyncWriteExt}; | ||||
| use tokio::net::TcpStream; | ||||
| use tokio::time::{sleep, Duration}; | ||||
| @@ -17,11 +18,13 @@ async fn start_test_server(test_name: &str) -> (Server, u16) { | ||||
|     std::fs::create_dir_all(&test_dir).unwrap(); | ||||
|  | ||||
|     let option = DBOption { | ||||
|         dir: test_dir, | ||||
|         dir: PathBuf::from(test_dir), | ||||
|         port, | ||||
|         debug: false, | ||||
|         encrypt: false, | ||||
|         encryption_key: None, | ||||
|         backend: herodb::options::BackendType::Redb, | ||||
|         admin_secret: "test-admin".to_string(), | ||||
|     }; | ||||
|  | ||||
|     let server = Server::new(option).await; | ||||
| @@ -60,7 +63,17 @@ async fn connect(port: u16) -> TcpStream { | ||||
|     let mut attempts = 0; | ||||
|     loop { | ||||
|         match TcpStream::connect(format!("127.0.0.1:{}", port)).await { | ||||
|             Ok(s) => return s, | ||||
|             Ok(mut s) => { | ||||
|                 // Acquire ReadWrite permissions for this connection using admin DB 0 | ||||
|                 let resp = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await; | ||||
|                 assert_contains(&resp, "OK", "SELECT 0 KEY test-admin handshake"); | ||||
|  | ||||
|                 // Ensure clean slate per test on DB 0 | ||||
|                 let fl = send_cmd(&mut s, &["FLUSHDB"]).await; | ||||
|                 assert_contains(&fl, "OK", "FLUSHDB after handshake"); | ||||
|  | ||||
|                 return s; | ||||
|             } | ||||
|             Err(_) if attempts < 30 => { | ||||
|                 attempts += 1; | ||||
|                 sleep(Duration::from_millis(100)).await; | ||||
| @@ -245,9 +258,9 @@ async fn test_01_connection_and_info() { | ||||
|     let getname = send_cmd(&mut s, &["CLIENT", "GETNAME"]).await; | ||||
|     assert_contains(&getname, "myapp", "CLIENT GETNAME"); | ||||
|  | ||||
|     // SELECT db | ||||
|     let sel = send_cmd(&mut s, &["SELECT", "0"]).await; | ||||
|     assert_contains(&sel, "OK", "SELECT 0"); | ||||
|     // SELECT db (requires key on DB 0) | ||||
|     let sel = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await; | ||||
|     assert_contains(&sel, "OK", "SELECT 0 with key"); | ||||
|  | ||||
|     // QUIT should close connection after sending OK | ||||
|     let quit = send_cmd(&mut s, &["QUIT"]).await; | ||||
| @@ -279,6 +292,10 @@ async fn test_02_strings_and_expiry() { | ||||
|     let ex0 = send_cmd(&mut s, &["EXISTS", "user:1"]).await; | ||||
|     assert_contains(&ex0, "0", "EXISTS after DEL"); | ||||
|      | ||||
|     // DEL non-existent should return 0 | ||||
|     let del0 = send_cmd(&mut s, &["DEL", "user:1"]).await; | ||||
|     assert_contains(&del0, "0", "DEL user:1 when not exists -> 0"); | ||||
|      | ||||
|     // INCR behavior | ||||
|     let i1 = send_cmd(&mut s, &["INCR", "count"]).await; | ||||
|     assert_contains(&i1, "1", "INCR new key -> 1"); | ||||
| @@ -500,11 +517,11 @@ async fn test_07_age_stateless_suite() { | ||||
|     let mut s = connect(port).await; | ||||
|  | ||||
|     // GENENC -> [recipient, identity] | ||||
|     let gen = send_cmd(&mut s, &["AGE", "GENENC"]).await; | ||||
|     let genenc = send_cmd(&mut s, &["AGE", "GENENC"]).await; | ||||
|     assert!( | ||||
|         gen.starts_with("*2\r\n$"), | ||||
|         genenc.starts_with("*2\r\n$"), | ||||
|         "AGE GENENC should return array [recipient, identity], got:\n{}", | ||||
|         gen | ||||
|         genenc | ||||
|     ); | ||||
|  | ||||
|     // Parse simple RESP array of two bulk strings to extract keys | ||||
| @@ -519,7 +536,7 @@ async fn test_07_age_stateless_suite() { | ||||
|         let ident = lines.next().unwrap_or("").to_string(); | ||||
|         (recip, ident) | ||||
|     } | ||||
|     let (recipient, identity) = parse_two_bulk_array(&gen); | ||||
|     let (recipient, identity) = parse_two_bulk_array(&genenc); | ||||
|     assert!( | ||||
|         recipient.starts_with("age1") && identity.starts_with("AGE-SECRET-KEY-1"), | ||||
|         "Unexpected AGE key formats.\nrecipient: {}\nidentity: {}", | ||||
| @@ -590,7 +607,7 @@ async fn test_08_age_persistent_named_suite() { | ||||
|  | ||||
|     // AGE LIST | ||||
|     let lst = send_cmd(&mut s, &["AGE", "LIST"]).await; | ||||
|     assert_contains(&lst, "encpub", "AGE LIST label encpub"); | ||||
|     // After flattening, LIST returns a flat array of managed key names | ||||
|     assert_contains(&lst, "app1", "AGE LIST includes app1"); | ||||
| } | ||||
|  | ||||