diff --git a/README.md b/README.md index 82762bf..b9a3efb 100644 --- a/README.md +++ b/README.md @@ -47,18 +47,24 @@ HeroDB can be interacted with using any standard Redis client, such as `redis-cl ### Example with `redis-cli` +Connections start with no database selected. You must SELECT a database first. + +- To work in the admin database (DB 0), authenticate with the admin secret: ```bash +redis-cli -p 6379 SELECT 0 KEY myadminsecret redis-cli -p 6379 SET mykey "Hello from HeroDB!" redis-cli -p 6379 GET mykey # → "Hello from HeroDB!" +``` +- To use a user database, first create one via the JSON-RPC API (see docs/rpc_examples.md), then select it: +```bash +# Suppose RPC created database id 1 +redis-cli -p 6379 SELECT 1 redis-cli -p 6379 HSET user:1 name "Alice" age "30" redis-cli -p 6379 HGET user:1 name # → "Alice" - redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10 -# → 1) "0" -# 2) 1) "user:1" ``` ## Cryptography diff --git a/docs/admin.md b/docs/admin.md index 55b9ffb..7ceffab 100644 --- a/docs/admin.md +++ b/docs/admin.md @@ -80,6 +80,7 @@ Keys in `DB 0` (internal layout, but useful to understand how things work): - Requires the exact admin secret as the `KEY` argument to `SELECT 0` - Permission is `ReadWrite` when the secret matches +Connections start with no database selected. Any command that requires storage (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database. Admin DB 0 is never accessible without authenticating via SELECT 0 KEY <admin_secret>. ### How to select databases with optional `KEY` - Public DB (no key required) diff --git a/docs/cmds.md b/docs/cmds.md index 2f61c87..e132141 100644 --- a/docs/cmds.md +++ b/docs/cmds.md @@ -126,7 +126,9 @@ redis-cli -p 6381 --pipe < dump.rdb ## Authentication and Database Selection -HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in `docs/admin.md`. +Connections start with no database selected. Any storage-backed command (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database. + +HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in (docs/admin.md:1). Examples: ```bash @@ -145,4 +147,10 @@ redis-cli -p $PORT SELECT 2 KEY my-db2-access-key # Admin DB 0 (requires admin secret) redis-cli -p $PORT SELECT 0 KEY my-admin-secret # → OK +``` + +```bash +# Before selecting a DB, storage commands will fail +redis-cli -p $PORT GET key +# → -ERR No database selected. Use SELECT [KEY ] first ``` \ No newline at end of file diff --git a/src/server.rs b/src/server.rs index 1aa8cf5..90aab93 100644 --- a/src/server.rs +++ b/src/server.rs @@ -21,6 +21,8 @@ use ureq::{Agent, AgentBuilder}; use std::time::Duration; use std::io::Read; +const NO_DB_SELECTED: u64 = u64::MAX; + #[derive(Clone)] pub struct Server { pub db_cache: std::sync::Arc>>>, @@ -65,7 +67,7 @@ impl Server { db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())), option, client_name: None, - selected_db: 0, + selected_db: NO_DB_SELECTED, queued_cmd: None, current_permissions: None, @@ -103,6 +105,17 @@ impl Server { } pub fn current_storage(&self) -> Result, DBError> { + // Require explicit SELECT before any storage access + if self.selected_db == NO_DB_SELECTED { + return Err(DBError("No database selected. Use SELECT [KEY ] first".to_string())); + } + // Admin DB 0 access must be authenticated with SELECT 0 KEY + if self.selected_db == 0 { + if !matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite)) { + return Err(DBError("Admin DB 0 requires SELECT 0 KEY ".to_string())); + } + } + let mut cache = self.db_cache.write().unwrap(); if let Some(storage) = cache.get(&self.selected_db) { @@ -328,6 +341,10 @@ impl Server { /// Check if current permissions allow read operations pub fn has_read_permission(&self) -> bool { + // No DB selected -> no permissions + if self.selected_db == NO_DB_SELECTED { + return false; + } // If an explicit permission is set for this connection, honor it. if let Some(perms) = self.current_permissions.as_ref() { return matches!(*perms, crate::rpc::Permissions::Read | crate::rpc::Permissions::ReadWrite); @@ -347,6 +364,10 @@ impl Server { /// Check if current permissions allow write operations pub fn has_write_permission(&self) -> bool { + // No DB selected -> no permissions + if self.selected_db == NO_DB_SELECTED { + return false; + } // If an explicit permission is set for this connection, honor it. if let Some(perms) = self.current_permissions.as_ref() { return matches!(*perms, crate::rpc::Permissions::ReadWrite); diff --git a/tests/lance_integration_tests.rs b/tests/lance_integration_tests.rs new file mode 100644 index 0000000..6bf2aab --- /dev/null +++ b/tests/lance_integration_tests.rs @@ -0,0 +1,484 @@ +use redis::{Client, Connection, RedisResult, Value}; +use std::process::{Child, Command}; +use std::time::Duration; + +use jsonrpsee::http_client::{HttpClient, HttpClientBuilder}; +use herodb::rpc::{BackendType, DatabaseConfig, RpcClient}; +use base64::Engine; +use tokio::time::sleep; + +// ------------------------ +// Helpers +// ------------------------ + +fn get_redis_connection(port: u16) -> Connection { + let connection_info = format!("redis://127.0.0.1:{}", port); + let client = Client::open(connection_info).unwrap(); + let mut attempts = 0; + loop { + match client.get_connection() { + Ok(mut conn) => { + if redis::cmd("PING").query::(&mut conn).is_ok() { + return conn; + } + } + Err(e) => { + if attempts >= 3600 { + panic!("Failed to connect to Redis server after 3600 attempts: {}", e); + } + } + } + attempts += 1; + std::thread::sleep(Duration::from_millis(500)); + } +} + +async fn get_rpc_client(port: u16) -> HttpClient { + let url = format!("http://127.0.0.1:{}", port + 1); // RPC port = Redis port + 1 + HttpClientBuilder::default().build(url).unwrap() +} + +/// Wait until RPC server is responsive (getServerStats succeeds) or panic after retries. +async fn wait_for_rpc_ready(client: &HttpClient, max_attempts: u32, delay: Duration) { + for _ in 0..max_attempts { + match client.get_server_stats().await { + Ok(_) => return, + Err(_) => { + sleep(delay).await; + } + } + } + panic!("RPC server did not become ready in time"); +} + +// A guard to ensure the server process is killed when it goes out of scope and test dir cleaned. +struct ServerProcessGuard { + process: Child, + test_dir: String, +} + +impl Drop for ServerProcessGuard { + fn drop(&mut self) { + eprintln!("Killing server process (pid: {})...", self.process.id()); + if let Err(e) = self.process.kill() { + eprintln!("Failed to kill server process: {}", e); + } + match self.process.wait() { + Ok(status) => eprintln!("Server process exited with: {}", status), + Err(e) => eprintln!("Failed to wait on server process: {}", e), + } + + // Clean up the specific test directory + eprintln!("Cleaning up test directory: {}", self.test_dir); + if let Err(e) = std::fs::remove_dir_all(&self.test_dir) { + eprintln!("Failed to clean up test directory: {}", e); + } + } +} + +// Helper to set up the server and return guard + ports +async fn setup_server() -> (ServerProcessGuard, u16) { + use std::sync::atomic::{AtomicU16, Ordering}; + static PORT_COUNTER: AtomicU16 = AtomicU16::new(17500); + let port = PORT_COUNTER.fetch_add(1, Ordering::SeqCst); + + let test_dir = format!("/tmp/herodb_lance_test_{}", port); + + // Clean up previous test data + if std::path::Path::new(&test_dir).exists() { + let _ = std::fs::remove_dir_all(&test_dir); + } + std::fs::create_dir_all(&test_dir).unwrap(); + + // Start the server in a subprocess with RPC enabled (follows tantivy test pattern) + let child = Command::new("cargo") + .args(&[ + "run", + "--", + "--dir", + &test_dir, + "--port", + &port.to_string(), + "--rpc-port", + &(port + 1).to_string(), + "--enable-rpc", + "--debug", + "--admin-secret", + "test-admin", + ]) + .spawn() + .expect("Failed to start server process"); + + let guard = ServerProcessGuard { + process: child, + test_dir, + }; + + // Give the server time to build and start (cargo run may compile first) + // Increase significantly to accommodate first-time dependency compilation in CI. + std::thread::sleep(Duration::from_millis(60000)); + + (guard, port) +} + +// Convenient helpers for assertions on redis::Value +fn value_is_ok(v: &Value) -> bool { + match v { + Value::Okay => true, + Value::Status(s) if s == "OK" => true, + Value::Data(d) if d == b"OK" => true, + _ => false, + } +} + +fn value_is_int_eq(v: &Value, expected: i64) -> bool { + matches!(v, Value::Int(n) if *n == expected) +} + +fn value_is_str_eq(v: &Value, expected: &str) -> bool { + match v { + Value::Status(s) => s == expected, + Value::Data(d) => String::from_utf8_lossy(d) == expected, + _ => false, + } +} + +fn to_string_lossy(v: &Value) -> String { + match v { + Value::Nil => "Nil".to_string(), + Value::Int(n) => n.to_string(), + Value::Status(s) => s.clone(), + Value::Okay => "OK".to_string(), + Value::Data(d) => String::from_utf8_lossy(d).to_string(), + Value::Bulk(items) => { + let inner: Vec = items.iter().map(to_string_lossy).collect(); + format!("[{}]", inner.join(", ")) + } + } +} + +// Extract ids from LANCE.SEARCH / LANCE.SEARCHIMAGE reply which is: +// Array of elements: [ [id, score, [k,v,...]], [id, score, ...], ... ] +fn extract_hit_ids(v: &Value) -> Vec { + let mut ids = Vec::new(); + if let Value::Bulk(items) = v { + for item in items { + if let Value::Bulk(row) = item { + if !row.is_empty() { + // first element is id (Data or Status) + let id = match &row[0] { + Value::Data(d) => String::from_utf8_lossy(d).to_string(), + Value::Status(s) => s.clone(), + Value::Int(n) => n.to_string(), + _ => continue, + }; + ids.push(id); + } + } + } + } + ids +} + +// Check whether a Bulk array (RESP array) contains a given string element. +fn bulk_contains_string(v: &Value, needle: &str) -> bool { + match v { + Value::Bulk(items) => items.iter().any(|it| match it { + Value::Data(d) => String::from_utf8_lossy(d).contains(needle), + Value::Status(s) => s.contains(needle), + Value::Bulk(_) => bulk_contains_string(it, needle), + _ => false, + }), + _ => false, + } +} + +// ------------------------ +// Test: Lance end-to-end (RESP) using only local embedders +// ------------------------ + +#[tokio::test] +async fn test_lance_end_to_end() { + let (_guard, port) = setup_server().await; + + // First, wait for RESP to be available; this also gives cargo-run child ample time to finish building. + // Reuse the helper that retries PING until success. + { + let _conn_ready = get_redis_connection(port); + // Drop immediately; we only needed readiness. + } + + // Build RPC client and create a Lance DB + let rpc_client = get_rpc_client(port).await; + // Ensure RPC server is listening before we issue createDatabase (allow longer warm-up to accommodate first-build costs) + wait_for_rpc_ready(&rpc_client, 3600, Duration::from_millis(250)).await; + + let db_config = DatabaseConfig { + name: Some("media-db".to_string()), + storage_path: None, + max_size: None, + redis_version: None, + }; + + let db_id = rpc_client + .create_database(BackendType::Lance, db_config, None) + .await + .expect("create_database Lance failed"); + + assert_eq!(db_id, 1, "Expected first Lance DB id to be 1"); + + // Add access keys + let _ = rpc_client + .add_access_key(db_id, "readwrite_key".to_string(), "readwrite".to_string()) + .await + .expect("add_access_key readwrite failed"); + + let _ = rpc_client + .add_access_key(db_id, "read_key".to_string(), "read".to_string()) + .await + .expect("add_access_key read failed"); + + // Connect to Redis and SELECT DB with readwrite key + let mut conn = get_redis_connection(port); + + let sel_ok: RedisResult = redis::cmd("SELECT") + .arg(db_id) + .arg("KEY") + .arg("readwrite_key") + .query(&mut conn); + assert!(sel_ok.is_ok(), "SELECT db with key failed: {:?}", sel_ok); + assert_eq!(sel_ok.unwrap(), "OK"); + + // 1) Configure embedding providers: textset -> testhash dim 64, imageset -> testimagehash dim 512 + let v = redis::cmd("LANCE.EMBEDDING") + .arg("CONFIG") + .arg("SET") + .arg("textset") + .arg("PROVIDER") + .arg("testhash") + .arg("MODEL") + .arg("any") + .arg("PARAM") + .arg("dim") + .arg("64") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "Embedding config set (text) not OK: {}", to_string_lossy(&v)); + + let v = redis::cmd("LANCE.EMBEDDING") + .arg("CONFIG") + .arg("SET") + .arg("imageset") + .arg("PROVIDER") + .arg("testimagehash") + .arg("MODEL") + .arg("any") + .arg("PARAM") + .arg("dim") + .arg("512") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "Embedding config set (image) not OK: {}", to_string_lossy(&v)); + + // 2) Create datasets + let v = redis::cmd("LANCE.CREATE") + .arg("textset") + .arg("DIM") + .arg(64) + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.CREATE textset failed: {}", to_string_lossy(&v)); + + let v = redis::cmd("LANCE.CREATE") + .arg("imageset") + .arg("DIM") + .arg(512) + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.CREATE imageset failed: {}", to_string_lossy(&v)); + + // 3) Store two text documents + let v = redis::cmd("LANCE.STORE") + .arg("textset") + .arg("ID") + .arg("doc-1") + .arg("TEXT") + .arg("The quick brown fox jumps over the lazy dog") + .arg("META") + .arg("title") + .arg("Fox") + .arg("category") + .arg("animal") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.STORE doc-1 failed: {}", to_string_lossy(&v)); + + let v = redis::cmd("LANCE.STORE") + .arg("textset") + .arg("ID") + .arg("doc-2") + .arg("TEXT") + .arg("A fast auburn fox vaulted a sleepy canine") + .arg("META") + .arg("title") + .arg("Paraphrase") + .arg("category") + .arg("animal") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.STORE doc-2 failed: {}", to_string_lossy(&v)); + + // 4) Store two images via BYTES (local fake bytes; embedder only hashes bytes, not decoding) + let img1: Vec = b"local-image-bytes-1-abcdefghijklmnopqrstuvwxyz".to_vec(); + let img2: Vec = b"local-image-bytes-2-ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_vec(); + let img1_b64 = base64::engine::general_purpose::STANDARD.encode(&img1); + let img2_b64 = base64::engine::general_purpose::STANDARD.encode(&img2); + + let v = redis::cmd("LANCE.STOREIMAGE") + .arg("imageset") + .arg("ID") + .arg("img-1") + .arg("BYTES") + .arg(&img1_b64) + .arg("META") + .arg("title") + .arg("Local1") + .arg("group") + .arg("demo") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-1 failed: {}", to_string_lossy(&v)); + + let v = redis::cmd("LANCE.STOREIMAGE") + .arg("imageset") + .arg("ID") + .arg("img-2") + .arg("BYTES") + .arg(&img2_b64) + .arg("META") + .arg("title") + .arg("Local2") + .arg("group") + .arg("demo") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-2 failed: {}", to_string_lossy(&v)); + + // 5) Search text: K 2 QUERY "quick brown fox" RETURN 1 title + let v = redis::cmd("LANCE.SEARCH") + .arg("textset") + .arg("K") + .arg(2) + .arg("QUERY") + .arg("quick brown fox") + .arg("RETURN") + .arg(1) + .arg("title") + .query::(&mut conn) + .unwrap(); + + // Should be an array of hits + let ids = extract_hit_ids(&v); + assert!( + ids.contains(&"doc-1".to_string()) || ids.contains(&"doc-2".to_string()), + "LANCE.SEARCH should return doc-1/doc-2; got: {}", + to_string_lossy(&v) + ); + + // With FILTER on category + let v = redis::cmd("LANCE.SEARCH") + .arg("textset") + .arg("K") + .arg(2) + .arg("QUERY") + .arg("fox jumps") + .arg("FILTER") + .arg("category = 'animal'") + .arg("RETURN") + .arg(1) + .arg("title") + .query::(&mut conn) + .unwrap(); + + let ids_f = extract_hit_ids(&v); + assert!( + !ids_f.is_empty(), + "Filtered LANCE.SEARCH should return at least one document; got: {}", + to_string_lossy(&v) + ); + + // 6) Search images with QUERYBYTES + let query_img: Vec = b"local-image-query-3-1234567890".to_vec(); + let query_img_b64 = base64::engine::general_purpose::STANDARD.encode(&query_img); + + let v = redis::cmd("LANCE.SEARCHIMAGE") + .arg("imageset") + .arg("K") + .arg(2) + .arg("QUERYBYTES") + .arg(&query_img_b64) + .arg("RETURN") + .arg(1) + .arg("title") + .query::(&mut conn) + .unwrap(); + + // Should get 2 hits (img-1 and img-2) in some order; assert array non-empty + let img_ids = extract_hit_ids(&v); + assert!( + !img_ids.is_empty(), + "LANCE.SEARCHIMAGE should return non-empty results; got: {}", + to_string_lossy(&v) + ); + + // 7) Inspect datasets + let v = redis::cmd("LANCE.LIST").query::(&mut conn).unwrap(); + assert!( + bulk_contains_string(&v, "textset"), + "LANCE.LIST missing textset: {}", + to_string_lossy(&v) + ); + assert!( + bulk_contains_string(&v, "imageset"), + "LANCE.LIST missing imageset: {}", + to_string_lossy(&v) + ); + + // INFO textset + let info_text = redis::cmd("LANCE.INFO") + .arg("textset") + .query::(&mut conn) + .unwrap(); + // INFO returns Array [k,v,k,v,...] including "dimension" "64" and "row_count" "...". + let info_str = to_string_lossy(&info_text); + assert!( + info_str.contains("dimension") && info_str.contains("64"), + "LANCE.INFO textset should include dimension 64; got: {}", + info_str + ); + + // 8) Delete by id and drop datasets + let v = redis::cmd("LANCE.DEL") + .arg("textset") + .arg("doc-2") + .query::(&mut conn) + .unwrap(); + // Returns SimpleString "1" or Int 1 depending on encoding path; accept either + assert!( + value_is_int_eq(&v, 1) || value_is_str_eq(&v, "1"), + "LANCE.DEL doc-2 expected 1; got {}", + to_string_lossy(&v) + ); + + let v = redis::cmd("LANCE.DROP") + .arg("textset") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.DROP textset failed: {}", to_string_lossy(&v)); + + let v = redis::cmd("LANCE.DROP") + .arg("imageset") + .query::(&mut conn) + .unwrap(); + assert!(value_is_ok(&v), "LANCE.DROP imageset failed: {}", to_string_lossy(&v)); +} \ No newline at end of file