484 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			484 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
| use redis::{Client, Connection, RedisResult, Value};
 | |
| use std::process::{Child, Command};
 | |
| use std::time::Duration;
 | |
| 
 | |
| use jsonrpsee::http_client::{HttpClient, HttpClientBuilder};
 | |
| use herodb::rpc::{BackendType, DatabaseConfig, RpcClient};
 | |
| use base64::Engine;
 | |
| use tokio::time::sleep;
 | |
| 
 | |
| // ------------------------
 | |
| // Helpers
 | |
| // ------------------------
 | |
| 
 | |
| fn get_redis_connection(port: u16) -> Connection {
 | |
|     let connection_info = format!("redis://127.0.0.1:{}", port);
 | |
|     let client = Client::open(connection_info).unwrap();
 | |
|     let mut attempts = 0;
 | |
|     loop {
 | |
|         match client.get_connection() {
 | |
|             Ok(mut conn) => {
 | |
|                 if redis::cmd("PING").query::<String>(&mut conn).is_ok() {
 | |
|                     return conn;
 | |
|                 }
 | |
|             }
 | |
|             Err(e) => {
 | |
|                 if attempts >= 3600 {
 | |
|                     panic!("Failed to connect to Redis server after 3600 attempts: {}", e);
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         attempts += 1;
 | |
|         std::thread::sleep(Duration::from_millis(500));
 | |
|     }
 | |
| }
 | |
| 
 | |
| async fn get_rpc_client(port: u16) -> HttpClient {
 | |
|     let url = format!("http://127.0.0.1:{}", port + 1); // RPC port = Redis port + 1
 | |
|     HttpClientBuilder::default().build(url).unwrap()
 | |
| }
 | |
| 
 | |
| /// Wait until RPC server is responsive (getServerStats succeeds) or panic after retries.
 | |
| async fn wait_for_rpc_ready(client: &HttpClient, max_attempts: u32, delay: Duration) {
 | |
|     for _ in 0..max_attempts {
 | |
|         match client.get_server_stats().await {
 | |
|             Ok(_) => return,
 | |
|             Err(_) => {
 | |
|                 sleep(delay).await;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     panic!("RPC server did not become ready in time");
 | |
| }
 | |
| 
 | |
/// Ties the lifetime of a spawned server process and its data directory to a value:
/// dropping the guard kills the process and deletes the directory.
struct ServerProcessGuard {
    /// Handle of the spawned child process.
    process: Child,
    /// Path of the directory removed on drop.
    test_dir: String,
}

impl Drop for ServerProcessGuard {
    /// Kills and reaps the child, then removes the test directory.
    /// Every failure is only logged to stderr; nothing here panics.
    fn drop(&mut self) {
        let pid = self.process.id();
        eprintln!("Killing server process (pid: {})...", pid);

        match self.process.kill() {
            Ok(()) => {}
            Err(kill_err) => eprintln!("Failed to kill server process: {}", kill_err),
        }

        // Always wait, even if kill failed, so the child gets reaped.
        match self.process.wait() {
            Err(wait_err) => eprintln!("Failed to wait on server process: {}", wait_err),
            Ok(exit_status) => eprintln!("Server process exited with: {}", exit_status),
        }

        // Clean up the specific test directory
        eprintln!("Cleaning up test directory: {}", self.test_dir);
        let removal = std::fs::remove_dir_all(&self.test_dir);
        if let Err(remove_err) = removal {
            eprintln!("Failed to clean up test directory: {}", remove_err);
        }
    }
}
 | |
| 
 | |
| // Helper to set up the server and return guard + ports
 | |
| async fn setup_server() -> (ServerProcessGuard, u16) {
 | |
|     use std::sync::atomic::{AtomicU16, Ordering};
 | |
|     static PORT_COUNTER: AtomicU16 = AtomicU16::new(17500);
 | |
|     let port = PORT_COUNTER.fetch_add(1, Ordering::SeqCst);
 | |
| 
 | |
|     let test_dir = format!("/tmp/herodb_lance_test_{}", port);
 | |
| 
 | |
|     // Clean up previous test data
 | |
|     if std::path::Path::new(&test_dir).exists() {
 | |
|         let _ = std::fs::remove_dir_all(&test_dir);
 | |
|     }
 | |
|     std::fs::create_dir_all(&test_dir).unwrap();
 | |
| 
 | |
|     // Start the server in a subprocess with RPC enabled (follows tantivy test pattern)
 | |
|     let child = Command::new("cargo")
 | |
|         .args(&[
 | |
|             "run",
 | |
|             "--",
 | |
|             "--dir",
 | |
|             &test_dir,
 | |
|             "--port",
 | |
|             &port.to_string(),
 | |
|             "--rpc-port",
 | |
|             &(port + 1).to_string(),
 | |
|             "--enable-rpc",
 | |
|             "--debug",
 | |
|             "--admin-secret",
 | |
|             "test-admin",
 | |
|         ])
 | |
|         .spawn()
 | |
|         .expect("Failed to start server process");
 | |
| 
 | |
|     let guard = ServerProcessGuard {
 | |
|         process: child,
 | |
|         test_dir,
 | |
|     };
 | |
| 
 | |
|     // Give the server time to build and start (cargo run may compile first)
 | |
|     // Increase significantly to accommodate first-time dependency compilation in CI.
 | |
|     std::thread::sleep(Duration::from_millis(5000));
 | |
| 
 | |
|     (guard, port)
 | |
| }
 | |
| 
 | |
| // Convenient helpers for assertions on redis::Value
 | |
| fn value_is_ok(v: &Value) -> bool {
 | |
|     match v {
 | |
|         Value::Okay => true,
 | |
|         Value::Status(s) if s == "OK" => true,
 | |
|         Value::Data(d) if d == b"OK" => true,
 | |
|         _ => false,
 | |
|     }
 | |
| }
 | |
| 
 | |
| fn value_is_int_eq(v: &Value, expected: i64) -> bool {
 | |
|     matches!(v, Value::Int(n) if *n == expected)
 | |
| }
 | |
| 
 | |
| fn value_is_str_eq(v: &Value, expected: &str) -> bool {
 | |
|     match v {
 | |
|         Value::Status(s) => s == expected,
 | |
|         Value::Data(d) => String::from_utf8_lossy(d) == expected,
 | |
|         _ => false,
 | |
|     }
 | |
| }
 | |
| 
 | |
| fn to_string_lossy(v: &Value) -> String {
 | |
|     match v {
 | |
|         Value::Nil => "Nil".to_string(),
 | |
|         Value::Int(n) => n.to_string(),
 | |
|         Value::Status(s) => s.clone(),
 | |
|         Value::Okay => "OK".to_string(),
 | |
|         Value::Data(d) => String::from_utf8_lossy(d).to_string(),
 | |
|         Value::Bulk(items) => {
 | |
|             let inner: Vec<String> = items.iter().map(to_string_lossy).collect();
 | |
|             format!("[{}]", inner.join(", "))
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| // Extract ids from LANCE.SEARCH / LANCE.SEARCHIMAGE reply which is:
 | |
| // Array of elements: [ [id, score, [k,v,...]], [id, score, ...], ... ]
 | |
| fn extract_hit_ids(v: &Value) -> Vec<String> {
 | |
|     let mut ids = Vec::new();
 | |
|     if let Value::Bulk(items) = v {
 | |
|         for item in items {
 | |
|             if let Value::Bulk(row) = item {
 | |
|                 if !row.is_empty() {
 | |
|                     // first element is id (Data or Status)
 | |
|                     let id = match &row[0] {
 | |
|                         Value::Data(d) => String::from_utf8_lossy(d).to_string(),
 | |
|                         Value::Status(s) => s.clone(),
 | |
|                         Value::Int(n) => n.to_string(),
 | |
|                         _ => continue,
 | |
|                     };
 | |
|                     ids.push(id);
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     ids
 | |
| }
 | |
| 
 | |
| // Check whether a Bulk array (RESP array) contains a given string element.
 | |
| fn bulk_contains_string(v: &Value, needle: &str) -> bool {
 | |
|     match v {
 | |
|         Value::Bulk(items) => items.iter().any(|it| match it {
 | |
|             Value::Data(d) => String::from_utf8_lossy(d).contains(needle),
 | |
|             Value::Status(s) => s.contains(needle),
 | |
|             Value::Bulk(_) => bulk_contains_string(it, needle),
 | |
|             _ => false,
 | |
|         }),
 | |
|         _ => false,
 | |
|     }
 | |
| }
 | |
| 
 | |
| // ------------------------
 | |
| // Test: Lance end-to-end (RESP) using only local embedders
 | |
| // ------------------------
 | |
| 
 | |
| #[tokio::test]
 | |
| async fn test_lance_end_to_end() {
 | |
|     let (_guard, port) = setup_server().await;
 | |
| 
 | |
|     // First, wait for RESP to be available; this also gives cargo-run child ample time to finish building.
 | |
|     // Reuse the helper that retries PING until success.
 | |
|     {
 | |
|         let _conn_ready = get_redis_connection(port);
 | |
|         // Drop immediately; we only needed readiness.
 | |
|     }
 | |
| 
 | |
|     // Build RPC client and create a Lance DB
 | |
|     let rpc_client = get_rpc_client(port).await;
 | |
|     // Ensure RPC server is listening before we issue createDatabase (allow longer warm-up to accommodate first-build costs)
 | |
|     wait_for_rpc_ready(&rpc_client, 3600, Duration::from_millis(250)).await;
 | |
| 
 | |
|     let db_config = DatabaseConfig {
 | |
|         name: Some("media-db".to_string()),
 | |
|         storage_path: None,
 | |
|         max_size: None,
 | |
|         redis_version: None,
 | |
|     };
 | |
| 
 | |
|     let db_id = rpc_client
 | |
|         .create_database(BackendType::Lance, db_config, None)
 | |
|         .await
 | |
|         .expect("create_database Lance failed");
 | |
| 
 | |
|     assert_eq!(db_id, 1, "Expected first Lance DB id to be 1");
 | |
| 
 | |
|     // Add access keys
 | |
|     let _ = rpc_client
 | |
|         .add_access_key(db_id, "readwrite_key".to_string(), "readwrite".to_string())
 | |
|         .await
 | |
|         .expect("add_access_key readwrite failed");
 | |
| 
 | |
|     let _ = rpc_client
 | |
|         .add_access_key(db_id, "read_key".to_string(), "read".to_string())
 | |
|         .await
 | |
|         .expect("add_access_key read failed");
 | |
| 
 | |
|     // Connect to Redis and SELECT DB with readwrite key
 | |
|     let mut conn = get_redis_connection(port);
 | |
| 
 | |
|     let sel_ok: RedisResult<String> = redis::cmd("SELECT")
 | |
|         .arg(db_id)
 | |
|         .arg("KEY")
 | |
|         .arg("readwrite_key")
 | |
|         .query(&mut conn);
 | |
|     assert!(sel_ok.is_ok(), "SELECT db with key failed: {:?}", sel_ok);
 | |
|     assert_eq!(sel_ok.unwrap(), "OK");
 | |
| 
 | |
|     // 1) Configure embedding providers: textset -> testhash dim 64, imageset -> testimagehash dim 512
 | |
|     let v = redis::cmd("LANCE.EMBEDDING")
 | |
|         .arg("CONFIG")
 | |
|         .arg("SET")
 | |
|         .arg("textset")
 | |
|         .arg("PROVIDER")
 | |
|         .arg("testhash")
 | |
|         .arg("MODEL")
 | |
|         .arg("any")
 | |
|         .arg("PARAM")
 | |
|         .arg("dim")
 | |
|         .arg("64")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "Embedding config set (text) not OK: {}", to_string_lossy(&v));
 | |
| 
 | |
|     let v = redis::cmd("LANCE.EMBEDDING")
 | |
|         .arg("CONFIG")
 | |
|         .arg("SET")
 | |
|         .arg("imageset")
 | |
|         .arg("PROVIDER")
 | |
|         .arg("testimagehash")
 | |
|         .arg("MODEL")
 | |
|         .arg("any")
 | |
|         .arg("PARAM")
 | |
|         .arg("dim")
 | |
|         .arg("512")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "Embedding config set (image) not OK: {}", to_string_lossy(&v));
 | |
| 
 | |
|     // 2) Create datasets
 | |
|     let v = redis::cmd("LANCE.CREATE")
 | |
|         .arg("textset")
 | |
|         .arg("DIM")
 | |
|         .arg(64)
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.CREATE textset failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     let v = redis::cmd("LANCE.CREATE")
 | |
|         .arg("imageset")
 | |
|         .arg("DIM")
 | |
|         .arg(512)
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.CREATE imageset failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     // 3) Store two text documents
 | |
|     let v = redis::cmd("LANCE.STORE")
 | |
|         .arg("textset")
 | |
|         .arg("ID")
 | |
|         .arg("doc-1")
 | |
|         .arg("TEXT")
 | |
|         .arg("The quick brown fox jumps over the lazy dog")
 | |
|         .arg("META")
 | |
|         .arg("title")
 | |
|         .arg("Fox")
 | |
|         .arg("category")
 | |
|         .arg("animal")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.STORE doc-1 failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     let v = redis::cmd("LANCE.STORE")
 | |
|         .arg("textset")
 | |
|         .arg("ID")
 | |
|         .arg("doc-2")
 | |
|         .arg("TEXT")
 | |
|         .arg("A fast auburn fox vaulted a sleepy canine")
 | |
|         .arg("META")
 | |
|         .arg("title")
 | |
|         .arg("Paraphrase")
 | |
|         .arg("category")
 | |
|         .arg("animal")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.STORE doc-2 failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     // 4) Store two images via BYTES (local fake bytes; embedder only hashes bytes, not decoding)
 | |
|     let img1: Vec<u8> = b"local-image-bytes-1-abcdefghijklmnopqrstuvwxyz".to_vec();
 | |
|     let img2: Vec<u8> = b"local-image-bytes-2-ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_vec();
 | |
|     let img1_b64 = base64::engine::general_purpose::STANDARD.encode(&img1);
 | |
|     let img2_b64 = base64::engine::general_purpose::STANDARD.encode(&img2);
 | |
| 
 | |
|     let v = redis::cmd("LANCE.STOREIMAGE")
 | |
|         .arg("imageset")
 | |
|         .arg("ID")
 | |
|         .arg("img-1")
 | |
|         .arg("BYTES")
 | |
|         .arg(&img1_b64)
 | |
|         .arg("META")
 | |
|         .arg("title")
 | |
|         .arg("Local1")
 | |
|         .arg("group")
 | |
|         .arg("demo")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-1 failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     let v = redis::cmd("LANCE.STOREIMAGE")
 | |
|         .arg("imageset")
 | |
|         .arg("ID")
 | |
|         .arg("img-2")
 | |
|         .arg("BYTES")
 | |
|         .arg(&img2_b64)
 | |
|         .arg("META")
 | |
|         .arg("title")
 | |
|         .arg("Local2")
 | |
|         .arg("group")
 | |
|         .arg("demo")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-2 failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     // 5) Search text: K 2 QUERY "quick brown fox" RETURN 1 title
 | |
|     let v = redis::cmd("LANCE.SEARCH")
 | |
|         .arg("textset")
 | |
|         .arg("K")
 | |
|         .arg(2)
 | |
|         .arg("QUERY")
 | |
|         .arg("quick brown fox")
 | |
|         .arg("RETURN")
 | |
|         .arg(1)
 | |
|         .arg("title")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
| 
 | |
|     // Should be an array of hits
 | |
|     let ids = extract_hit_ids(&v);
 | |
|     assert!(
 | |
|         ids.contains(&"doc-1".to_string()) || ids.contains(&"doc-2".to_string()),
 | |
|         "LANCE.SEARCH should return doc-1/doc-2; got: {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
| 
 | |
|     // With FILTER on category
 | |
|     let v = redis::cmd("LANCE.SEARCH")
 | |
|         .arg("textset")
 | |
|         .arg("K")
 | |
|         .arg(2)
 | |
|         .arg("QUERY")
 | |
|         .arg("fox jumps")
 | |
|         .arg("FILTER")
 | |
|         .arg("category = 'animal'")
 | |
|         .arg("RETURN")
 | |
|         .arg(1)
 | |
|         .arg("title")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
| 
 | |
|     let ids_f = extract_hit_ids(&v);
 | |
|     assert!(
 | |
|         !ids_f.is_empty(),
 | |
|         "Filtered LANCE.SEARCH should return at least one document; got: {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
| 
 | |
|     // 6) Search images with QUERYBYTES
 | |
|     let query_img: Vec<u8> = b"local-image-query-3-1234567890".to_vec();
 | |
|     let query_img_b64 = base64::engine::general_purpose::STANDARD.encode(&query_img);
 | |
| 
 | |
|     let v = redis::cmd("LANCE.SEARCHIMAGE")
 | |
|         .arg("imageset")
 | |
|         .arg("K")
 | |
|         .arg(2)
 | |
|         .arg("QUERYBYTES")
 | |
|         .arg(&query_img_b64)
 | |
|         .arg("RETURN")
 | |
|         .arg(1)
 | |
|         .arg("title")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
| 
 | |
|     // Should get 2 hits (img-1 and img-2) in some order; assert array non-empty
 | |
|     let img_ids = extract_hit_ids(&v);
 | |
|     assert!(
 | |
|         !img_ids.is_empty(),
 | |
|         "LANCE.SEARCHIMAGE should return non-empty results; got: {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
| 
 | |
|     // 7) Inspect datasets
 | |
|     let v = redis::cmd("LANCE.LIST").query::<Value>(&mut conn).unwrap();
 | |
|     assert!(
 | |
|         bulk_contains_string(&v, "textset"),
 | |
|         "LANCE.LIST missing textset: {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
|     assert!(
 | |
|         bulk_contains_string(&v, "imageset"),
 | |
|         "LANCE.LIST missing imageset: {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
| 
 | |
|     // INFO textset
 | |
|     let info_text = redis::cmd("LANCE.INFO")
 | |
|         .arg("textset")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     // INFO returns Array [k,v,k,v,...] including "dimension" "64" and "row_count" "...".
 | |
|     let info_str = to_string_lossy(&info_text);
 | |
|     assert!(
 | |
|         info_str.contains("dimension") && info_str.contains("64"),
 | |
|         "LANCE.INFO textset should include dimension 64; got: {}",
 | |
|         info_str
 | |
|     );
 | |
| 
 | |
|     // 8) Delete by id and drop datasets
 | |
|     let v = redis::cmd("LANCE.DEL")
 | |
|         .arg("textset")
 | |
|         .arg("doc-2")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     // Returns SimpleString "1" or Int 1 depending on encoding path; accept either
 | |
|     assert!(
 | |
|         value_is_int_eq(&v, 1) || value_is_str_eq(&v, "1"),
 | |
|         "LANCE.DEL doc-2 expected 1; got {}",
 | |
|         to_string_lossy(&v)
 | |
|     );
 | |
| 
 | |
|     let v = redis::cmd("LANCE.DROP")
 | |
|         .arg("textset")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.DROP textset failed: {}", to_string_lossy(&v));
 | |
| 
 | |
|     let v = redis::cmd("LANCE.DROP")
 | |
|         .arg("imageset")
 | |
|         .query::<Value>(&mut conn)
 | |
|         .unwrap();
 | |
|     assert!(value_is_ok(&v), "LANCE.DROP imageset failed: {}", to_string_lossy(&v));
 | |
| } |