WIP 1: implement lancedb vector

This commit is contained in:
Maxime Van Hees
2025-09-29 11:24:31 +02:00
parent 77a53bae86
commit 6a4e2819bf
9 changed files with 5575 additions and 93 deletions

View File

@@ -16,6 +16,7 @@ pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
Lance, // Vector search backend (no KV storage)
// Future: InMemory, Custom(String)
}
@@ -161,6 +162,82 @@ pub trait Rpc {
/// Drop an FT index
///
/// Exposed over JSON-RPC under the method name `ftDrop`.
///
/// * `db_id` - id of the database that owns the index.
/// * `index_name` - name of the index to drop.
///
/// NOTE(review): "FT" presumably stands for full-text (Tantivy) — confirm.
#[method(name = "ftDrop")]
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
// ----- LanceDB (Vector) RPC endpoints -----
/// Create a new Lance dataset in a Lance-backed DB
///
/// Exposed over JSON-RPC under the method name `lanceCreate`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to create.
/// * `dim` - dimension passed to the store when creating the dataset
///   (presumably the fixed length of every stored vector — confirm against the Lance store).
///
/// The `RpcServerImpl` implementation requires write permission and returns
/// `true` on success.
#[method(name = "lanceCreate")]
async fn lance_create(
&self,
db_id: u64,
name: String,
dim: usize,
) -> RpcResult<bool>;
/// Store a vector (with id and metadata) into a Lance dataset
///
/// Exposed over JSON-RPC under the method name `lanceStore`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to write into.
/// * `id` - identifier of the record.
/// * `vector` - the vector values to store.
/// * `meta` - optional string key/value metadata; the `RpcServerImpl`
///   implementation treats `None` as an empty map.
///
/// The `RpcServerImpl` implementation requires write permission and returns
/// `true` on success.
#[method(name = "lanceStore")]
async fn lance_store(
&self,
db_id: u64,
name: String,
id: String,
vector: Vec<f32>,
meta: Option<HashMap<String, String>>,
) -> RpcResult<bool>;
/// Search a Lance dataset with a query vector
///
/// Exposed over JSON-RPC under the method name `lanceSearch`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to search.
/// * `vector` - the query vector.
/// * `k` - forwarded to the store (presumably the maximum number of hits to return — confirm).
/// * `filter` - optional filter string forwarded to the store unchanged; its
///   syntax is defined by the store, not by this layer.
/// * `return_fields` - optional list of field names forwarded to the store unchanged.
///
/// The `RpcServerImpl` implementation requires read permission and returns a
/// JSON object of the form `{ "results": [ { "id", "score", "meta" }, ... ] }`.
#[method(name = "lanceSearch")]
async fn lance_search(
&self,
db_id: u64,
name: String,
vector: Vec<f32>,
k: usize,
filter: Option<String>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value>;
/// Create an ANN index on a Lance dataset
///
/// Exposed over JSON-RPC under the method name `lanceCreateIndex`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to index.
/// * `index_type` - index type name forwarded to the store as a string; the
///   accepted values are defined by the store, not by this layer.
/// * `params` - optional string key/value index parameters; the
///   `RpcServerImpl` implementation treats `None` as an empty map.
///
/// The `RpcServerImpl` implementation requires write permission and returns
/// `true` on success.
#[method(name = "lanceCreateIndex")]
async fn lance_create_index(
&self,
db_id: u64,
name: String,
index_type: String,
params: Option<HashMap<String, String>>,
) -> RpcResult<bool>;
/// List Lance datasets for a DB
///
/// Exposed over JSON-RPC under the method name `lanceList`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
///
/// The `RpcServerImpl` implementation requires read permission and returns
/// the list produced by the store (presumably dataset names — confirm).
#[method(name = "lanceList")]
async fn lance_list(
&self,
db_id: u64,
) -> RpcResult<Vec<String>>;
/// Get info for a Lance dataset
///
/// Exposed over JSON-RPC under the method name `lanceInfo`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to describe.
///
/// The `RpcServerImpl` implementation requires read permission and returns
/// the store's dataset info converted to a JSON value; the exact fields are
/// defined by the store, not by this layer.
#[method(name = "lanceInfo")]
async fn lance_info(
&self,
db_id: u64,
name: String,
) -> RpcResult<serde_json::Value>;
/// Delete a record by id from a Lance dataset
///
/// Exposed over JSON-RPC under the method name `lanceDel`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to delete from.
/// * `id` - identifier of the record to delete.
///
/// The `RpcServerImpl` implementation requires write permission and returns
/// the boolean reported by the store (presumably whether a record was
/// actually removed — confirm against the Lance store).
#[method(name = "lanceDel")]
async fn lance_del(
&self,
db_id: u64,
name: String,
id: String,
) -> RpcResult<bool>;
/// Drop a Lance dataset
///
/// Exposed over JSON-RPC under the method name `lanceDrop`.
///
/// * `db_id` - id of the target database; the `RpcServerImpl` implementation
///   rejects `0` and any database whose backend is not Lance.
/// * `name` - name of the dataset to drop.
///
/// The `RpcServerImpl` implementation requires write permission and returns
/// the boolean reported by the store (presumably whether the dataset existed
/// and was dropped — confirm against the Lance store).
#[method(name = "lanceDrop")]
async fn lance_drop(
&self,
db_id: u64,
name: String,
) -> RpcResult<bool>;
}
/// RPC Server implementation
@@ -236,7 +313,10 @@ impl RpcServerImpl {
}
// Create server instance with resolved backend
let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
let is_search_only = matches!(
effective_backend,
crate::options::BackendType::Tantivy | crate::options::BackendType::Lance
);
let db_option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
@@ -246,15 +326,15 @@ impl RpcServerImpl {
backend: effective_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(db_option).await;
// Set the selected database to the db_id
server.selected_db = db_id;
// Lazily open/create physical storage according to admin meta (per-db encryption)
// Skip for Tantivy backend (no KV storage to open)
if !is_tantivy {
// Skip for search-only backends (Tantivy/Lance): no KV storage to open
if !is_search_only {
let _ = server.current_storage();
}
@@ -344,6 +424,7 @@ impl RpcServerImpl {
crate::options::BackendType::Redb => BackendType::Redb,
crate::options::BackendType::Sled => BackendType::Sled,
crate::options::BackendType::Tantivy => BackendType::Tantivy,
crate::options::BackendType::Lance => BackendType::Lance,
};
DatabaseInfo {
@@ -395,12 +476,16 @@ impl RpcServer for RpcServerImpl {
BackendType::Redb => crate::options::BackendType::Redb,
BackendType::Sled => crate::options::BackendType::Sled,
BackendType::Tantivy => crate::options::BackendType::Tantivy,
BackendType::Lance => crate::options::BackendType::Lance,
};
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Create server instance using base_dir, chosen backend and admin secret
let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
let is_search_only_new = matches!(
opt_backend,
crate::options::BackendType::Tantivy | crate::options::BackendType::Lance
);
let option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
@@ -410,13 +495,13 @@ impl RpcServer for RpcServerImpl {
backend: opt_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(option).await;
server.selected_db = db_id;
// Initialize storage to create physical <id>.db with proper encryption from admin meta
// Skip for Tantivy backend (no KV storage to initialize)
if !is_tantivy_new {
// Skip for search-only backends (Tantivy/Lance): no KV storage to initialize
if !is_search_only_new {
let _ = server.current_storage();
}
@@ -676,4 +761,201 @@ impl RpcServer for RpcServerImpl {
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
// ----- LanceDB (Vector) RPC endpoints -----
/// Create a new Lance dataset called `name` with dimension `dim`.
///
/// Returns `Ok(true)` once the store has created the dataset.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks write permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_create(
    &self,
    db_id: u64,
    name: String,
    dim: usize,
) -> RpcResult<bool> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_write_permission() {
        return Err(rpc_err("write permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    store
        .create_dataset(&name, dim)
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(true)
}
/// Store `vector` under `id` (with optional metadata) in the Lance dataset `name`.
///
/// A missing `meta` is passed to the store as an empty map.
/// Returns `Ok(true)` once the store has accepted the record.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks write permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_store(
    &self,
    db_id: u64,
    name: String,
    id: String,
    vector: Vec<f32>,
    meta: Option<HashMap<String, String>>,
) -> RpcResult<bool> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_write_permission() {
        return Err(rpc_err("write permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    store
        .store_vector(&name, &id, vector, meta.unwrap_or_default())
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(true)
}
/// Search the Lance dataset `name` with the query `vector`.
///
/// `k`, `filter` and `return_fields` are forwarded to the store unchanged.
/// On success the reply is a JSON object of the form
/// `{ "results": [ { "id": ..., "score": ..., "meta": ... }, ... ] }`,
/// with one entry per hit in the order the store returned them.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks read permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_search(
    &self,
    db_id: u64,
    name: String,
    vector: Vec<f32>,
    k: usize,
    filter: Option<String>,
    return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_read_permission() {
        return Err(rpc_err("read permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    let hits = store
        .search_vectors(&name, vector, k, filter, return_fields)
        .await
        .map_err(|e| rpc_err(e.0))?;

    // Flatten each (id, score, meta) hit into a JSON object for the wire.
    let json_results: Vec<serde_json::Value> = hits
        .into_iter()
        .map(|(id, score, meta)| {
            serde_json::json!({
                "id": id,
                "score": score,
                "meta": meta,
            })
        })
        .collect();
    Ok(serde_json::json!({ "results": json_results }))
}
/// Create an index of type `index_type` on the Lance dataset `name`.
///
/// A missing `params` is passed to the store as an empty map.
/// Returns `Ok(true)` once the store has built the index.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks write permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_create_index(
    &self,
    db_id: u64,
    name: String,
    index_type: String,
    params: Option<HashMap<String, String>>,
) -> RpcResult<bool> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_write_permission() {
        return Err(rpc_err("write permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    store
        .create_index(&name, &index_type, params.unwrap_or_default())
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(true)
}
/// List the Lance datasets of database `db_id`.
///
/// Returns the list exactly as produced by the store.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks read permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_list(
    &self,
    db_id: u64,
) -> RpcResult<Vec<String>> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_read_permission() {
        return Err(rpc_err("read permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    let datasets = store
        .list_datasets()
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(datasets)
}
/// Fetch the store's info for the Lance dataset `name` as a JSON value.
///
/// The shape of the returned JSON is whatever the store's dataset info
/// serializes to; this layer does not add or remove fields.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks read permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_info(
    &self,
    db_id: u64,
    name: String,
) -> RpcResult<serde_json::Value> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_read_permission() {
        return Err(rpc_err("read permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    let info = store
        .get_dataset_info(&name)
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(serde_json::json!(info))
}
/// Delete the record `id` from the Lance dataset `name`.
///
/// Returns the boolean reported by the store's delete-by-id operation.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks write permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_del(
    &self,
    db_id: u64,
    name: String,
    id: String,
) -> RpcResult<bool> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_write_permission() {
        return Err(rpc_err("write permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    let deleted = store
        .delete_by_id(&name, &id)
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(deleted)
}
/// Drop the Lance dataset `name` from database `db_id`.
///
/// Returns the boolean reported by the store's drop operation.
///
/// # Errors
/// Fails with JSON-RPC error code `-32000` when `db_id` is 0, when the
/// database backend is not Lance, when the caller lacks write permission,
/// or when the Lance store reports an error. Errors from resolving the
/// server instance are propagated unchanged.
async fn lance_drop(
    &self,
    db_id: u64,
    name: String,
) -> RpcResult<bool> {
    // One place to build the implementation-defined (-32000) error used below.
    fn rpc_err(msg: impl Into<String>) -> jsonrpsee::types::ErrorObjectOwned {
        jsonrpsee::types::ErrorObjectOwned::owned(-32000, msg, None::<()>)
    }

    // Reject DB 0 before resolving the server: the request can never succeed,
    // so no server instance should be looked up or created on its behalf.
    if db_id == 0 {
        return Err(rpc_err("Lance not allowed on DB 0"));
    }
    let server = self.get_or_create_server(db_id).await?;
    if !matches!(server.option.backend, crate::options::BackendType::Lance) {
        return Err(rpc_err("DB backend is not Lance"));
    }
    if !server.has_write_permission() {
        return Err(rpc_err("write permission denied"));
    }

    let store = server.lance_store().map_err(|e| rpc_err(e.0))?;
    let dropped = store
        .drop_dataset(&name)
        .await
        .map_err(|e| rpc_err(e.0))?;
    Ok(dropped)
}
}