WIP 1: implement lancedb vector

This commit is contained in:
Maxime Van Hees
2025-09-29 11:24:31 +02:00
parent 77a53bae86
commit 6a4e2819bf
9 changed files with 5575 additions and 93 deletions

View File

@@ -125,6 +125,41 @@ pub enum Cmd {
query: String,
group_by: Vec<String>,
reducers: Vec<String>,
},
// LanceDB vector search commands
/// `LANCE.CREATE name DIM d` — handled by calling `create_dataset(name, dim)` on the Lance store.
LanceCreate {
/// Dataset name (second token of the command).
name: String,
/// Value following the `DIM` keyword; presumably the vector dimensionality — confirm in the store.
dim: usize,
},
/// `LANCE.STORE name ID id VECTOR v1 v2 ... [META k v ...]`.
LanceStore {
name: String,
/// Identifier given after the `ID` keyword.
id: String,
/// Components given after `VECTOR`, each parsed as `f32`.
vector: Vec<f32>,
/// Key/value pairs given after `META`; the handler collects them into a `HashMap`.
meta: Vec<(String, String)>,
},
/// `LANCE.SEARCH name K k VECTOR v1 v2 ... [FILTER expr] [RETURN n fields...]`.
///
/// The handler replies with an array of `[id, score, [k1, v1, k2, v2, ...]]` entries.
LanceSearch {
name: String,
/// Query vector components given after `VECTOR`.
vector: Vec<f32>,
/// Value following the `K` keyword, forwarded to `search_vectors`.
k: usize,
/// Expression following `FILTER`, if that option was given.
filter: Option<String>,
/// Field names following `RETURN n`, if that option was given.
return_fields: Option<Vec<String>>,
},
/// `LANCE.CREATEINDEX name TYPE t [PARAM k v ...]`.
LanceCreateIndex {
name: String,
/// Token following the `TYPE` keyword, passed through as a string.
index_type: String,
/// Key/value pairs following `PARAM`; the handler collects them into a `HashMap`.
params: Vec<(String, String)>,
},
/// `LANCE.LIST` — takes no arguments; replies with the entries returned by `list_datasets`.
LanceList,
/// `LANCE.INFO name` — replies with a flat `[key, value, ...]` array.
LanceInfo {
name: String,
},
/// `LANCE.DEL name id` — replies `"1"` or `"0"` depending on the boolean returned by `delete_by_id`.
LanceDel {
name: String,
id: String,
},
/// `LANCE.DROP name` — replies `OK` when `drop_dataset` succeeds.
LanceDrop {
name: String,
}
}
@@ -815,6 +850,142 @@ impl Cmd {
let reducers = Vec::new();
Cmd::FtAggregate { index_name, query, group_by, reducers }
}
// ----- LANCE.* commands -----
"lance.create" => {
    // Expected shape: LANCE.CREATE name DIM d (exactly four tokens).
    let well_formed = cmd.len() == 4 && cmd[2].to_uppercase() == "DIM";
    if !well_formed {
        return Err(DBError("ERR LANCE.CREATE requires: name DIM <dim>".to_string()));
    }
    let dim = match cmd[3].parse::<usize>() {
        Ok(parsed) => parsed,
        Err(_) => return Err(DBError("ERR DIM must be an integer".to_string())),
    };
    Cmd::LanceCreate { name: cmd[1].clone(), dim }
}
"lance.store" => {
    // LANCE.STORE name ID id VECTOR v1 v2 ... [META k v ...]
    //
    // The shortest valid form is `LANCE.STORE name ID id VECTOR v1` (6 tokens),
    // so after this check indices 1..=5 are always in bounds.
    if cmd.len() < 6 {
        return Err(DBError("ERR LANCE.STORE requires: name ID <id> VECTOR v1 v2 ... [META k v ...]".to_string()));
    }
    let name = cmd[1].clone();
    // Keywords are plain ASCII, so compare without allocating an upper-cased copy.
    if !cmd[2].eq_ignore_ascii_case("ID") {
        return Err(DBError("ERR LANCE.STORE requires ID <id>".to_string()));
    }
    let id = cmd[3].clone();
    if !cmd[4].eq_ignore_ascii_case("VECTOR") {
        return Err(DBError("ERR LANCE.STORE requires VECTOR <f32...>".to_string()));
    }

    // Vector components run until the optional META keyword or the end of input.
    let rest = &cmd[5..];
    let vector_len = rest
        .iter()
        .position(|tok| tok.eq_ignore_ascii_case("META"))
        .unwrap_or(rest.len());
    let (vector_tokens, meta_tokens) = rest.split_at(vector_len);

    // `VECTOR META ...` satisfies the length check above but carries no components.
    if vector_tokens.is_empty() {
        return Err(DBError("ERR LANCE.STORE requires VECTOR <f32...>".to_string()));
    }
    let vector = vector_tokens
        .iter()
        .map(|tok| {
            tok.parse::<f32>()
                .map_err(|_| DBError("ERR vector element must be a float32".to_string()))
        })
        .collect::<Result<Vec<f32>, _>>()?;

    // `meta_tokens` is either empty or starts with the META keyword itself.
    let mut meta: Vec<(String, String)> = Vec::new();
    if let Some((_keyword, pairs)) = meta_tokens.split_first() {
        // A trailing key without a value used to be dropped silently; reject it instead
        // so the client learns that part of its metadata would not be stored.
        if pairs.len() % 2 != 0 {
            return Err(DBError("ERR META requires key value pairs".to_string()));
        }
        meta.extend(pairs.chunks_exact(2).map(|kv| (kv[0].clone(), kv[1].clone())));
    }
    Cmd::LanceStore { name, id, vector, meta }
}
"lance.search" => {
    // LANCE.SEARCH name K k VECTOR v1 v2 ... [FILTER expr] [RETURN n fields...]
    //
    // The shortest valid form is `LANCE.SEARCH name K k VECTOR v1` (6 tokens),
    // so after this check indices 1..=5 are always in bounds.
    if cmd.len() < 6 {
        return Err(DBError("ERR LANCE.SEARCH requires: name K <k> VECTOR v1 v2 ... [FILTER expr] [RETURN n fields...]".to_string()));
    }
    let name = cmd[1].clone();
    // Keywords are plain ASCII, so compare without allocating an upper-cased copy.
    if !cmd[2].eq_ignore_ascii_case("K") {
        return Err(DBError("ERR LANCE.SEARCH requires K <k>".to_string()));
    }
    let k: usize = cmd[3].parse().map_err(|_| DBError("ERR K must be an integer".to_string()))?;
    if !cmd[4].eq_ignore_ascii_case("VECTOR") {
        return Err(DBError("ERR LANCE.SEARCH requires VECTOR <f32...>".to_string()));
    }

    // Vector components run until the first option keyword or the end of input.
    let is_option =
        |tok: &str| tok.eq_ignore_ascii_case("FILTER") || tok.eq_ignore_ascii_case("RETURN");
    let mut i = 5;
    let mut vector: Vec<f32> = Vec::new();
    while i < cmd.len() && !is_option(cmd[i].as_str()) {
        let v: f32 = cmd[i].parse().map_err(|_| DBError("ERR vector element must be a float32".to_string()))?;
        vector.push(v);
        i += 1;
    }
    // `VECTOR FILTER ...` satisfies the length check above but carries no components.
    if vector.is_empty() {
        return Err(DBError("ERR LANCE.SEARCH requires VECTOR <f32...>".to_string()));
    }

    let mut filter: Option<String> = None;
    let mut return_fields: Option<Vec<String>> = None;
    while i < cmd.len() {
        if cmd[i].eq_ignore_ascii_case("FILTER") {
            if i + 1 >= cmd.len() {
                return Err(DBError("ERR FILTER requires an expression".to_string()));
            }
            filter = Some(cmd[i + 1].clone());
            i += 2;
        } else if cmd[i].eq_ignore_ascii_case("RETURN") {
            if i + 1 >= cmd.len() {
                return Err(DBError("ERR RETURN requires field count".to_string()));
            }
            let n: usize = cmd[i + 1].parse().map_err(|_| DBError("ERR RETURN count must be integer".to_string()))?;
            i += 2;
            // Validate the client-supplied count against the tokens actually present.
            // This rejects truncated field lists and, unlike iterating `0..n`, keeps the
            // work bounded by the command length even for a count such as usize::MAX.
            let end = match i.checked_add(n) {
                Some(end) if end <= cmd.len() => end,
                _ => {
                    return Err(DBError("ERR RETURN count exceeds number of fields provided".to_string()));
                }
            };
            return_fields = Some(cmd[i..end].to_vec());
            i = end;
        } else {
            // Stray tokens (e.g. an unquoted multi-word FILTER expression) used to be
            // skipped silently, producing a query different from the one intended.
            return Err(DBError(format!("ERR syntax error near '{}'", cmd[i])));
        }
    }
    Cmd::LanceSearch { name, vector, k, filter, return_fields }
}
"lance.createindex" => {
    // LANCE.CREATEINDEX name TYPE t [PARAM k v ...]
    // Keywords are plain ASCII, so compare without allocating an upper-cased copy.
    if cmd.len() < 4 || !cmd[2].eq_ignore_ascii_case("TYPE") {
        return Err(DBError("ERR LANCE.CREATEINDEX requires: name TYPE <type> [PARAM k v ...]".to_string()));
    }
    let name = cmd[1].clone();
    let index_type = cmd[3].clone();
    let mut params: Vec<(String, String)> = Vec::new();
    // Anything after the type must be the PARAM keyword followed by complete pairs.
    if let Some((keyword, pairs)) = cmd[4..].split_first() {
        // Unknown trailing tokens used to be ignored silently; report them instead.
        if !keyword.eq_ignore_ascii_case("PARAM") {
            return Err(DBError(format!("ERR syntax error near '{}'", keyword)));
        }
        // A trailing key without a value used to be dropped silently.
        if pairs.len() % 2 != 0 {
            return Err(DBError("ERR PARAM requires key value pairs".to_string()));
        }
        params.extend(pairs.chunks_exact(2).map(|kv| (kv[0].clone(), kv[1].clone())));
    }
    Cmd::LanceCreateIndex { name, index_type, params }
}
"lance.list" => {
    // LANCE.LIST is a bare command: any token beyond the command name is an error.
    match cmd.len() {
        1 => Cmd::LanceList,
        _ => return Err(DBError("ERR LANCE.LIST takes no arguments".to_string())),
    }
}
"lance.info" => {
    // LANCE.INFO name: exactly one argument after the command name.
    if cmd.len() == 2 {
        Cmd::LanceInfo { name: cmd[1].clone() }
    } else {
        return Err(DBError("ERR LANCE.INFO requires: name".to_string()))
    }
}
"lance.drop" => {
    // LANCE.DROP name: exactly one argument after the command name.
    match cmd.len() {
        2 => Cmd::LanceDrop { name: cmd[1].clone() },
        _ => return Err(DBError("ERR LANCE.DROP requires: name".to_string())),
    }
}
"lance.del" => {
    // LANCE.DEL name id: exactly two arguments after the command name.
    let arity_ok = cmd.len() == 3;
    if !arity_ok {
        return Err(DBError("ERR LANCE.DEL requires: name id".to_string()));
    }
    let (name, id) = (cmd[1].clone(), cmd[2].clone());
    Cmd::LanceDel { name, id }
}
_ => Cmd::Unknow(cmd[0].clone()),
},
protocol,
@@ -853,6 +1024,18 @@ impl Cmd {
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
// Resolve the selected database's backend and check whether it is Lance.
// A failed lookup or an unset backend both count as "not Lance".
let is_lance_backend = matches!(
    crate::admin_meta::get_database_backend(
        &server.option.dir,
        server.option.backend.clone(),
        &server.option.admin_secret,
        server.selected_db,
    )
    .ok()
    .flatten(),
    Some(crate::options::BackendType::Lance)
);
if is_tantivy_backend {
match &self {
Cmd::Select(..)
@@ -876,6 +1059,30 @@ impl Cmd {
}
}
// Lance backend gating: on a Lance database only the LANCE.* family plus a few
// connection/introspection commands pass; everything else is answered with an error.
if is_lance_backend {
    let permitted = matches!(
        &self,
        Cmd::Select(..)
            | Cmd::Quit
            | Cmd::Client(..)
            | Cmd::ClientSetName(..)
            | Cmd::ClientGetName
            | Cmd::Command(..)
            | Cmd::Info(..)
            | Cmd::LanceCreate { .. }
            | Cmd::LanceStore { .. }
            | Cmd::LanceSearch { .. }
            | Cmd::LanceCreateIndex { .. }
            | Cmd::LanceList
            | Cmd::LanceInfo { .. }
            | Cmd::LanceDel { .. }
            | Cmd::LanceDrop { .. }
    );
    if !permitted {
        return Ok(Protocol::err("ERR backend is Lance; only LANCE.* commands are allowed"));
    }
}
// If selected DB is not Tantivy, forbid all FT.* commands here.
if !is_tantivy_backend {
match &self {
@@ -893,6 +1100,23 @@ impl Cmd {
}
}
// Conversely, a database that is not Lance-backed rejects every LANCE.* command.
if !is_lance_backend {
    let is_lance_cmd = matches!(
        &self,
        Cmd::LanceCreate { .. }
            | Cmd::LanceStore { .. }
            | Cmd::LanceSearch { .. }
            | Cmd::LanceCreateIndex { .. }
            | Cmd::LanceList
            | Cmd::LanceInfo { .. }
            | Cmd::LanceDel { .. }
            | Cmd::LanceDrop { .. }
    );
    if is_lance_cmd {
        return Ok(Protocol::err("ERR DB backend is not Lance; LANCE.* commands are not allowed"));
    }
}
match self {
Cmd::Select(db, key) => select_cmd(server, db, key).await,
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
@@ -1015,6 +1239,96 @@ impl Cmd {
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
}
// LanceDB commands
Cmd::LanceCreate { name, dim } => {
    // Refuse when the connection lacks write permission.
    if !server.has_write_permission() {
        return Ok(Protocol::err("ERR write permission denied"));
    }
    let created = server.lance_store()?.create_dataset(&name, dim).await;
    // Store-level failures are reported to the client as protocol errors.
    Ok(match created {
        Ok(()) => Protocol::SimpleString("OK".to_string()),
        Err(err) => Protocol::err(&err.0),
    })
}
Cmd::LanceStore { name, id, vector, meta } => {
    // Refuse when the connection lacks write permission.
    if !server.has_write_permission() {
        return Ok(Protocol::err("ERR write permission denied"));
    }
    // Fold the pairs into a map; a repeated key keeps its last value.
    let mut meta_map = std::collections::HashMap::<String, String>::new();
    meta_map.extend(meta);
    let stored = server.lance_store()?.store_vector(&name, &id, vector, meta_map).await;
    Ok(match stored {
        Ok(()) => Protocol::SimpleString("OK".to_string()),
        Err(err) => Protocol::err(&err.0),
    })
}
Cmd::LanceSearch { name, vector, k, filter, return_fields } => {
    let outcome = server
        .lance_store()?
        .search_vectors(&name, vector, k, filter, return_fields)
        .await;
    match outcome {
        Err(err) => Ok(Protocol::err(&err.0)),
        Ok(hits) => {
            // Reply layout: one [id, score, [k1, v1, k2, v2, ...]] entry per hit.
            let mut rows = Vec::new();
            for (hit_id, hit_score, hit_meta) in hits {
                let flat_meta: Vec<Protocol> = hit_meta
                    .into_iter()
                    .flat_map(|(field, value)| {
                        [Protocol::BulkString(field), Protocol::BulkString(value)]
                    })
                    .collect();
                let row = vec![
                    Protocol::BulkString(hit_id),
                    Protocol::BulkString(hit_score.to_string()),
                    Protocol::Array(flat_meta),
                ];
                rows.push(Protocol::Array(row));
            }
            Ok(Protocol::Array(rows))
        }
    }
}
Cmd::LanceCreateIndex { name, index_type, params } => {
    // Refuse when the connection lacks write permission.
    if !server.has_write_permission() {
        return Ok(Protocol::err("ERR write permission denied"));
    }
    // Index parameters travel to the store as a string-to-string map.
    let params_map = params
        .into_iter()
        .collect::<std::collections::HashMap<String, String>>();
    let built = server.lance_store()?.create_index(&name, &index_type, params_map).await;
    Ok(match built {
        Ok(()) => Protocol::SimpleString("OK".to_string()),
        Err(err) => Protocol::err(&err.0),
    })
}
Cmd::LanceList => {
    let listing = server.lance_store()?.list_datasets().await;
    match listing {
        Err(err) => Ok(Protocol::err(&err.0)),
        Ok(names) => {
            // Each listed entry becomes one bulk string in the reply array.
            let mut entries = Vec::new();
            for dataset in names {
                entries.push(Protocol::BulkString(dataset));
            }
            Ok(Protocol::Array(entries))
        }
    }
}
Cmd::LanceInfo { name } => {
    let described = server.lance_store()?.get_dataset_info(&name).await;
    match described {
        Err(err) => Ok(Protocol::err(&err.0)),
        Ok(info) => {
            // Flatten the (key, value) pairs into [k1, v1, k2, v2, ...].
            let flat: Vec<Protocol> = info
                .into_iter()
                .flat_map(|(field, value)| {
                    [Protocol::BulkString(field), Protocol::BulkString(value)]
                })
                .collect();
            Ok(Protocol::Array(flat))
        }
    }
}
Cmd::LanceDel { name, id } => {
    // Refuse when the connection lacks write permission.
    if !server.has_write_permission() {
        return Ok(Protocol::err("ERR write permission denied"));
    }
    let removal = server.lance_store()?.delete_by_id(&name, &id).await;
    match removal {
        Err(err) => Ok(Protocol::err(&err.0)),
        Ok(deleted) => {
            // The boolean outcome is reported as the simple string "1" or "0".
            let flag = if deleted { "1" } else { "0" };
            Ok(Protocol::SimpleString(flag.to_string()))
        }
    }
}
Cmd::LanceDrop { name } => {
    // Refuse when the connection lacks write permission.
    if !server.has_write_permission() {
        return Ok(Protocol::err("ERR write permission denied"));
    }
    let dropped = server.lance_store()?.drop_dataset(&name).await;
    match dropped {
        Err(err) => Ok(Protocol::err(&err.0)),
        // The success payload is not inspected; any Ok value is answered with OK.
        Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
    }
}
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
}
}
@@ -1114,8 +1428,8 @@ async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result
.ok()
.flatten();
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy)) {
// Tantivy DBs have no KV storage; allow SELECT to succeed
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy) | Some(crate::options::BackendType::Lance)) {
// Search-only DBs (Tantivy/Lance) have no KV storage; allow SELECT to succeed
Ok(Protocol::SimpleString("OK".to_string()))
} else {
match server.current_storage() {
@@ -1459,9 +1773,9 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
}
async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
// For Tantivy backend, there is no KV storage; synthesize minimal info.
// For Tantivy or Lance backend, there is no KV storage; synthesize minimal info.
// Determine effective backend for the currently selected db.
let is_tantivy_db = crate::admin_meta::get_database_backend(
let is_search_only_db = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
@@ -1469,10 +1783,10 @@ async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.map(|b| matches!(b, crate::options::BackendType::Tantivy | crate::options::BackendType::Lance))
.unwrap_or(false);
let storage_info: Vec<(String, String)> = if is_tantivy_db {
let storage_info: Vec<(String, String)> = if is_search_only_db {
vec![
("db_size".to_string(), "0".to_string()),
("is_encrypted".to_string(), "false".to_string()),