implementation of tantivy datastore + updated RPC calls to deal with tantivy + docs
This commit is contained in:
@@ -48,6 +48,9 @@ fn init_admin_storage(
|
||||
let storage: Arc<dyn StorageBackend> = match backend {
|
||||
options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
|
||||
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
|
||||
options::BackendType::Tantivy => {
|
||||
return Err(DBError("Admin DB 0 cannot use Tantivy backend".to_string()))
|
||||
}
|
||||
};
|
||||
Ok(storage)
|
||||
}
|
||||
@@ -199,6 +202,9 @@ pub fn open_data_storage(
|
||||
let storage: Arc<dyn StorageBackend> = match effective_backend {
|
||||
options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
|
||||
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
|
||||
options::BackendType::Tantivy => {
|
||||
return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string()))
|
||||
}
|
||||
};
|
||||
|
||||
// Publish to registry
|
||||
@@ -291,6 +297,7 @@ pub fn set_database_backend(
|
||||
let val = match db_backend {
|
||||
options::BackendType::Redb => "Redb",
|
||||
options::BackendType::Sled => "Sled",
|
||||
options::BackendType::Tantivy => "Tantivy",
|
||||
};
|
||||
let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
|
||||
Ok(())
|
||||
@@ -307,6 +314,7 @@ pub fn get_database_backend(
|
||||
match admin.hget(&mk, "backend")? {
|
||||
Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
|
||||
Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
|
||||
Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)),
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
293
src/cmd.rs
293
src/cmd.rs
@@ -91,6 +91,41 @@ pub enum Cmd {
|
||||
SymKeygen,
|
||||
SymEncrypt(String, String), // key_b64, message
|
||||
SymDecrypt(String, String), // key_b64, ciphertext_b64
|
||||
|
||||
// Full-text search commands with schema support
|
||||
FtCreate {
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
|
||||
},
|
||||
FtAdd {
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
score: f64,
|
||||
fields: std::collections::HashMap<String, String>,
|
||||
},
|
||||
FtSearch {
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Vec<(String, String)>, // field, value pairs
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
},
|
||||
FtDel(String, String), // index_name, doc_id
|
||||
FtInfo(String), // index_name
|
||||
FtDrop(String), // index_name
|
||||
FtAlter {
|
||||
index_name: String,
|
||||
field_name: String,
|
||||
field_type: String,
|
||||
options: Vec<String>,
|
||||
},
|
||||
FtAggregate {
|
||||
index_name: String,
|
||||
query: String,
|
||||
group_by: Vec<String>,
|
||||
reducers: Vec<String>,
|
||||
}
|
||||
}
|
||||
|
||||
impl Cmd {
|
||||
@@ -646,6 +681,140 @@ impl Cmd {
|
||||
_ => return Err(DBError(format!("unsupported SYM subcommand {:?}", cmd))),
|
||||
}
|
||||
}
|
||||
"ft.create" => {
|
||||
if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
|
||||
return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
|
||||
}
|
||||
let index_name = cmd[1].clone();
|
||||
let mut schema = Vec::new();
|
||||
let mut i = 3;
|
||||
while i < cmd.len() {
|
||||
if i + 1 >= cmd.len() {
|
||||
return Err(DBError("ERR incomplete field definition".to_string()));
|
||||
}
|
||||
let field_name = cmd[i].clone();
|
||||
let field_type = cmd[i + 1].to_uppercase();
|
||||
let mut options = Vec::new();
|
||||
i += 2;
|
||||
// Parse field options until we hit another field name or end
|
||||
while i < cmd.len()
|
||||
&& ["WEIGHT","SORTABLE","NOINDEX","SEPARATOR","CASESENSITIVE"]
|
||||
.contains(&cmd[i].to_uppercase().as_str())
|
||||
{
|
||||
options.push(cmd[i].to_uppercase());
|
||||
i += 1;
|
||||
// If this option takes a value, consume it too
|
||||
if i > 0 && ["SEPARATOR","WEIGHT"].contains(&cmd[i - 1].to_uppercase().as_str()) && i < cmd.len() {
|
||||
options.push(cmd[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
schema.push((field_name, field_type, options));
|
||||
}
|
||||
Cmd::FtCreate { index_name, schema }
|
||||
}
|
||||
"ft.add" => {
|
||||
if cmd.len() < 5 {
|
||||
return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
|
||||
}
|
||||
let index_name = cmd[1].clone();
|
||||
let doc_id = cmd[2].clone();
|
||||
let score = cmd[3].parse::<f64>().map_err(|_| DBError("ERR score must be a number".to_string()))?;
|
||||
let mut fields = std::collections::HashMap::new();
|
||||
let mut i = 4;
|
||||
while i + 1 < cmd.len() {
|
||||
fields.insert(cmd[i].clone(), cmd[i + 1].clone());
|
||||
i += 2;
|
||||
}
|
||||
Cmd::FtAdd { index_name, doc_id, score, fields }
|
||||
}
|
||||
"ft.search" => {
|
||||
if cmd.len() < 3 {
|
||||
return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
|
||||
}
|
||||
let index_name = cmd[1].clone();
|
||||
let query = cmd[2].clone();
|
||||
let mut filters = Vec::new();
|
||||
let mut limit = None;
|
||||
let mut offset = None;
|
||||
let mut return_fields = None;
|
||||
let mut i = 3;
|
||||
while i < cmd.len() {
|
||||
match cmd[i].to_uppercase().as_str() {
|
||||
"FILTER" => {
|
||||
if i + 2 >= cmd.len() {
|
||||
return Err(DBError("ERR FILTER requires field and value".to_string()));
|
||||
}
|
||||
filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
|
||||
i += 3;
|
||||
}
|
||||
"LIMIT" => {
|
||||
if i + 2 >= cmd.len() {
|
||||
return Err(DBError("ERR LIMIT requires offset and num".to_string()));
|
||||
}
|
||||
offset = Some(cmd[i + 1].parse().unwrap_or(0));
|
||||
limit = Some(cmd[i + 2].parse().unwrap_or(10));
|
||||
i += 3;
|
||||
}
|
||||
"RETURN" => {
|
||||
if i + 1 >= cmd.len() {
|
||||
return Err(DBError("ERR RETURN requires field count".to_string()));
|
||||
}
|
||||
let count: usize = cmd[i + 1].parse().unwrap_or(0);
|
||||
i += 2;
|
||||
let mut fields = Vec::new();
|
||||
for _ in 0..count {
|
||||
if i < cmd.len() {
|
||||
fields.push(cmd[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
return_fields = Some(fields);
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields }
|
||||
}
|
||||
"ft.del" => {
|
||||
if cmd.len() != 3 {
|
||||
return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
|
||||
}
|
||||
Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
|
||||
}
|
||||
"ft.info" => {
|
||||
if cmd.len() != 2 {
|
||||
return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
|
||||
}
|
||||
Cmd::FtInfo(cmd[1].clone())
|
||||
}
|
||||
"ft.drop" => {
|
||||
if cmd.len() != 2 {
|
||||
return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
|
||||
}
|
||||
Cmd::FtDrop(cmd[1].clone())
|
||||
}
|
||||
"ft.alter" => {
|
||||
if cmd.len() < 5 {
|
||||
return Err(DBError("ERR FT.ALTER requires: index_name field_name field_type [options]".to_string()));
|
||||
}
|
||||
let index_name = cmd[1].clone();
|
||||
let field_name = cmd[2].clone();
|
||||
let field_type = cmd[3].clone();
|
||||
let options = if cmd.len() > 4 { cmd[4..].to_vec() } else { vec![] };
|
||||
Cmd::FtAlter { index_name, field_name, field_type, options }
|
||||
}
|
||||
"ft.aggregate" => {
|
||||
if cmd.len() < 3 {
|
||||
return Err(DBError("ERR FT.AGGREGATE requires: index_name query [options]".to_string()));
|
||||
}
|
||||
let index_name = cmd[1].clone();
|
||||
let query = cmd[2].clone();
|
||||
// Minimal parse for now
|
||||
let group_by = Vec::new();
|
||||
let reducers = Vec::new();
|
||||
Cmd::FtAggregate { index_name, query, group_by, reducers }
|
||||
}
|
||||
_ => Cmd::Unknow(cmd[0].clone()),
|
||||
},
|
||||
protocol,
|
||||
@@ -671,6 +840,59 @@ impl Cmd {
|
||||
return Ok(Protocol::SimpleString("QUEUED".to_string()));
|
||||
}
|
||||
|
||||
// Backend gating for Tantivy-only DBs: allow only FT.* and basic control/info commands
|
||||
// Determine per-selected-db backend via admin meta (not process default).
|
||||
let is_tantivy_backend = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
|
||||
if is_tantivy_backend {
|
||||
match &self {
|
||||
Cmd::Select(..)
|
||||
| Cmd::Quit
|
||||
| Cmd::Client(..)
|
||||
| Cmd::ClientSetName(..)
|
||||
| Cmd::ClientGetName
|
||||
| Cmd::Command(..)
|
||||
| Cmd::Info(..)
|
||||
| Cmd::FtCreate { .. }
|
||||
| Cmd::FtAdd { .. }
|
||||
| Cmd::FtSearch { .. }
|
||||
| Cmd::FtDel(..)
|
||||
| Cmd::FtInfo(..)
|
||||
| Cmd::FtDrop(..)
|
||||
| Cmd::FtAlter { .. }
|
||||
| Cmd::FtAggregate { .. } => {}
|
||||
_ => {
|
||||
return Ok(Protocol::err("ERR backend is Tantivy; only FT.* commands are allowed"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If selected DB is not Tantivy, forbid all FT.* commands here.
|
||||
if !is_tantivy_backend {
|
||||
match &self {
|
||||
Cmd::FtCreate { .. }
|
||||
| Cmd::FtAdd { .. }
|
||||
| Cmd::FtSearch { .. }
|
||||
| Cmd::FtDel(..)
|
||||
| Cmd::FtInfo(..)
|
||||
| Cmd::FtDrop(..)
|
||||
| Cmd::FtAlter { .. }
|
||||
| Cmd::FtAggregate { .. } => {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
match self {
|
||||
Cmd::Select(db, key) => select_cmd(server, db, key).await,
|
||||
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
|
||||
@@ -767,6 +989,32 @@ impl Cmd {
|
||||
Cmd::SymEncrypt(key_b64, message) => Ok(crate::sym::cmd_sym_encrypt(&key_b64, &message).await),
|
||||
Cmd::SymDecrypt(key_b64, ct_b64) => Ok(crate::sym::cmd_sym_decrypt(&key_b64, &ct_b64).await),
|
||||
|
||||
// Full-text search commands
|
||||
Cmd::FtCreate { index_name, schema } => {
|
||||
crate::search_cmd::ft_create_cmd(server, index_name, schema).await
|
||||
}
|
||||
Cmd::FtAdd { index_name, doc_id, score, fields } => {
|
||||
crate::search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
|
||||
}
|
||||
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
|
||||
crate::search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
|
||||
}
|
||||
Cmd::FtDel(index_name, doc_id) => {
|
||||
crate::search_cmd::ft_del_cmd(server, index_name, doc_id).await
|
||||
}
|
||||
Cmd::FtInfo(index_name) => {
|
||||
crate::search_cmd::ft_info_cmd(server, index_name).await
|
||||
}
|
||||
Cmd::FtDrop(index_name) => {
|
||||
crate::search_cmd::ft_drop_cmd(server, index_name).await
|
||||
}
|
||||
Cmd::FtAlter { .. } => {
|
||||
Ok(Protocol::err("FT.ALTER not implemented yet"))
|
||||
}
|
||||
Cmd::FtAggregate { .. } => {
|
||||
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
|
||||
}
|
||||
|
||||
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
|
||||
}
|
||||
}
|
||||
@@ -852,13 +1100,28 @@ async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result
|
||||
None => return Ok(Protocol::err("ERR invalid access key")),
|
||||
};
|
||||
|
||||
// Set selected database and permissions, then open storage
|
||||
// Set selected database and permissions, then open storage (skip for Tantivy backend)
|
||||
server.selected_db = db;
|
||||
server.current_permissions = Some(perms);
|
||||
|
||||
match server.current_storage() {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&e.0)),
|
||||
// Resolve effective backend for this db_id from admin meta
|
||||
let eff_backend = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
db,
|
||||
)
|
||||
.ok()
|
||||
.flatten();
|
||||
|
||||
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy)) {
|
||||
// Tantivy DBs have no KV storage; allow SELECT to succeed
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
} else {
|
||||
match server.current_storage() {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&e.0)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1196,7 +1459,27 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
|
||||
}
|
||||
|
||||
async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
|
||||
let storage_info = server.current_storage()?.info()?;
|
||||
// For Tantivy backend, there is no KV storage; synthesize minimal info.
|
||||
// Determine effective backend for the currently selected db.
|
||||
let is_tantivy_db = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
|
||||
let storage_info: Vec<(String, String)> = if is_tantivy_db {
|
||||
vec![
|
||||
("db_size".to_string(), "0".to_string()),
|
||||
("is_encrypted".to_string(), "false".to_string()),
|
||||
]
|
||||
} else {
|
||||
server.current_storage()?.info()?
|
||||
};
|
||||
let mut info_map: std::collections::HashMap<String, String> = storage_info.into_iter().collect();
|
||||
|
||||
info_map.insert("redis_version".to_string(), "7.0.0".to_string());
|
||||
|
@@ -12,3 +12,5 @@ pub mod storage;
|
||||
pub mod storage_trait;
|
||||
pub mod storage_sled;
|
||||
pub mod admin_meta;
|
||||
pub mod tantivy_search;
|
||||
pub mod search_cmd;
|
||||
|
@@ -2,6 +2,7 @@
|
||||
pub enum BackendType {
|
||||
Redb,
|
||||
Sled,
|
||||
Tantivy, // Full-text search backend (no KV storage)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
182
src/rpc.rs
182
src/rpc.rs
@@ -14,6 +14,7 @@ use crate::admin_meta;
|
||||
pub enum BackendType {
|
||||
Redb,
|
||||
Sled,
|
||||
Tantivy, // Full-text search backend (no KV storage)
|
||||
// Future: InMemory, Custom(String)
|
||||
}
|
||||
|
||||
@@ -112,6 +113,53 @@ pub trait Rpc {
|
||||
/// Set database public/private status
|
||||
#[method(name = "setDatabasePublic")]
|
||||
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
|
||||
|
||||
// ----- Full-text (Tantivy) minimal RPC endpoints -----
|
||||
|
||||
/// Create a new FT index in a Tantivy-backed DB
|
||||
#[method(name = "ftCreate")]
|
||||
async fn ft_create(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>,
|
||||
) -> RpcResult<bool>;
|
||||
|
||||
/// Add or replace a document in an FT index
|
||||
#[method(name = "ftAdd")]
|
||||
async fn ft_add(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
score: f64,
|
||||
fields: HashMap<String, String>,
|
||||
) -> RpcResult<bool>;
|
||||
|
||||
/// Search an FT index
|
||||
#[method(name = "ftSearch")]
|
||||
async fn ft_search(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Option<Vec<(String, String)>>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
) -> RpcResult<serde_json::Value>;
|
||||
|
||||
/// Delete a document by id from an FT index
|
||||
#[method(name = "ftDel")]
|
||||
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool>;
|
||||
|
||||
/// Get FT index info
|
||||
#[method(name = "ftInfo")]
|
||||
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value>;
|
||||
|
||||
/// Drop an FT index
|
||||
#[method(name = "ftDrop")]
|
||||
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
|
||||
}
|
||||
|
||||
/// RPC Server implementation
|
||||
@@ -187,13 +235,14 @@ impl RpcServerImpl {
|
||||
}
|
||||
|
||||
// Create server instance with resolved backend
|
||||
let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
|
||||
let db_option = DBOption {
|
||||
dir: self.base_dir.clone(),
|
||||
port: 0, // Not used for RPC-managed databases
|
||||
debug: false,
|
||||
encryption_key: None,
|
||||
encrypt: false,
|
||||
backend: effective_backend,
|
||||
backend: effective_backend.clone(),
|
||||
admin_secret: self.admin_secret.clone(),
|
||||
};
|
||||
|
||||
@@ -203,7 +252,10 @@ impl RpcServerImpl {
|
||||
server.selected_db = db_id;
|
||||
|
||||
// Lazily open/create physical storage according to admin meta (per-db encryption)
|
||||
let _ = server.current_storage();
|
||||
// Skip for Tantivy backend (no KV storage to open)
|
||||
if !is_tantivy {
|
||||
let _ = server.current_storage();
|
||||
}
|
||||
|
||||
// Store the server
|
||||
let mut servers = self.servers.write().await;
|
||||
@@ -290,6 +342,7 @@ impl RpcServerImpl {
|
||||
let backend = match server.option.backend {
|
||||
crate::options::BackendType::Redb => BackendType::Redb,
|
||||
crate::options::BackendType::Sled => BackendType::Sled,
|
||||
crate::options::BackendType::Tantivy => BackendType::Tantivy,
|
||||
};
|
||||
|
||||
DatabaseInfo {
|
||||
@@ -340,18 +393,20 @@ impl RpcServer for RpcServerImpl {
|
||||
let opt_backend = match backend {
|
||||
BackendType::Redb => crate::options::BackendType::Redb,
|
||||
BackendType::Sled => crate::options::BackendType::Sled,
|
||||
BackendType::Tantivy => crate::options::BackendType::Tantivy,
|
||||
};
|
||||
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
|
||||
// Create server instance using base_dir, chosen backend and admin secret
|
||||
let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
|
||||
let option = DBOption {
|
||||
dir: self.base_dir.clone(),
|
||||
port: 0, // Not used for RPC-managed databases
|
||||
debug: false,
|
||||
encryption_key: None, // per-db key is stored in admin DB 0
|
||||
encrypt: false, // encryption decided per-db at open time
|
||||
backend: opt_backend,
|
||||
backend: opt_backend.clone(),
|
||||
admin_secret: self.admin_secret.clone(),
|
||||
};
|
||||
|
||||
@@ -359,7 +414,10 @@ impl RpcServer for RpcServerImpl {
|
||||
server.selected_db = db_id;
|
||||
|
||||
// Initialize storage to create physical <id>.db with proper encryption from admin meta
|
||||
let _ = server.current_storage();
|
||||
// Skip for Tantivy backend (no KV storage to initialize)
|
||||
if !is_tantivy_new {
|
||||
let _ = server.current_storage();
|
||||
}
|
||||
|
||||
// Store the server in cache
|
||||
let mut servers = self.servers.write().await;
|
||||
@@ -420,6 +478,7 @@ impl RpcServer for RpcServerImpl {
|
||||
let db_ids = self.discover_databases().await;
|
||||
let mut stats = HashMap::new();
|
||||
|
||||
|
||||
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
|
||||
stats.insert("uptime".to_string(), serde_json::json!(
|
||||
std::time::SystemTime::now()
|
||||
@@ -431,6 +490,121 @@ impl RpcServer for RpcServerImpl {
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
// ----- Full-text (Tantivy) minimal RPC endpoints -----
|
||||
|
||||
async fn ft_create(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>,
|
||||
) -> RpcResult<bool> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
crate::search_cmd::ft_create_cmd(&*server, index_name, schema)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn ft_add(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
score: f64,
|
||||
fields: HashMap<String, String>,
|
||||
) -> RpcResult<bool> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
crate::search_cmd::ft_add_cmd(&*server, index_name, doc_id, score, fields)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn ft_search(
|
||||
&self,
|
||||
db_id: u64,
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Option<Vec<(String, String)>>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
) -> RpcResult<serde_json::Value> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
let proto = crate::search_cmd::ft_search_cmd(
|
||||
&*server,
|
||||
index_name,
|
||||
query,
|
||||
filters.unwrap_or_default(),
|
||||
limit,
|
||||
offset,
|
||||
return_fields,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(serde_json::json!({ "resp": proto.encode() }))
|
||||
}
|
||||
|
||||
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
crate::search_cmd::ft_del_cmd(&*server, index_name, doc_id)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
let proto = crate::search_cmd::ft_info_cmd(&*server, index_name)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(serde_json::json!({ "resp": proto.encode() }))
|
||||
}
|
||||
|
||||
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
if db_id == 0 {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
|
||||
}
|
||||
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
|
||||
}
|
||||
crate::search_cmd::ft_drop_cmd(&*server, index_name)
|
||||
.await
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
|
||||
let perms = match permissions.to_lowercase().as_str() {
|
||||
"read" => Permissions::Read,
|
||||
|
352
src/search_cmd.rs
Normal file
352
src/search_cmd.rs
Normal file
@@ -0,0 +1,352 @@
|
||||
use crate::{
|
||||
error::DBError,
|
||||
protocol::Protocol,
|
||||
server::Server,
|
||||
tantivy_search::{
|
||||
FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch,
|
||||
},
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub async fn ft_create_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
|
||||
// Parse schema into field definitions
|
||||
let mut field_definitions = Vec::new();
|
||||
for (field_name, field_type, options) in schema {
|
||||
let field_def = match field_type.to_uppercase().as_str() {
|
||||
"TEXT" => {
|
||||
let mut sortable = false;
|
||||
let mut no_index = false;
|
||||
// Weight is not used in current implementation
|
||||
let mut _weight = 1.0f32;
|
||||
let mut i = 0;
|
||||
while i < options.len() {
|
||||
match options[i].to_uppercase().as_str() {
|
||||
"WEIGHT" => {
|
||||
if i + 1 < options.len() {
|
||||
_weight = options[i + 1].parse::<f32>().unwrap_or(1.0);
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
"SORTABLE" => {
|
||||
sortable = true;
|
||||
}
|
||||
"NOINDEX" => {
|
||||
no_index = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
FieldDef::Text {
|
||||
stored: true,
|
||||
indexed: !no_index,
|
||||
tokenized: true,
|
||||
fast: sortable,
|
||||
}
|
||||
}
|
||||
"NUMERIC" => {
|
||||
// default to F64
|
||||
let mut sortable = false;
|
||||
for opt in &options {
|
||||
if opt.to_uppercase() == "SORTABLE" {
|
||||
sortable = true;
|
||||
}
|
||||
}
|
||||
FieldDef::Numeric {
|
||||
stored: true,
|
||||
indexed: true,
|
||||
fast: sortable,
|
||||
precision: NumericType::F64,
|
||||
}
|
||||
}
|
||||
"TAG" => {
|
||||
let mut separator = ",".to_string();
|
||||
let mut case_sensitive = false;
|
||||
let mut i = 0;
|
||||
while i < options.len() {
|
||||
match options[i].to_uppercase().as_str() {
|
||||
"SEPARATOR" => {
|
||||
if i + 1 < options.len() {
|
||||
separator = options[i + 1].clone();
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
"CASESENSITIVE" => {
|
||||
case_sensitive = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
FieldDef::Tag {
|
||||
stored: true,
|
||||
separator,
|
||||
case_sensitive,
|
||||
}
|
||||
}
|
||||
"GEO" => FieldDef::Geo { stored: true },
|
||||
_ => {
|
||||
return Err(DBError(format!("Unknown field type: {}", field_type)));
|
||||
}
|
||||
};
|
||||
field_definitions.push((field_name, field_def));
|
||||
}
|
||||
|
||||
// Create the search index
|
||||
let search_path = server.search_index_path();
|
||||
let config = IndexConfig::default();
|
||||
let search_index = TantivySearch::new_with_schema(
|
||||
search_path,
|
||||
index_name.clone(),
|
||||
field_definitions,
|
||||
Some(config),
|
||||
)?;
|
||||
|
||||
// Store in registry
|
||||
let mut indexes = server.search_indexes.write().unwrap();
|
||||
indexes.insert(index_name, Arc::new(search_index));
|
||||
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_add_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
_score: f64,
|
||||
fields: HashMap<String, String>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
search_index.add_document_with_fields(&doc_id, fields)?;
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_search_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Vec<(String, String)>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
|
||||
let search_filters = filters
|
||||
.into_iter()
|
||||
.map(|(field, value)| Filter {
|
||||
field,
|
||||
filter_type: FilterType::Equals(value),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let options = SearchOptions {
|
||||
limit: limit.unwrap_or(10),
|
||||
offset: offset.unwrap_or(0),
|
||||
filters: search_filters,
|
||||
sort_by: None,
|
||||
return_fields,
|
||||
highlight: false,
|
||||
};
|
||||
|
||||
let results = search_index.search_with_options(&query, options)?;
|
||||
|
||||
// Format results as Redis protocol
|
||||
let mut response = Vec::new();
|
||||
// First element is the total count
|
||||
response.push(Protocol::SimpleString(results.total.to_string()));
|
||||
// Then each document
|
||||
for doc in results.documents {
|
||||
let mut doc_array = Vec::new();
|
||||
// Add document ID if it exists
|
||||
if let Some(id) = doc.fields.get("_id") {
|
||||
doc_array.push(Protocol::BulkString(id.clone()));
|
||||
}
|
||||
// Add score
|
||||
doc_array.push(Protocol::BulkString(doc.score.to_string()));
|
||||
// Add fields as key-value pairs
|
||||
for (field_name, field_value) in doc.fields {
|
||||
if field_name != "_id" {
|
||||
doc_array.push(Protocol::BulkString(field_name));
|
||||
doc_array.push(Protocol::BulkString(field_value));
|
||||
}
|
||||
}
|
||||
response.push(Protocol::Array(doc_array));
|
||||
}
|
||||
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_del_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let _search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
// Not fully implemented yet: Tantivy delete by term would require a writer session and commit coordination.
|
||||
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
|
||||
Ok(Protocol::SimpleString("1".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
let info = search_index.get_info()?;
|
||||
|
||||
// Format info as Redis protocol
|
||||
let mut response = Vec::new();
|
||||
response.push(Protocol::BulkString("index_name".to_string()));
|
||||
response.push(Protocol::BulkString(info.name));
|
||||
response.push(Protocol::BulkString("num_docs".to_string()));
|
||||
response.push(Protocol::BulkString(info.num_docs.to_string()));
|
||||
response.push(Protocol::BulkString("num_fields".to_string()));
|
||||
response.push(Protocol::BulkString(info.fields.len().to_string()));
|
||||
response.push(Protocol::BulkString("fields".to_string()));
|
||||
let fields_str = info
|
||||
.fields
|
||||
.iter()
|
||||
.map(|f| format!("{}:{}", f.name, f.field_type))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
response.push(Protocol::BulkString(fields_str));
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
|
||||
// Remove from registry
|
||||
{
|
||||
let mut indexes = server.search_indexes.write().unwrap();
|
||||
indexes.remove(&index_name);
|
||||
}
|
||||
|
||||
// Remove the index files from disk
|
||||
let index_path = server.search_index_path().join(&index_name);
|
||||
if index_path.exists() {
|
||||
std::fs::remove_dir_all(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
@@ -23,6 +23,9 @@ pub struct Server {
|
||||
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
|
||||
pub current_permissions: Option<crate::rpc::Permissions>,
|
||||
|
||||
// In-memory registry of Tantivy search indexes for this server
|
||||
pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>,
|
||||
|
||||
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
|
||||
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
|
||||
pub waiter_seq: Arc<AtomicU64>,
|
||||
@@ -49,12 +52,25 @@ impl Server {
|
||||
selected_db: 0,
|
||||
queued_cmd: None,
|
||||
current_permissions: None,
|
||||
|
||||
|
||||
search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
||||
list_waiters: Arc::new(Mutex::new(HashMap::new())),
|
||||
waiter_seq: Arc::new(AtomicU64::new(1)),
|
||||
}
|
||||
}
|
||||
|
||||
// Path where search indexes are stored, namespaced per selected DB:
|
||||
// <base_dir>/search_indexes/<db_id>
|
||||
pub fn search_index_path(&self) -> std::path::PathBuf {
|
||||
let base = std::path::PathBuf::from(&self.option.dir)
|
||||
.join("search_indexes")
|
||||
.join(self.selected_db.to_string());
|
||||
if !base.exists() {
|
||||
let _ = std::fs::create_dir_all(&base);
|
||||
}
|
||||
base
|
||||
}
|
||||
|
||||
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
|
||||
let mut cache = self.db_cache.write().unwrap();
|
||||
|
||||
|
667
src/tantivy_search.rs
Normal file
667
src/tantivy_search.rs
Normal file
@@ -0,0 +1,667 @@
|
||||
use crate::error::DBError;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use tantivy::{
|
||||
collector::TopDocs,
|
||||
directory::MmapDirectory,
|
||||
query::{BooleanQuery, Occur, Query, QueryParser, TermQuery},
|
||||
schema::{
|
||||
DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING,
|
||||
},
|
||||
tokenizer::TokenizerManager,
|
||||
DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term,
|
||||
};
|
||||
use tantivy::schema::Value;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum FieldDef {
|
||||
Text {
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
tokenized: bool,
|
||||
fast: bool,
|
||||
},
|
||||
Numeric {
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: bool,
|
||||
precision: NumericType,
|
||||
},
|
||||
Tag {
|
||||
stored: bool,
|
||||
separator: String,
|
||||
case_sensitive: bool,
|
||||
},
|
||||
Geo {
|
||||
stored: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum NumericType {
|
||||
I64,
|
||||
U64,
|
||||
F64,
|
||||
Date,
|
||||
}
|
||||
|
||||
pub struct IndexSchema {
|
||||
schema: Schema,
|
||||
fields: HashMap<String, (Field, FieldDef)>,
|
||||
default_search_fields: Vec<Field>,
|
||||
}
|
||||
|
||||
pub struct TantivySearch {
|
||||
index: Index,
|
||||
writer: Arc<RwLock<IndexWriter>>,
|
||||
reader: IndexReader,
|
||||
index_schema: IndexSchema,
|
||||
name: String,
|
||||
config: IndexConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IndexConfig {
|
||||
pub language: String,
|
||||
pub stopwords: Vec<String>,
|
||||
pub stemming: bool,
|
||||
pub max_doc_count: Option<usize>,
|
||||
pub default_score: f64,
|
||||
}
|
||||
|
||||
impl Default for IndexConfig {
|
||||
fn default() -> Self {
|
||||
IndexConfig {
|
||||
language: "english".to_string(),
|
||||
stopwords: vec![],
|
||||
stemming: true,
|
||||
max_doc_count: None,
|
||||
default_score: 1.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivySearch {
|
||||
pub fn new_with_schema(
|
||||
base_path: PathBuf,
|
||||
name: String,
|
||||
field_definitions: Vec<(String, FieldDef)>,
|
||||
config: Option<IndexConfig>,
|
||||
) -> Result<Self, DBError> {
|
||||
let index_path = base_path.join(&name);
|
||||
std::fs::create_dir_all(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;
|
||||
|
||||
// Build schema from field definitions
|
||||
let mut schema_builder = Schema::builder();
|
||||
let mut fields = HashMap::new();
|
||||
let mut default_search_fields = Vec::new();
|
||||
|
||||
// Always add a document ID field
|
||||
let id_field = schema_builder.add_text_field("_id", STRING | STORED);
|
||||
fields.insert(
|
||||
"_id".to_string(),
|
||||
(
|
||||
id_field,
|
||||
FieldDef::Text {
|
||||
stored: true,
|
||||
indexed: true,
|
||||
tokenized: false,
|
||||
fast: false,
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
// Add user-defined fields
|
||||
for (field_name, field_def) in field_definitions {
|
||||
let field = match &field_def {
|
||||
FieldDef::Text {
|
||||
stored,
|
||||
indexed,
|
||||
tokenized,
|
||||
fast: _fast,
|
||||
} => {
|
||||
let mut text_options = TextOptions::default();
|
||||
if *stored {
|
||||
text_options = text_options.set_stored();
|
||||
}
|
||||
if *indexed {
|
||||
let indexing_options = if *tokenized {
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(IndexRecordOption::WithFreqsAndPositions)
|
||||
} else {
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("raw")
|
||||
.set_index_option(IndexRecordOption::Basic)
|
||||
};
|
||||
text_options = text_options.set_indexing_options(indexing_options);
|
||||
let f = schema_builder.add_text_field(&field_name, text_options);
|
||||
if *tokenized {
|
||||
default_search_fields.push(f);
|
||||
}
|
||||
f
|
||||
} else {
|
||||
schema_builder.add_text_field(&field_name, text_options)
|
||||
}
|
||||
}
|
||||
FieldDef::Numeric {
|
||||
stored,
|
||||
indexed,
|
||||
fast,
|
||||
precision,
|
||||
} => match precision {
|
||||
NumericType::I64 => {
|
||||
let mut opts = NumericOptions::default();
|
||||
if *stored {
|
||||
opts = opts.set_stored();
|
||||
}
|
||||
if *indexed {
|
||||
opts = opts.set_indexed();
|
||||
}
|
||||
if *fast {
|
||||
opts = opts.set_fast();
|
||||
}
|
||||
schema_builder.add_i64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::U64 => {
|
||||
let mut opts = NumericOptions::default();
|
||||
if *stored {
|
||||
opts = opts.set_stored();
|
||||
}
|
||||
if *indexed {
|
||||
opts = opts.set_indexed();
|
||||
}
|
||||
if *fast {
|
||||
opts = opts.set_fast();
|
||||
}
|
||||
schema_builder.add_u64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::F64 => {
|
||||
let mut opts = NumericOptions::default();
|
||||
if *stored {
|
||||
opts = opts.set_stored();
|
||||
}
|
||||
if *indexed {
|
||||
opts = opts.set_indexed();
|
||||
}
|
||||
if *fast {
|
||||
opts = opts.set_fast();
|
||||
}
|
||||
schema_builder.add_f64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::Date => {
|
||||
let mut opts = DateOptions::default();
|
||||
if *stored {
|
||||
opts = opts.set_stored();
|
||||
}
|
||||
if *indexed {
|
||||
opts = opts.set_indexed();
|
||||
}
|
||||
if *fast {
|
||||
opts = opts.set_fast();
|
||||
}
|
||||
schema_builder.add_date_field(&field_name, opts)
|
||||
}
|
||||
},
|
||||
FieldDef::Tag {
|
||||
stored,
|
||||
separator: _,
|
||||
case_sensitive: _,
|
||||
} => {
|
||||
let mut text_options = TextOptions::default();
|
||||
if *stored {
|
||||
text_options = text_options.set_stored();
|
||||
}
|
||||
text_options = text_options.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("raw")
|
||||
.set_index_option(IndexRecordOption::Basic),
|
||||
);
|
||||
schema_builder.add_text_field(&field_name, text_options)
|
||||
}
|
||||
FieldDef::Geo { stored } => {
|
||||
// For now, store as two f64 fields for lat/lon
|
||||
let mut opts = NumericOptions::default();
|
||||
if *stored {
|
||||
opts = opts.set_stored();
|
||||
}
|
||||
opts = opts.set_indexed().set_fast();
|
||||
let lat_field =
|
||||
schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
|
||||
let lon_field =
|
||||
schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
|
||||
fields.insert(
|
||||
format!("{}_lat", field_name),
|
||||
(
|
||||
lat_field,
|
||||
FieldDef::Numeric {
|
||||
stored: *stored,
|
||||
indexed: true,
|
||||
fast: true,
|
||||
precision: NumericType::F64,
|
||||
},
|
||||
),
|
||||
);
|
||||
fields.insert(
|
||||
format!("{}_lon", field_name),
|
||||
(
|
||||
lon_field,
|
||||
FieldDef::Numeric {
|
||||
stored: *stored,
|
||||
indexed: true,
|
||||
fast: true,
|
||||
precision: NumericType::F64,
|
||||
},
|
||||
),
|
||||
);
|
||||
continue; // Skip adding the geo field itself
|
||||
}
|
||||
};
|
||||
fields.insert(field_name.clone(), (field, field_def));
|
||||
}
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let index_schema = IndexSchema {
|
||||
schema: schema.clone(),
|
||||
fields,
|
||||
default_search_fields,
|
||||
};
|
||||
|
||||
// Create or open index
|
||||
let dir = MmapDirectory::open(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
|
||||
let mut index =
|
||||
Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
|
||||
|
||||
// Configure tokenizers
|
||||
let tokenizer_manager = TokenizerManager::default();
|
||||
index.set_tokenizers(tokenizer_manager);
|
||||
|
||||
let writer = index
|
||||
.writer(15_000_000)
|
||||
.map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
|
||||
let reader = index
|
||||
.reader()
|
||||
.map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;
|
||||
|
||||
let config = config.unwrap_or_default();
|
||||
|
||||
Ok(TantivySearch {
|
||||
index,
|
||||
writer: Arc::new(RwLock::new(writer)),
|
||||
reader,
|
||||
index_schema,
|
||||
name,
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn add_document_with_fields(
|
||||
&self,
|
||||
doc_id: &str,
|
||||
fields: HashMap<String, String>,
|
||||
) -> Result<(), DBError> {
|
||||
let mut writer = self
|
||||
.writer
|
||||
.write()
|
||||
.map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;
|
||||
|
||||
// Delete existing document with same ID
|
||||
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
|
||||
writer.delete_term(Term::from_field_text(*id_field, doc_id));
|
||||
}
|
||||
|
||||
// Create new document
|
||||
let mut doc = tantivy::doc!();
|
||||
|
||||
// Add document ID
|
||||
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
|
||||
doc.add_text(*id_field, doc_id);
|
||||
}
|
||||
|
||||
// Add other fields based on schema
|
||||
for (field_name, field_value) in fields {
|
||||
if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
|
||||
match field_def {
|
||||
FieldDef::Text { .. } => {
|
||||
doc.add_text(*field, &field_value);
|
||||
}
|
||||
FieldDef::Numeric { precision, .. } => match precision {
|
||||
NumericType::I64 => {
|
||||
if let Ok(v) = field_value.parse::<i64>() {
|
||||
doc.add_i64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::U64 => {
|
||||
if let Ok(v) = field_value.parse::<u64>() {
|
||||
doc.add_u64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::F64 => {
|
||||
if let Ok(v) = field_value.parse::<f64>() {
|
||||
doc.add_f64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::Date => {
|
||||
if let Ok(v) = field_value.parse::<i64>() {
|
||||
doc.add_date(*field, DateTime::from_timestamp_millis(v));
|
||||
}
|
||||
}
|
||||
},
|
||||
FieldDef::Tag {
|
||||
separator,
|
||||
case_sensitive,
|
||||
..
|
||||
} => {
|
||||
let tags = if !case_sensitive {
|
||||
field_value.to_lowercase()
|
||||
} else {
|
||||
field_value.clone()
|
||||
};
|
||||
for tag in tags.split(separator.as_str()) {
|
||||
doc.add_text(*field, tag.trim());
|
||||
}
|
||||
}
|
||||
FieldDef::Geo { .. } => {
|
||||
let parts: Vec<&str> = field_value.split(',').collect();
|
||||
if parts.len() == 2 {
|
||||
if let (Ok(lat), Ok(lon)) =
|
||||
(parts[0].parse::<f64>(), parts[1].parse::<f64>())
|
||||
{
|
||||
if let Some((lat_field, _)) =
|
||||
self.index_schema.fields.get(&format!("{}_lat", field_name))
|
||||
{
|
||||
doc.add_f64(*lat_field, lat);
|
||||
}
|
||||
if let Some((lon_field, _)) =
|
||||
self.index_schema.fields.get(&format!("{}_lon", field_name))
|
||||
{
|
||||
doc.add_f64(*lon_field, lon);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer
|
||||
.add_document(doc)
|
||||
.map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
|
||||
writer
|
||||
.commit()
|
||||
.map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn search_with_options(
|
||||
&self,
|
||||
query_str: &str,
|
||||
options: SearchOptions,
|
||||
) -> Result<SearchResults, DBError> {
|
||||
let searcher = self.reader.searcher();
|
||||
|
||||
// Ensure we have searchable fields
|
||||
if self.index_schema.default_search_fields.is_empty() {
|
||||
return Err(DBError("No searchable fields defined in schema".to_string()));
|
||||
}
|
||||
|
||||
// Parse query based on search fields
|
||||
let query_parser = QueryParser::for_index(
|
||||
&self.index,
|
||||
self.index_schema.default_search_fields.clone(),
|
||||
);
|
||||
let parsed_query = query_parser
|
||||
.parse_query(query_str)
|
||||
.map_err(|e| DBError(format!("Failed to parse query: {}", e)))?;
|
||||
let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)];
|
||||
|
||||
// Apply filters if any
|
||||
for filter in options.filters {
|
||||
if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) {
|
||||
match filter.filter_type {
|
||||
FilterType::Equals(value) => {
|
||||
match field_def {
|
||||
FieldDef::Text { .. } | FieldDef::Tag { .. } => {
|
||||
let term_query =
|
||||
TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic);
|
||||
clauses.push((Occur::Must, Box::new(term_query)));
|
||||
}
|
||||
FieldDef::Numeric { precision, .. } => {
|
||||
// Equals on numeric fields: parse to the right numeric type and use term query
|
||||
match precision {
|
||||
NumericType::I64 => {
|
||||
if let Ok(v) = value.parse::<i64>() {
|
||||
let term = Term::from_field_i64(*field, v);
|
||||
let tq = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
clauses.push((Occur::Must, Box::new(tq)));
|
||||
}
|
||||
}
|
||||
NumericType::U64 => {
|
||||
if let Ok(v) = value.parse::<u64>() {
|
||||
let term = Term::from_field_u64(*field, v);
|
||||
let tq = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
clauses.push((Occur::Must, Box::new(tq)));
|
||||
}
|
||||
}
|
||||
NumericType::F64 => {
|
||||
if let Ok(v) = value.parse::<f64>() {
|
||||
let term = Term::from_field_f64(*field, v);
|
||||
let tq = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
clauses.push((Occur::Must, Box::new(tq)));
|
||||
}
|
||||
}
|
||||
NumericType::Date => {
|
||||
if let Ok(v) = value.parse::<i64>() {
|
||||
let dt = DateTime::from_timestamp_millis(v);
|
||||
let term = Term::from_field_date(*field, dt);
|
||||
let tq = TermQuery::new(term, IndexRecordOption::Basic);
|
||||
clauses.push((Occur::Must, Box::new(tq)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Geo { .. } => {
|
||||
// Geo equals isn't supported in this simplified version
|
||||
}
|
||||
}
|
||||
}
|
||||
FilterType::Range { .. } => {
|
||||
// TODO: Implement numeric range queries by building a RangeQuery per type
|
||||
}
|
||||
FilterType::InSet(values) => {
|
||||
// OR across values
|
||||
let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
|
||||
for value in values {
|
||||
let term_query = TermQuery::new(
|
||||
Term::from_field_text(*field, &value),
|
||||
IndexRecordOption::Basic,
|
||||
);
|
||||
sub_clauses.push((Occur::Should, Box::new(term_query)));
|
||||
}
|
||||
clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_query: Box<dyn Query> = if clauses.len() == 1 {
|
||||
clauses.pop().unwrap().1
|
||||
} else {
|
||||
Box::new(BooleanQuery::new(clauses))
|
||||
};
|
||||
|
||||
// Execute search
|
||||
let top_docs = searcher
|
||||
.search(&*final_query, &TopDocs::with_limit(options.limit + options.offset))
|
||||
.map_err(|e| DBError(format!("Search failed: {}", e)))?;
|
||||
let total_hits = top_docs.len();
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) {
|
||||
let retrieved_doc: TantivyDocument = searcher
|
||||
.doc(doc_address)
|
||||
.map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;
|
||||
|
||||
let mut doc_fields = HashMap::new();
|
||||
|
||||
// Extract stored fields (or synthesize)
|
||||
for (field_name, (field, field_def)) in &self.index_schema.fields {
|
||||
match field_def {
|
||||
FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => {
|
||||
if *stored {
|
||||
if let Some(value) = retrieved_doc.get_first(*field) {
|
||||
if let Some(text) = value.as_str() {
|
||||
doc_fields.insert(field_name.clone(), text.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Numeric {
|
||||
stored, precision, ..
|
||||
} => {
|
||||
if *stored {
|
||||
let value_str = match precision {
|
||||
NumericType::I64 => retrieved_doc
|
||||
.get_first(*field)
|
||||
.and_then(|v| v.as_i64())
|
||||
.map(|v| v.to_string()),
|
||||
NumericType::U64 => retrieved_doc
|
||||
.get_first(*field)
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v.to_string()),
|
||||
NumericType::F64 => retrieved_doc
|
||||
.get_first(*field)
|
||||
.and_then(|v| v.as_f64())
|
||||
.map(|v| v.to_string()),
|
||||
NumericType::Date => retrieved_doc
|
||||
.get_first(*field)
|
||||
.and_then(|v| v.as_datetime())
|
||||
.map(|v| v.into_timestamp_millis().to_string()),
|
||||
};
|
||||
if let Some(v) = value_str {
|
||||
doc_fields.insert(field_name.clone(), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Geo { stored } => {
|
||||
if *stored {
|
||||
let lat_field = self
|
||||
.index_schema
|
||||
.fields
|
||||
.get(&format!("{}_lat", field_name))
|
||||
.unwrap()
|
||||
.0;
|
||||
let lon_field = self
|
||||
.index_schema
|
||||
.fields
|
||||
.get(&format!("{}_lon", field_name))
|
||||
.unwrap()
|
||||
.0;
|
||||
let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
|
||||
let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
|
||||
if let (Some(lat), Some(lon)) = (lat, lon) {
|
||||
doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
documents.push(SearchDocument {
|
||||
fields: doc_fields,
|
||||
score,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(SearchResults {
|
||||
total: total_hits,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_info(&self) -> Result<IndexInfo, DBError> {
|
||||
let searcher = self.reader.searcher();
|
||||
let num_docs = searcher.num_docs();
|
||||
let fields_info: Vec<FieldInfo> = self
|
||||
.index_schema
|
||||
.fields
|
||||
.iter()
|
||||
.map(|(name, (_, def))| FieldInfo {
|
||||
name: name.clone(),
|
||||
field_type: format!("{:?}", def),
|
||||
})
|
||||
.collect();
|
||||
Ok(IndexInfo {
|
||||
name: self.name.clone(),
|
||||
num_docs,
|
||||
fields: fields_info,
|
||||
config: self.config.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SearchOptions {
|
||||
pub limit: usize,
|
||||
pub offset: usize,
|
||||
pub filters: Vec<Filter>,
|
||||
pub sort_by: Option<String>,
|
||||
pub return_fields: Option<Vec<String>>,
|
||||
pub highlight: bool,
|
||||
}
|
||||
|
||||
impl Default for SearchOptions {
|
||||
fn default() -> Self {
|
||||
SearchOptions {
|
||||
limit: 10,
|
||||
offset: 0,
|
||||
filters: vec![],
|
||||
sort_by: None,
|
||||
return_fields: None,
|
||||
highlight: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Filter {
|
||||
pub field: String,
|
||||
pub filter_type: FilterType,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FilterType {
|
||||
Equals(String),
|
||||
Range { min: String, max: String },
|
||||
InSet(Vec<String>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SearchResults {
|
||||
pub total: usize,
|
||||
pub documents: Vec<SearchDocument>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SearchDocument {
|
||||
pub fields: HashMap<String, String>,
|
||||
pub score: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct IndexInfo {
|
||||
pub name: String,
|
||||
pub num_docs: u64,
|
||||
pub fields: Vec<FieldInfo>,
|
||||
pub config: IndexConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FieldInfo {
|
||||
pub name: String,
|
||||
pub field_type: String,
|
||||
}
|
Reference in New Issue
Block a user