implementation of tantivy datastore + updated RPC calls to deal with tantivy + docs

This commit is contained in:
Maxime Van Hees
2025-09-23 17:15:40 +02:00
parent c470772a13
commit 22ac4c9ed6
11 changed files with 2508 additions and 10 deletions

View File

@@ -48,6 +48,9 @@ fn init_admin_storage(
let storage: Arc<dyn StorageBackend> = match backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Tantivy => {
return Err(DBError("Admin DB 0 cannot use Tantivy backend".to_string()))
}
};
Ok(storage)
}
@@ -199,6 +202,9 @@ pub fn open_data_storage(
let storage: Arc<dyn StorageBackend> = match effective_backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Tantivy => {
return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string()))
}
};
// Publish to registry
@@ -291,6 +297,7 @@ pub fn set_database_backend(
let val = match db_backend {
options::BackendType::Redb => "Redb",
options::BackendType::Sled => "Sled",
options::BackendType::Tantivy => "Tantivy",
};
let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
Ok(())
@@ -307,6 +314,7 @@ pub fn get_database_backend(
match admin.hget(&mk, "backend")? {
Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)),
_ => Ok(None),
}
}

View File

@@ -91,6 +91,41 @@ pub enum Cmd {
SymKeygen,
SymEncrypt(String, String), // key_b64, message
SymDecrypt(String, String), // key_b64, ciphertext_b64
// Full-text search commands with schema support
FtCreate {
index_name: String,
schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
},
FtAdd {
index_name: String,
doc_id: String,
score: f64,
fields: std::collections::HashMap<String, String>,
},
FtSearch {
index_name: String,
query: String,
filters: Vec<(String, String)>, // field, value pairs
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
},
FtDel(String, String), // index_name, doc_id
FtInfo(String), // index_name
FtDrop(String), // index_name
FtAlter {
index_name: String,
field_name: String,
field_type: String,
options: Vec<String>,
},
FtAggregate {
index_name: String,
query: String,
group_by: Vec<String>,
reducers: Vec<String>,
}
}
impl Cmd {
@@ -646,6 +681,140 @@ impl Cmd {
_ => return Err(DBError(format!("unsupported SYM subcommand {:?}", cmd))),
}
}
"ft.create" => {
if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
}
let index_name = cmd[1].clone();
let mut schema = Vec::new();
let mut i = 3;
while i < cmd.len() {
if i + 1 >= cmd.len() {
return Err(DBError("ERR incomplete field definition".to_string()));
}
let field_name = cmd[i].clone();
let field_type = cmd[i + 1].to_uppercase();
let mut options = Vec::new();
i += 2;
// Parse field options until we hit another field name or end
while i < cmd.len()
&& ["WEIGHT","SORTABLE","NOINDEX","SEPARATOR","CASESENSITIVE"]
.contains(&cmd[i].to_uppercase().as_str())
{
options.push(cmd[i].to_uppercase());
i += 1;
// If this option takes a value, consume it too
if i > 0 && ["SEPARATOR","WEIGHT"].contains(&cmd[i - 1].to_uppercase().as_str()) && i < cmd.len() {
options.push(cmd[i].clone());
i += 1;
}
}
schema.push((field_name, field_type, options));
}
Cmd::FtCreate { index_name, schema }
}
"ft.add" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
}
let index_name = cmd[1].clone();
let doc_id = cmd[2].clone();
let score = cmd[3].parse::<f64>().map_err(|_| DBError("ERR score must be a number".to_string()))?;
let mut fields = std::collections::HashMap::new();
let mut i = 4;
while i + 1 < cmd.len() {
fields.insert(cmd[i].clone(), cmd[i + 1].clone());
i += 2;
}
Cmd::FtAdd { index_name, doc_id, score, fields }
}
"ft.search" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
let mut filters = Vec::new();
let mut limit = None;
let mut offset = None;
let mut return_fields = None;
let mut i = 3;
while i < cmd.len() {
match cmd[i].to_uppercase().as_str() {
"FILTER" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR FILTER requires field and value".to_string()));
}
filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
i += 3;
}
"LIMIT" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR LIMIT requires offset and num".to_string()));
}
offset = Some(cmd[i + 1].parse().unwrap_or(0));
limit = Some(cmd[i + 2].parse().unwrap_or(10));
i += 3;
}
"RETURN" => {
if i + 1 >= cmd.len() {
return Err(DBError("ERR RETURN requires field count".to_string()));
}
let count: usize = cmd[i + 1].parse().unwrap_or(0);
i += 2;
let mut fields = Vec::new();
for _ in 0..count {
if i < cmd.len() {
fields.push(cmd[i].clone());
i += 1;
}
}
return_fields = Some(fields);
}
_ => i += 1,
}
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields }
}
"ft.del" => {
if cmd.len() != 3 {
return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
}
Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
}
"ft.info" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
}
Cmd::FtInfo(cmd[1].clone())
}
"ft.drop" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
}
Cmd::FtDrop(cmd[1].clone())
}
"ft.alter" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ALTER requires: index_name field_name field_type [options]".to_string()));
}
let index_name = cmd[1].clone();
let field_name = cmd[2].clone();
let field_type = cmd[3].clone();
let options = if cmd.len() > 4 { cmd[4..].to_vec() } else { vec![] };
Cmd::FtAlter { index_name, field_name, field_type, options }
}
"ft.aggregate" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.AGGREGATE requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
// Minimal parse for now
let group_by = Vec::new();
let reducers = Vec::new();
Cmd::FtAggregate { index_name, query, group_by, reducers }
}
_ => Cmd::Unknow(cmd[0].clone()),
},
protocol,
@@ -671,6 +840,59 @@ impl Cmd {
return Ok(Protocol::SimpleString("QUEUED".to_string()));
}
// Backend gating for Tantivy-only DBs: allow only FT.* and basic control/info commands
// Determine per-selected-db backend via admin meta (not process default).
let is_tantivy_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if is_tantivy_backend {
match &self {
Cmd::Select(..)
| Cmd::Quit
| Cmd::Client(..)
| Cmd::ClientSetName(..)
| Cmd::ClientGetName
| Cmd::Command(..)
| Cmd::Info(..)
| Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {}
_ => {
return Ok(Protocol::err("ERR backend is Tantivy; only FT.* commands are allowed"));
}
}
}
// If selected DB is not Tantivy, forbid all FT.* commands here.
if !is_tantivy_backend {
match &self {
Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
_ => {}
}
}
match self {
Cmd::Select(db, key) => select_cmd(server, db, key).await,
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
@@ -767,6 +989,32 @@ impl Cmd {
Cmd::SymEncrypt(key_b64, message) => Ok(crate::sym::cmd_sym_encrypt(&key_b64, &message).await),
Cmd::SymDecrypt(key_b64, ct_b64) => Ok(crate::sym::cmd_sym_decrypt(&key_b64, &ct_b64).await),
// Full-text search commands
Cmd::FtCreate { index_name, schema } => {
crate::search_cmd::ft_create_cmd(server, index_name, schema).await
}
Cmd::FtAdd { index_name, doc_id, score, fields } => {
crate::search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
crate::search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
}
Cmd::FtDel(index_name, doc_id) => {
crate::search_cmd::ft_del_cmd(server, index_name, doc_id).await
}
Cmd::FtInfo(index_name) => {
crate::search_cmd::ft_info_cmd(server, index_name).await
}
Cmd::FtDrop(index_name) => {
crate::search_cmd::ft_drop_cmd(server, index_name).await
}
Cmd::FtAlter { .. } => {
Ok(Protocol::err("FT.ALTER not implemented yet"))
}
Cmd::FtAggregate { .. } => {
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
}
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
}
}
@@ -852,13 +1100,28 @@ async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result
None => return Ok(Protocol::err("ERR invalid access key")),
};
// Set selected database and permissions, then open storage
// Set selected database and permissions, then open storage (skip for Tantivy backend)
server.selected_db = db;
server.current_permissions = Some(perms);
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
// Resolve effective backend for this db_id from admin meta
let eff_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
)
.ok()
.flatten();
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy)) {
// Tantivy DBs have no KV storage; allow SELECT to succeed
Ok(Protocol::SimpleString("OK".to_string()))
} else {
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
}
}
}
@@ -1196,7 +1459,27 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
}
async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
let storage_info = server.current_storage()?.info()?;
// For Tantivy backend, there is no KV storage; synthesize minimal info.
// Determine effective backend for the currently selected db.
let is_tantivy_db = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
let storage_info: Vec<(String, String)> = if is_tantivy_db {
vec![
("db_size".to_string(), "0".to_string()),
("is_encrypted".to_string(), "false".to_string()),
]
} else {
server.current_storage()?.info()?
};
let mut info_map: std::collections::HashMap<String, String> = storage_info.into_iter().collect();
info_map.insert("redis_version".to_string(), "7.0.0".to_string());

View File

@@ -12,3 +12,5 @@ pub mod storage;
pub mod storage_trait;
pub mod storage_sled;
pub mod admin_meta;
pub mod tantivy_search;
pub mod search_cmd;

View File

@@ -2,6 +2,7 @@
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
}
#[derive(Debug, Clone)]

View File

@@ -14,6 +14,7 @@ use crate::admin_meta;
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
// Future: InMemory, Custom(String)
}
@@ -112,6 +113,53 @@ pub trait Rpc {
/// Set database public/private status
#[method(name = "setDatabasePublic")]
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
// ----- Full-text (Tantivy) minimal RPC endpoints -----
/// Create a new FT index in a Tantivy-backed DB
#[method(name = "ftCreate")]
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool>;
/// Add or replace a document in an FT index
#[method(name = "ftAdd")]
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool>;
/// Search an FT index
#[method(name = "ftSearch")]
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value>;
/// Delete a document by id from an FT index
#[method(name = "ftDel")]
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool>;
/// Get FT index info
#[method(name = "ftInfo")]
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value>;
/// Drop an FT index
#[method(name = "ftDrop")]
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
}
/// RPC Server implementation
@@ -187,13 +235,14 @@ impl RpcServerImpl {
}
// Create server instance with resolved backend
let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
let db_option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None,
encrypt: false,
backend: effective_backend,
backend: effective_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
@@ -203,7 +252,10 @@ impl RpcServerImpl {
server.selected_db = db_id;
// Lazily open/create physical storage according to admin meta (per-db encryption)
let _ = server.current_storage();
// Skip for Tantivy backend (no KV storage to open)
if !is_tantivy {
let _ = server.current_storage();
}
// Store the server
let mut servers = self.servers.write().await;
@@ -290,6 +342,7 @@ impl RpcServerImpl {
let backend = match server.option.backend {
crate::options::BackendType::Redb => BackendType::Redb,
crate::options::BackendType::Sled => BackendType::Sled,
crate::options::BackendType::Tantivy => BackendType::Tantivy,
};
DatabaseInfo {
@@ -340,18 +393,20 @@ impl RpcServer for RpcServerImpl {
let opt_backend = match backend {
BackendType::Redb => crate::options::BackendType::Redb,
BackendType::Sled => crate::options::BackendType::Sled,
BackendType::Tantivy => crate::options::BackendType::Tantivy,
};
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Create server instance using base_dir, chosen backend and admin secret
let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
let option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None, // per-db key is stored in admin DB 0
encrypt: false, // encryption decided per-db at open time
backend: opt_backend,
backend: opt_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
@@ -359,7 +414,10 @@ impl RpcServer for RpcServerImpl {
server.selected_db = db_id;
// Initialize storage to create physical <id>.db with proper encryption from admin meta
let _ = server.current_storage();
// Skip for Tantivy backend (no KV storage to initialize)
if !is_tantivy_new {
let _ = server.current_storage();
}
// Store the server in cache
let mut servers = self.servers.write().await;
@@ -420,6 +478,7 @@ impl RpcServer for RpcServerImpl {
let db_ids = self.discover_databases().await;
let mut stats = HashMap::new();
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
stats.insert("uptime".to_string(), serde_json::json!(
std::time::SystemTime::now()
@@ -431,6 +490,121 @@ impl RpcServer for RpcServerImpl {
Ok(stats)
}
// ----- Full-text (Tantivy) minimal RPC endpoints -----
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_create_cmd(&*server, index_name, schema)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_add_cmd(&*server, index_name, doc_id, score, fields)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_search_cmd(
&*server,
index_name,
query,
filters.unwrap_or_default(),
limit,
offset,
return_fields,
)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_del_cmd(&*server, index_name, doc_id)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_info_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_drop_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
let perms = match permissions.to_lowercase().as_str() {
"read" => Permissions::Read,

352
src/search_cmd.rs Normal file
View File

@@ -0,0 +1,352 @@
use crate::{
error::DBError,
protocol::Protocol,
server::Server,
tantivy_search::{
FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch,
},
};
use std::collections::HashMap;
use std::sync::Arc;
pub async fn ft_create_cmd(
server: &Server,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Parse schema into field definitions
let mut field_definitions = Vec::new();
for (field_name, field_type, options) in schema {
let field_def = match field_type.to_uppercase().as_str() {
"TEXT" => {
let mut sortable = false;
let mut no_index = false;
// Weight is not used in current implementation
let mut _weight = 1.0f32;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"WEIGHT" => {
if i + 1 < options.len() {
_weight = options[i + 1].parse::<f32>().unwrap_or(1.0);
i += 2;
continue;
}
}
"SORTABLE" => {
sortable = true;
}
"NOINDEX" => {
no_index = true;
}
_ => {}
}
i += 1;
}
FieldDef::Text {
stored: true,
indexed: !no_index,
tokenized: true,
fast: sortable,
}
}
"NUMERIC" => {
// default to F64
let mut sortable = false;
for opt in &options {
if opt.to_uppercase() == "SORTABLE" {
sortable = true;
}
}
FieldDef::Numeric {
stored: true,
indexed: true,
fast: sortable,
precision: NumericType::F64,
}
}
"TAG" => {
let mut separator = ",".to_string();
let mut case_sensitive = false;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"SEPARATOR" => {
if i + 1 < options.len() {
separator = options[i + 1].clone();
i += 2;
continue;
}
}
"CASESENSITIVE" => {
case_sensitive = true;
}
_ => {}
}
i += 1;
}
FieldDef::Tag {
stored: true,
separator,
case_sensitive,
}
}
"GEO" => FieldDef::Geo { stored: true },
_ => {
return Err(DBError(format!("Unknown field type: {}", field_type)));
}
};
field_definitions.push((field_name, field_def));
}
// Create the search index
let search_path = server.search_index_path();
let config = IndexConfig::default();
let search_index = TantivySearch::new_with_schema(
search_path,
index_name.clone(),
field_definitions,
Some(config),
)?;
// Store in registry
let mut indexes = server.search_indexes.write().unwrap();
indexes.insert(index_name, Arc::new(search_index));
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_add_cmd(
server: &Server,
index_name: String,
doc_id: String,
_score: f64,
fields: HashMap<String, String>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
search_index.add_document_with_fields(&doc_id, fields)?;
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_search_cmd(
server: &Server,
index_name: String,
query: String,
filters: Vec<(String, String)>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let search_filters = filters
.into_iter()
.map(|(field, value)| Filter {
field,
filter_type: FilterType::Equals(value),
})
.collect();
let options = SearchOptions {
limit: limit.unwrap_or(10),
offset: offset.unwrap_or(0),
filters: search_filters,
sort_by: None,
return_fields,
highlight: false,
};
let results = search_index.search_with_options(&query, options)?;
// Format results as Redis protocol
let mut response = Vec::new();
// First element is the total count
response.push(Protocol::SimpleString(results.total.to_string()));
// Then each document
for doc in results.documents {
let mut doc_array = Vec::new();
// Add document ID if it exists
if let Some(id) = doc.fields.get("_id") {
doc_array.push(Protocol::BulkString(id.clone()));
}
// Add score
doc_array.push(Protocol::BulkString(doc.score.to_string()));
// Add fields as key-value pairs
for (field_name, field_value) in doc.fields {
if field_name != "_id" {
doc_array.push(Protocol::BulkString(field_name));
doc_array.push(Protocol::BulkString(field_value));
}
}
response.push(Protocol::Array(doc_array));
}
Ok(Protocol::Array(response))
}
pub async fn ft_del_cmd(
server: &Server,
index_name: String,
doc_id: String,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let _search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
// Not fully implemented yet: Tantivy delete by term would require a writer session and commit coordination.
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
Ok(Protocol::SimpleString("1".to_string()))
}
pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let info = search_index.get_info()?;
// Format info as Redis protocol
let mut response = Vec::new();
response.push(Protocol::BulkString("index_name".to_string()));
response.push(Protocol::BulkString(info.name));
response.push(Protocol::BulkString("num_docs".to_string()));
response.push(Protocol::BulkString(info.num_docs.to_string()));
response.push(Protocol::BulkString("num_fields".to_string()));
response.push(Protocol::BulkString(info.fields.len().to_string()));
response.push(Protocol::BulkString("fields".to_string()));
let fields_str = info
.fields
.iter()
.map(|f| format!("{}:{}", f.name, f.field_type))
.collect::<Vec<_>>()
.join(", ");
response.push(Protocol::BulkString(fields_str));
Ok(Protocol::Array(response))
}
pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Remove from registry
{
let mut indexes = server.search_indexes.write().unwrap();
indexes.remove(&index_name);
}
// Remove the index files from disk
let index_path = server.search_index_path().join(&index_name);
if index_path.exists() {
std::fs::remove_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
}
Ok(Protocol::SimpleString("OK".to_string()))
}

View File

@@ -23,6 +23,9 @@ pub struct Server {
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
pub current_permissions: Option<crate::rpc::Permissions>,
// In-memory registry of Tantivy search indexes for this server
pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>,
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
pub waiter_seq: Arc<AtomicU64>,
@@ -49,12 +52,25 @@ impl Server {
selected_db: 0,
queued_cmd: None,
current_permissions: None,
search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
list_waiters: Arc::new(Mutex::new(HashMap::new())),
waiter_seq: Arc::new(AtomicU64::new(1)),
}
}
// Path where search indexes are stored, namespaced per selected DB:
// <base_dir>/search_indexes/<db_id>
pub fn search_index_path(&self) -> std::path::PathBuf {
let base = std::path::PathBuf::from(&self.option.dir)
.join("search_indexes")
.join(self.selected_db.to_string());
if !base.exists() {
let _ = std::fs::create_dir_all(&base);
}
base
}
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
let mut cache = self.db_cache.write().unwrap();

667
src/tantivy_search.rs Normal file
View File

@@ -0,0 +1,667 @@
use crate::error::DBError;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use tantivy::{
collector::TopDocs,
directory::MmapDirectory,
query::{BooleanQuery, Occur, Query, QueryParser, TermQuery},
schema::{
DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING,
},
tokenizer::TokenizerManager,
DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term,
};
use tantivy::schema::Value;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
Text {
stored: bool,
indexed: bool,
tokenized: bool,
fast: bool,
},
Numeric {
stored: bool,
indexed: bool,
fast: bool,
precision: NumericType,
},
Tag {
stored: bool,
separator: String,
case_sensitive: bool,
},
Geo {
stored: bool,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
I64,
U64,
F64,
Date,
}
pub struct IndexSchema {
schema: Schema,
fields: HashMap<String, (Field, FieldDef)>,
default_search_fields: Vec<Field>,
}
pub struct TantivySearch {
index: Index,
writer: Arc<RwLock<IndexWriter>>,
reader: IndexReader,
index_schema: IndexSchema,
name: String,
config: IndexConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
pub language: String,
pub stopwords: Vec<String>,
pub stemming: bool,
pub max_doc_count: Option<usize>,
pub default_score: f64,
}
impl Default for IndexConfig {
fn default() -> Self {
IndexConfig {
language: "english".to_string(),
stopwords: vec![],
stemming: true,
max_doc_count: None,
default_score: 1.0,
}
}
}
impl TantivySearch {
pub fn new_with_schema(
base_path: PathBuf,
name: String,
field_definitions: Vec<(String, FieldDef)>,
config: Option<IndexConfig>,
) -> Result<Self, DBError> {
let index_path = base_path.join(&name);
std::fs::create_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;
// Build schema from field definitions
let mut schema_builder = Schema::builder();
let mut fields = HashMap::new();
let mut default_search_fields = Vec::new();
// Always add a document ID field
let id_field = schema_builder.add_text_field("_id", STRING | STORED);
fields.insert(
"_id".to_string(),
(
id_field,
FieldDef::Text {
stored: true,
indexed: true,
tokenized: false,
fast: false,
},
),
);
// Add user-defined fields
for (field_name, field_def) in field_definitions {
let field = match &field_def {
FieldDef::Text {
stored,
indexed,
tokenized,
fast: _fast,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
if *indexed {
let indexing_options = if *tokenized {
TextFieldIndexing::default()
.set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqsAndPositions)
} else {
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic)
};
text_options = text_options.set_indexing_options(indexing_options);
let f = schema_builder.add_text_field(&field_name, text_options);
if *tokenized {
default_search_fields.push(f);
}
f
} else {
schema_builder.add_text_field(&field_name, text_options)
}
}
FieldDef::Numeric {
stored,
indexed,
fast,
precision,
} => match precision {
NumericType::I64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_i64_field(&field_name, opts)
}
NumericType::U64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_u64_field(&field_name, opts)
}
NumericType::F64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_f64_field(&field_name, opts)
}
NumericType::Date => {
let mut opts = DateOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_date_field(&field_name, opts)
}
},
FieldDef::Tag {
stored,
separator: _,
case_sensitive: _,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
text_options = text_options.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic),
);
schema_builder.add_text_field(&field_name, text_options)
}
FieldDef::Geo { stored } => {
// For now, store as two f64 fields for lat/lon
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
opts = opts.set_indexed().set_fast();
let lat_field =
schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
let lon_field =
schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
fields.insert(
format!("{}_lat", field_name),
(
lat_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
fields.insert(
format!("{}_lon", field_name),
(
lon_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
continue; // Skip adding the geo field itself
}
};
fields.insert(field_name.clone(), (field, field_def));
}
let schema = schema_builder.build();
let index_schema = IndexSchema {
schema: schema.clone(),
fields,
default_search_fields,
};
// Create or open index
let dir = MmapDirectory::open(&index_path)
.map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
let mut index =
Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
// Configure tokenizers
let tokenizer_manager = TokenizerManager::default();
index.set_tokenizers(tokenizer_manager);
let writer = index
.writer(15_000_000)
.map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
let reader = index
.reader()
.map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;
let config = config.unwrap_or_default();
Ok(TantivySearch {
index,
writer: Arc::new(RwLock::new(writer)),
reader,
index_schema,
name,
config,
})
}
pub fn add_document_with_fields(
&self,
doc_id: &str,
fields: HashMap<String, String>,
) -> Result<(), DBError> {
let mut writer = self
.writer
.write()
.map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;
// Delete existing document with same ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
writer.delete_term(Term::from_field_text(*id_field, doc_id));
}
// Create new document
let mut doc = tantivy::doc!();
// Add document ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
doc.add_text(*id_field, doc_id);
}
// Add other fields based on schema
for (field_name, field_value) in fields {
if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
match field_def {
FieldDef::Text { .. } => {
doc.add_text(*field, &field_value);
}
FieldDef::Numeric { precision, .. } => match precision {
NumericType::I64 => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_i64(*field, v);
}
}
NumericType::U64 => {
if let Ok(v) = field_value.parse::<u64>() {
doc.add_u64(*field, v);
}
}
NumericType::F64 => {
if let Ok(v) = field_value.parse::<f64>() {
doc.add_f64(*field, v);
}
}
NumericType::Date => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_date(*field, DateTime::from_timestamp_millis(v));
}
}
},
FieldDef::Tag {
separator,
case_sensitive,
..
} => {
let tags = if !case_sensitive {
field_value.to_lowercase()
} else {
field_value.clone()
};
for tag in tags.split(separator.as_str()) {
doc.add_text(*field, tag.trim());
}
}
FieldDef::Geo { .. } => {
let parts: Vec<&str> = field_value.split(',').collect();
if parts.len() == 2 {
if let (Ok(lat), Ok(lon)) =
(parts[0].parse::<f64>(), parts[1].parse::<f64>())
{
if let Some((lat_field, _)) =
self.index_schema.fields.get(&format!("{}_lat", field_name))
{
doc.add_f64(*lat_field, lat);
}
if let Some((lon_field, _)) =
self.index_schema.fields.get(&format!("{}_lon", field_name))
{
doc.add_f64(*lon_field, lon);
}
}
}
}
}
}
}
writer
.add_document(doc)
.map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
writer
.commit()
.map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
Ok(())
}
pub fn search_with_options(
&self,
query_str: &str,
options: SearchOptions,
) -> Result<SearchResults, DBError> {
let searcher = self.reader.searcher();
// Ensure we have searchable fields
if self.index_schema.default_search_fields.is_empty() {
return Err(DBError("No searchable fields defined in schema".to_string()));
}
// Parse query based on search fields
let query_parser = QueryParser::for_index(
&self.index,
self.index_schema.default_search_fields.clone(),
);
let parsed_query = query_parser
.parse_query(query_str)
.map_err(|e| DBError(format!("Failed to parse query: {}", e)))?;
let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)];
// Apply filters if any
for filter in options.filters {
if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) {
match filter.filter_type {
FilterType::Equals(value) => {
match field_def {
FieldDef::Text { .. } | FieldDef::Tag { .. } => {
let term_query =
TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(term_query)));
}
FieldDef::Numeric { precision, .. } => {
// Equals on numeric fields: parse to the right numeric type and use term query
match precision {
NumericType::I64 => {
if let Ok(v) = value.parse::<i64>() {
let term = Term::from_field_i64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::U64 => {
if let Ok(v) = value.parse::<u64>() {
let term = Term::from_field_u64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::F64 => {
if let Ok(v) = value.parse::<f64>() {
let term = Term::from_field_f64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::Date => {
if let Ok(v) = value.parse::<i64>() {
let dt = DateTime::from_timestamp_millis(v);
let term = Term::from_field_date(*field, dt);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
}
}
FieldDef::Geo { .. } => {
// Geo equals isn't supported in this simplified version
}
}
}
FilterType::Range { .. } => {
// TODO: Implement numeric range queries by building a RangeQuery per type
}
FilterType::InSet(values) => {
// OR across values
let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
for value in values {
let term_query = TermQuery::new(
Term::from_field_text(*field, &value),
IndexRecordOption::Basic,
);
sub_clauses.push((Occur::Should, Box::new(term_query)));
}
clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
}
}
}
}
let final_query: Box<dyn Query> = if clauses.len() == 1 {
clauses.pop().unwrap().1
} else {
Box::new(BooleanQuery::new(clauses))
};
// Execute search
let top_docs = searcher
.search(&*final_query, &TopDocs::with_limit(options.limit + options.offset))
.map_err(|e| DBError(format!("Search failed: {}", e)))?;
let total_hits = top_docs.len();
let mut documents = Vec::new();
for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) {
let retrieved_doc: TantivyDocument = searcher
.doc(doc_address)
.map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;
let mut doc_fields = HashMap::new();
// Extract stored fields (or synthesize)
for (field_name, (field, field_def)) in &self.index_schema.fields {
match field_def {
FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => {
if *stored {
if let Some(value) = retrieved_doc.get_first(*field) {
if let Some(text) = value.as_str() {
doc_fields.insert(field_name.clone(), text.to_string());
}
}
}
}
FieldDef::Numeric {
stored, precision, ..
} => {
if *stored {
let value_str = match precision {
NumericType::I64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_i64())
.map(|v| v.to_string()),
NumericType::U64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_u64())
.map(|v| v.to_string()),
NumericType::F64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_f64())
.map(|v| v.to_string()),
NumericType::Date => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_datetime())
.map(|v| v.into_timestamp_millis().to_string()),
};
if let Some(v) = value_str {
doc_fields.insert(field_name.clone(), v);
}
}
}
FieldDef::Geo { stored } => {
if *stored {
let lat_field = self
.index_schema
.fields
.get(&format!("{}_lat", field_name))
.unwrap()
.0;
let lon_field = self
.index_schema
.fields
.get(&format!("{}_lon", field_name))
.unwrap()
.0;
let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
if let (Some(lat), Some(lon)) = (lat, lon) {
doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
}
}
}
}
}
documents.push(SearchDocument {
fields: doc_fields,
score,
});
}
Ok(SearchResults {
total: total_hits,
documents,
})
}
pub fn get_info(&self) -> Result<IndexInfo, DBError> {
let searcher = self.reader.searcher();
let num_docs = searcher.num_docs();
let fields_info: Vec<FieldInfo> = self
.index_schema
.fields
.iter()
.map(|(name, (_, def))| FieldInfo {
name: name.clone(),
field_type: format!("{:?}", def),
})
.collect();
Ok(IndexInfo {
name: self.name.clone(),
num_docs,
fields: fields_info,
config: self.config.clone(),
})
}
}
#[derive(Debug, Clone)]
pub struct SearchOptions {
pub limit: usize,
pub offset: usize,
pub filters: Vec<Filter>,
pub sort_by: Option<String>,
pub return_fields: Option<Vec<String>>,
pub highlight: bool,
}
impl Default for SearchOptions {
fn default() -> Self {
SearchOptions {
limit: 10,
offset: 0,
filters: vec![],
sort_by: None,
return_fields: None,
highlight: false,
}
}
}
#[derive(Debug, Clone)]
pub struct Filter {
pub field: String,
pub filter_type: FilterType,
}
#[derive(Debug, Clone)]
pub enum FilterType {
Equals(String),
Range { min: String, max: String },
InSet(Vec<String>),
}
#[derive(Debug)]
pub struct SearchResults {
pub total: usize,
pub documents: Vec<SearchDocument>,
}
#[derive(Debug)]
pub struct SearchDocument {
pub fields: HashMap<String, String>,
pub score: f32,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
pub name: String,
pub num_docs: u64,
pub fields: Vec<FieldInfo>,
pub config: IndexConfig,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
pub name: String,
pub field_type: String,
}