Cargo.lock (generated): 847 lines changed; file diff suppressed because it is too large.
@@ -24,6 +24,7 @@ age = "0.10"
 secrecy = "0.8"
 ed25519-dalek = "2"
 base64 = "0.22"
+tantivy = "0.25.0"

 [dev-dependencies]
 redis = { version = "0.24", features = ["aio", "tokio-comp"] }
herodb/specs/backgroundinfo/lance.md (new file, 1251 lines): file diff suppressed because it is too large.
herodb/specs/backgroundinfo/lancedb.md (new file, 6847 lines): file diff suppressed because it is too large.
@@ -84,6 +84,41 @@ pub enum Cmd {
     AgeSignName(String, String), // name, message
     AgeVerifyName(String, String, String), // name, message, signature_b64
     AgeList,
+
+    // Full-text search commands with schema support
+    FtCreate {
+        index_name: String,
+        schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
+    },
+    FtAdd {
+        index_name: String,
+        doc_id: String,
+        score: f64,
+        fields: std::collections::HashMap<String, String>,
+    },
+    FtSearch {
+        index_name: String,
+        query: String,
+        filters: Vec<(String, String)>, // field, value pairs
+        limit: Option<usize>,
+        offset: Option<usize>,
+        return_fields: Option<Vec<String>>,
+    },
+    FtDel(String, String), // index_name, doc_id
+    FtInfo(String), // index_name
+    FtDrop(String), // index_name
+    FtAlter {
+        index_name: String,
+        field_name: String,
+        field_type: String,
+        options: Vec<String>,
+    },
+    FtAggregate {
+        index_name: String,
+        query: String,
+        group_by: Vec<String>,
+        reducers: Vec<String>,
+    },
 }

 impl Cmd {
@@ -616,6 +651,45 @@ impl Cmd {
                 _ => return Err(DBError(format!("unsupported AGE subcommand {:?}", cmd))),
             }
         }
+        "ft.create" => {
+            if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
+                return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
+            }
+
+            let index_name = cmd[1].clone();
+            let mut schema = Vec::new();
+            let mut i = 3;
+
+            while i < cmd.len() {
+                if i + 1 >= cmd.len() {
+                    return Err(DBError("ERR incomplete field definition".to_string()));
+                }
+
+                let field_name = cmd[i].clone();
+                let field_type = cmd[i + 1].to_uppercase();
+                let mut options = Vec::new();
+                i += 2;
+
+                // Parse field options until we hit another field name or end
+                while i < cmd.len() && !["TEXT", "NUMERIC", "TAG", "GEO"].contains(&cmd[i].to_uppercase().as_str()) {
+                    options.push(cmd[i].to_uppercase());
+                    i += 1;
+
+                    // If this option takes a value, consume it too
+                    if i > 0 && ["SEPARATOR", "WEIGHT"].contains(&cmd[i-1].to_uppercase().as_str()) && i < cmd.len() {
+                        options.push(cmd[i].clone());
+                        i += 1;
+                    }
+                }
+
+                schema.push((field_name, field_type, options));
+            }
+
+            Cmd::FtCreate {
+                index_name,
+                schema,
+            }
+        }
         _ => Cmd::Unknow(cmd[0].clone()),
     },
     protocol,
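For orientation, here is a standalone sketch (an editor's illustration, not part of the commit) of the token grouping the loop above performs. Note that the inner loop stops only when it reaches one of the type keywords TEXT/NUMERIC/TAG/GEO, so a single-field command gives the clearest trace; the index and field names are placeholders.

// Editor's sketch: mirrors the FT.CREATE argument loop above, runnable on its own.
// Tokens are assumed to arrive pre-split, as the parser receives them.
fn parse_schema(cmd: &[&str]) -> Vec<(String, String, Vec<String>)> {
    let mut schema = Vec::new();
    let mut i = 3; // tokens 0..=2 are: FT.CREATE <index> SCHEMA
    while i < cmd.len() {
        if i + 1 >= cmd.len() {
            break; // incomplete field definition; the real parser returns an error here
        }
        let (field_name, field_type) = (cmd[i].to_string(), cmd[i + 1].to_uppercase());
        let mut options = Vec::new();
        i += 2;
        // Consume option tokens until the next type keyword, exactly as in the hunk above.
        while i < cmd.len() && !["TEXT", "NUMERIC", "TAG", "GEO"].contains(&cmd[i].to_uppercase().as_str()) {
            options.push(cmd[i].to_uppercase());
            i += 1;
            // SEPARATOR and WEIGHT take a value token.
            if ["SEPARATOR", "WEIGHT"].contains(&cmd[i - 1].to_uppercase().as_str()) && i < cmd.len() {
                options.push(cmd[i].to_string());
                i += 1;
            }
        }
        schema.push((field_name, field_type, options));
    }
    schema
}

fn main() {
    let cmd = ["FT.CREATE", "products", "SCHEMA", "title", "TEXT", "WEIGHT", "2"];
    // Prints: [("title", "TEXT", ["WEIGHT", "2"])]
    println!("{:?}", parse_schema(&cmd));
}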
@@ -730,6 +804,44 @@ impl Cmd {
             Cmd::AgeSignName(name, message) => Ok(crate::age::cmd_age_sign_name(server, &name, &message).await),
             Cmd::AgeVerifyName(name, message, sig_b64) => Ok(crate::age::cmd_age_verify_name(server, &name, &message, &sig_b64).await),
             Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
+
+            // Full-text search commands
+            Cmd::FtCreate { index_name, schema } => {
+                // TODO: Implement the actual logic for creating a full-text search index.
+                // This will involve parsing the schema and setting up the Tantivy index.
+                println!("Creating index '{}' with schema: {:?}", index_name, schema);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtAdd { index_name, doc_id, score: _, fields: _ } => {
+                // TODO: Implement adding a document to the index.
+                println!("Adding document '{}' to index '{}'", doc_id, index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtSearch { index_name, query, .. } => {
+                // TODO: Implement search functionality.
+                println!("Searching index '{}' for query '{}'", index_name, query);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtDel(index_name, doc_id) => {
+                println!("Deleting doc '{}' from index '{}'", doc_id, index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtInfo(index_name) => {
+                println!("Getting info for index '{}'", index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtDrop(index_name) => {
+                println!("Dropping index '{}'", index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtAlter { index_name, .. } => {
+                println!("Altering index '{}'", index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
+            Cmd::FtAggregate { index_name, .. } => {
+                println!("Aggregating on index '{}'", index_name);
+                Ok(Protocol::SimpleString("OK".to_string()))
+            }
             Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
         }
     }
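Since every FT handler above currently just logs and replies OK, the new commands can already be smoke-tested over RESP. A minimal sketch (an editor's illustration, not part of the commit) using the redis dev-dependency declared earlier in this diff; the server address and index name are assumptions.

// Editor's sketch: drive the new FT.CREATE command over RESP with the `redis` crate.
fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/")?;
    let mut con = client.get_connection()?;

    let reply: String = redis::cmd("FT.CREATE")
        .arg("products").arg("SCHEMA")
        .arg("title").arg("TEXT").arg("WEIGHT").arg("2")
        .query(&mut con)?;

    // The stub handler above logs the parsed schema and replies with a plain "OK".
    assert_eq!(reply, "OK");
    Ok(())
}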
@@ -8,3 +8,4 @@ pub mod server;
 pub mod storage;
 pub mod storage_trait; // Add this
 pub mod storage_sled; // Add this
+pub mod tantivy_search;
@@ -51,6 +51,7 @@ async fn main() {
     // new DB option
     let option = herodb::options::DBOption {
         dir: args.dir,
+        port,
         debug: args.debug,
         encryption_key: args.encryption_key,
         encrypt: args.encrypt,
|
@@ -7,6 +7,7 @@ pub enum BackendType {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct DBOption {
|
pub struct DBOption {
|
||||||
pub dir: String,
|
pub dir: String,
|
||||||
|
pub port: u16,
|
||||||
pub debug: bool,
|
pub debug: bool,
|
||||||
pub encrypt: bool,
|
pub encrypt: bool,
|
||||||
pub encryption_key: Option<String>,
|
pub encryption_key: Option<String>,
|
||||||
|
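Taken together with the backend field the test updates later in this diff rely on, constructing the updated options struct looks roughly like this (editor's sketch; the directory and port values are placeholders).

// Editor's sketch: DBOption fields as visible in this diff, including the new
// `port` field and the `backend` field shown in the test hunks below.
fn main() {
    let option = herodb::options::DBOption {
        dir: "/tmp/herodb".to_string(),
        port: 6379,
        debug: false,
        encrypt: false,
        encryption_key: None,
        backend: herodb::options::BackendType::Redb,
    };
    println!("listening on port {}", option.port);
}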
herodb/src/tantivy_search.rs (new file, 570 lines)
@@ -0,0 +1,570 @@
use tantivy::{
    collector::TopDocs,
    directory::MmapDirectory,
    query::{QueryParser, BooleanQuery, Query, TermQuery, Occur},
    schema::{Schema, Field, TextOptions, TextFieldIndexing,
             STORED, STRING, Value},
    Index, IndexWriter, IndexReader, ReloadPolicy,
    Term, DateTime,
    tokenizer::{TokenizerManager},
};
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::collections::HashMap;
use crate::error::DBError;
use serde::{Serialize, Deserialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
    Text {
        stored: bool,
        indexed: bool,
        tokenized: bool,
        fast: bool,
    },
    Numeric {
        stored: bool,
        indexed: bool,
        fast: bool,
        precision: NumericType,
    },
    Tag {
        stored: bool,
        separator: String,
        case_sensitive: bool,
    },
    Geo {
        stored: bool,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
    I64,
    U64,
    F64,
    Date,
}

pub struct IndexSchema {
    schema: Schema,
    fields: HashMap<String, (Field, FieldDef)>,
    default_search_fields: Vec<Field>,
}

pub struct TantivySearch {
    index: Index,
    writer: Arc<RwLock<IndexWriter>>,
    reader: IndexReader,
    index_schema: IndexSchema,
    name: String,
    config: IndexConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    pub language: String,
    pub stopwords: Vec<String>,
    pub stemming: bool,
    pub max_doc_count: Option<usize>,
    pub default_score: f64,
}

impl Default for IndexConfig {
    fn default() -> Self {
        IndexConfig {
            language: "english".to_string(),
            stopwords: vec![],
            stemming: true,
            max_doc_count: None,
            default_score: 1.0,
        }
    }
}

impl TantivySearch {
    pub fn new_with_schema(
        base_path: PathBuf,
        name: String,
        field_definitions: Vec<(String, FieldDef)>,
        config: Option<IndexConfig>,
    ) -> Result<Self, DBError> {
        let index_path = base_path.join(&name);
        std::fs::create_dir_all(&index_path)
            .map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;

        // Build schema from field definitions
        let mut schema_builder = Schema::builder();
        let mut fields = HashMap::new();
        let mut default_search_fields = Vec::new();

        // Always add a document ID field
        let id_field = schema_builder.add_text_field("_id", STRING | STORED);
        fields.insert("_id".to_string(), (id_field, FieldDef::Text {
            stored: true,
            indexed: true,
            tokenized: false,
            fast: false,
        }));

        // Add user-defined fields
        for (field_name, field_def) in field_definitions {
            let field = match &field_def {
                FieldDef::Text { stored, indexed, tokenized, fast: _fast } => {
                    let mut text_options = TextOptions::default();

                    if *stored {
                        text_options = text_options.set_stored();
                    }

                    if *indexed {
                        let indexing_options = if *tokenized {
                            TextFieldIndexing::default()
                                .set_tokenizer("default")
                                .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions)
                        } else {
                            TextFieldIndexing::default()
                                .set_tokenizer("raw")
                                .set_index_option(tantivy::schema::IndexRecordOption::Basic)
                        };
                        text_options = text_options.set_indexing_options(indexing_options);

                        let f = schema_builder.add_text_field(&field_name, text_options);
                        if *tokenized {
                            default_search_fields.push(f);
                        }
                        f
                    } else {
                        schema_builder.add_text_field(&field_name, text_options)
                    }
                }
                FieldDef::Numeric { stored, indexed, fast, precision } => {
                    match precision {
                        NumericType::I64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_i64_field(&field_name, opts)
                        }
                        NumericType::U64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_u64_field(&field_name, opts)
                        }
                        NumericType::F64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_f64_field(&field_name, opts)
                        }
                        NumericType::Date => {
                            let mut opts = tantivy::schema::DateOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_date_field(&field_name, opts)
                        }
                    }
                }
                FieldDef::Tag { stored, separator: _, case_sensitive: _ } => {
                    let mut text_options = TextOptions::default();
                    if *stored {
                        text_options = text_options.set_stored();
                    }
                    text_options = text_options.set_indexing_options(
                        TextFieldIndexing::default()
                            .set_tokenizer("raw")
                            .set_index_option(tantivy::schema::IndexRecordOption::Basic)
                    );
                    schema_builder.add_text_field(&field_name, text_options)
                }
                FieldDef::Geo { stored } => {
                    // For now, store as two f64 fields for lat/lon
                    let mut opts = tantivy::schema::NumericOptions::default();
                    if *stored { opts = opts.set_stored(); }
                    opts = opts.set_indexed().set_fast();

                    let lat_field = schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
                    let lon_field = schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);

                    fields.insert(format!("{}_lat", field_name), (lat_field, FieldDef::Numeric {
                        stored: *stored,
                        indexed: true,
                        fast: true,
                        precision: NumericType::F64,
                    }));
                    fields.insert(format!("{}_lon", field_name), (lon_field, FieldDef::Numeric {
                        stored: *stored,
                        indexed: true,
                        fast: true,
                        precision: NumericType::F64,
                    }));
                    continue; // Skip adding the geo field itself
                }
            };

            fields.insert(field_name.clone(), (field, field_def));
        }

        let schema = schema_builder.build();
        let index_schema = IndexSchema {
            schema: schema.clone(),
            fields,
            default_search_fields,
        };

        // Create or open index
        let dir = MmapDirectory::open(&index_path)
            .map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;

        let mut index = Index::open_or_create(dir, schema)
            .map_err(|e| DBError(format!("Failed to create index: {}", e)))?;

        // Configure tokenizers
        let tokenizer_manager = TokenizerManager::default();
        index.set_tokenizers(tokenizer_manager);

        let writer = index.writer(50_000_000)
            .map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;

        let reader = index.reader_builder()
            .reload_policy(ReloadPolicy::Manual)
            .try_into()
            .map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;

        let config = config.unwrap_or_default();

        Ok(TantivySearch {
            index,
            writer: Arc::new(RwLock::new(writer)),
            reader,
            index_schema,
            name,
            config,
        })
    }

    pub fn add_document_with_fields(
        &self,
        doc_id: &str,
        fields: HashMap<String, String>,
    ) -> Result<(), DBError> {
        let mut writer = self.writer.write()
            .map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;

        // Delete existing document with same ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            writer.delete_term(Term::from_field_text(*id_field, doc_id));
        }

        // Create new document
        let mut doc = tantivy::doc!();

        // Add document ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            doc.add_text(*id_field, doc_id);
        }

        // Add other fields based on schema
        for (field_name, field_value) in fields {
            if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
                match field_def {
                    FieldDef::Text { .. } => {
                        doc.add_text(*field, &field_value);
                    }
                    FieldDef::Numeric { precision, .. } => {
                        match precision {
                            NumericType::I64 => {
                                if let Ok(v) = field_value.parse::<i64>() {
                                    doc.add_i64(*field, v);
                                }
                            }
                            NumericType::U64 => {
                                if let Ok(v) = field_value.parse::<u64>() {
                                    doc.add_u64(*field, v);
                                }
                            }
                            NumericType::F64 => {
                                if let Ok(v) = field_value.parse::<f64>() {
                                    doc.add_f64(*field, v);
                                }
                            }
                            NumericType::Date => {
                                if let Ok(v) = field_value.parse::<i64>() {
                                    doc.add_date(*field, DateTime::from_timestamp_millis(v));
                                }
                            }
                        }
                    }
                    FieldDef::Tag { separator, case_sensitive, .. } => {
                        let tags = if !case_sensitive {
                            field_value.to_lowercase()
                        } else {
                            field_value.clone()
                        };

                        // Store tags as separate terms for efficient filtering
                        for tag in tags.split(separator.as_str()) {
                            doc.add_text(*field, tag.trim());
                        }
                    }
                    FieldDef::Geo { .. } => {
                        // Parse "lat,lon" format
                        let parts: Vec<&str> = field_value.split(',').collect();
                        if parts.len() == 2 {
                            if let (Ok(lat), Ok(lon)) = (parts[0].parse::<f64>(), parts[1].parse::<f64>()) {
                                if let Some((lat_field, _)) = self.index_schema.fields.get(&format!("{}_lat", field_name)) {
                                    doc.add_f64(*lat_field, lat);
                                }
                                if let Some((lon_field, _)) = self.index_schema.fields.get(&format!("{}_lon", field_name)) {
                                    doc.add_f64(*lon_field, lon);
                                }
                            }
                        }
                    }
                }
            }
        }

        writer.add_document(doc).map_err(|e| DBError(format!("Failed to add document: {}", e)))?;

        writer.commit()
            .map_err(|e| DBError(format!("Failed to commit: {}", e)))?;

        Ok(())
    }

    pub fn search_with_options(
        &self,
        query_str: &str,
        options: SearchOptions,
    ) -> Result<SearchResults, DBError> {
        let searcher = self.reader.searcher();

        // Parse query based on search fields
        let query: Box<dyn Query> = if self.index_schema.default_search_fields.is_empty() {
            return Err(DBError("No searchable fields defined in schema".to_string()));
        } else {
            let query_parser = QueryParser::for_index(
                &self.index,
                self.index_schema.default_search_fields.clone(),
            );

            Box::new(query_parser.parse_query(query_str)
                .map_err(|e| DBError(format!("Failed to parse query: {}", e)))?)
        };

        // Apply filters if any
        let final_query = if !options.filters.is_empty() {
            let mut queries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
            queries.push((Occur::Must, query));

            // Add filters
            for filter in options.filters {
                if let Some((field, _)) = self.index_schema.fields.get(&filter.field) {
                    match filter.filter_type {
                        FilterType::Equals(value) => {
                            let term_query = TermQuery::new(
                                Term::from_field_text(*field, &value),
                                tantivy::schema::IndexRecordOption::Basic,
                            );
                            queries.push((Occur::Must, Box::new(term_query)));
                        }
                        FilterType::Range { min, max } => {
                            // Would need numeric field handling here
                            // Simplified for now
                        }
                        FilterType::InSet(values) => {
                            let mut sub_queries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
                            for value in values {
                                let term_query = TermQuery::new(
                                    Term::from_field_text(*field, &value),
                                    tantivy::schema::IndexRecordOption::Basic,
                                );
                                sub_queries.push((Occur::Should, Box::new(term_query)));
                            }
                            queries.push((Occur::Must, Box::new(BooleanQuery::new(sub_queries))));
                        }
                    }
                }
            }

            Box::new(BooleanQuery::new(queries))
        } else {
            query
        };

        // Execute search
        let top_docs = searcher.search(
            &*final_query,
            &TopDocs::with_limit(options.limit + options.offset)
        ).map_err(|e| DBError(format!("Search failed: {}", e)))?;

        let total_hits = top_docs.len();
        let mut documents = Vec::new();

        for (score, doc_address) in top_docs.iter().skip(options.offset).take(options.limit) {
            let retrieved_doc: tantivy::TantivyDocument = searcher.doc(*doc_address)
                .map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;

            let mut doc_fields = HashMap::new();

            // Extract all stored fields
            for (field_name, (field, field_def)) in &self.index_schema.fields {
                match field_def {
                    FieldDef::Text { stored, .. } |
                    FieldDef::Tag { stored, .. } => {
                        if *stored {
                            if let Some(value) = retrieved_doc.get_first(*field) {
                                if let Some(text) = value.as_str() {
                                    doc_fields.insert(field_name.clone(), text.to_string());
                                }
                            }
                        }
                    }
                    FieldDef::Numeric { stored, precision, .. } => {
                        if *stored {
                            let value_str = match precision {
                                NumericType::I64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_i64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::U64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_u64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::F64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_f64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::Date => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_datetime())
                                        .map(|v| v.into_timestamp_millis().to_string())
                                }
                            };

                            if let Some(v) = value_str {
                                doc_fields.insert(field_name.clone(), v);
                            }
                        }
                    }
                    FieldDef::Geo { stored } => {
                        if *stored {
                            let lat = retrieved_doc.get_first(
                                self.index_schema.fields.get(&format!("{}_lat", field_name)).unwrap().0
                            ).and_then(|v| v.as_f64());

                            let lon = retrieved_doc.get_first(
                                self.index_schema.fields.get(&format!("{}_lon", field_name)).unwrap().0
                            ).and_then(|v| v.as_f64());

                            if let (Some(lat), Some(lon)) = (lat, lon) {
                                doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
                            }
                        }
                    }
                }
            }

            documents.push(SearchDocument {
                fields: doc_fields,
                score: *score,
            });
        }

        Ok(SearchResults {
            total: total_hits,
            documents,
        })
    }

    pub fn get_info(&self) -> Result<IndexInfo, DBError> {
        let searcher = self.reader.searcher();
        let num_docs = searcher.num_docs();

        let fields_info: Vec<FieldInfo> = self.index_schema.fields.iter().map(|(name, (_, def))| {
            FieldInfo {
                name: name.clone(),
                field_type: format!("{:?}", def),
            }
        }).collect();

        Ok(IndexInfo {
            name: self.name.clone(),
            num_docs,
            fields: fields_info,
            config: self.config.clone(),
        })
    }
}

#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub limit: usize,
    pub offset: usize,
    pub filters: Vec<Filter>,
    pub sort_by: Option<String>,
    pub return_fields: Option<Vec<String>>,
    pub highlight: bool,
}

impl Default for SearchOptions {
    fn default() -> Self {
        SearchOptions {
            limit: 10,
            offset: 0,
            filters: vec![],
            sort_by: None,
            return_fields: None,
            highlight: false,
        }
    }
}

#[derive(Debug, Clone)]
pub struct Filter {
    pub field: String,
    pub filter_type: FilterType,
}

#[derive(Debug, Clone)]
pub enum FilterType {
    Equals(String),
    Range { min: String, max: String },
    InSet(Vec<String>),
}

#[derive(Debug)]
pub struct SearchResults {
    pub total: usize,
    pub documents: Vec<SearchDocument>,
}

#[derive(Debug)]
pub struct SearchDocument {
    pub fields: HashMap<String, String>,
    pub score: f32,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
    pub name: String,
    pub num_docs: u64,
    pub fields: Vec<FieldInfo>,
    pub config: IndexConfig,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
    pub name: String,
    pub field_type: String,
}
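For a sense of how the new module is meant to be driven once the FT handlers are wired up, here is a minimal end-to-end sketch (an editor's illustration, not part of the commit). The base path, index name, and field names are placeholders, and it assumes `herodb::tantivy_search` and `herodb::error` are publicly reachable, as the lib.rs hunk above suggests.

// Editor's sketch: exercising TantivySearch's public API directly.
use herodb::error::DBError; // assumed re-export path
use herodb::tantivy_search::{FieldDef, NumericType, SearchOptions, TantivySearch};
use std::collections::HashMap;
use std::path::PathBuf;

fn main() -> Result<(), DBError> {
    // Build an index with one tokenized text field and one numeric field.
    let search = TantivySearch::new_with_schema(
        PathBuf::from("/tmp/herodb-ft"), // base_path (placeholder)
        "products".to_string(),          // index name
        vec![
            ("title".to_string(), FieldDef::Text {
                stored: true, indexed: true, tokenized: true, fast: false,
            }),
            ("price".to_string(), FieldDef::Numeric {
                stored: true, indexed: true, fast: true, precision: NumericType::F64,
            }),
        ],
        None, // fall back to IndexConfig::default()
    )?;

    // Upsert a document; values arrive as strings and are parsed per field type.
    let mut fields = HashMap::new();
    fields.insert("title".to_string(), "mechanical keyboard".to_string());
    fields.insert("price".to_string(), "79.0".to_string());
    search.add_document_with_fields("doc-1", fields)?;

    // Query the default (tokenized) search fields.
    let results = search.search_with_options("keyboard", SearchOptions::default())?;
    println!("total hits: {}", results.total);
    Ok(())
}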
@@ -27,6 +27,7 @@ async fn debug_hset_simple() {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let mut server = Server::new(option).await;
@@ -18,6 +18,7 @@ async fn debug_hset_return_value() {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let mut server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: true,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let server = Server::new(option).await;
@@ -24,6 +24,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: true,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };

     let server = Server::new(option).await;