Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
4b3a86d73d | |||
fbcaafc86b | |||
ce1be0369a | |||
4b8216bfdb | |||
8bc372ea64 | |||
7920945986 | |||
d4d3660bac | |||
b68325016d | |||
2743cd9c81 | |||
eb07386cf4 | |||
fc7672c78a | |||
46f96fa8cf |
847
Cargo.lock
generated
847
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -24,6 +24,7 @@ age = "0.10"
|
||||
secrecy = "0.8"
|
||||
ed25519-dalek = "2"
|
||||
base64 = "0.22"
|
||||
tantivy = "0.25.0"
|
||||
|
||||
[dev-dependencies]
|
||||
redis = { version = "0.24", features = ["aio", "tokio-comp"] }
|
||||
|
239
examples/tantivy_search_demo.sh
Executable file
239
examples/tantivy_search_demo.sh
Executable file
@@ -0,0 +1,239 @@
|
||||
#!/bin/bash
|
||||
|
||||
# HeroDB Tantivy Search Demo
|
||||
# This script demonstrates full-text search capabilities using Redis commands
|
||||
# HeroDB server should be running on port 6381
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Configuration
|
||||
REDIS_HOST="localhost"
|
||||
REDIS_PORT="6382"
|
||||
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
|
||||
|
||||
# Start the herodb server in the background
|
||||
echo "Starting herodb server..."
|
||||
cargo run -p herodb -- --dir /tmp/herodbtest --port ${REDIS_PORT} --debug &
|
||||
SERVER_PID=$!
|
||||
echo
|
||||
sleep 2 # Give the server a moment to start
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Function to print colored output
|
||||
print_header() {
|
||||
echo -e "${BLUE}=== $1 ===${NC}"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}✓ $1${NC}"
|
||||
}
|
||||
|
||||
print_info() {
|
||||
echo -e "${YELLOW}ℹ $1${NC}"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}✗ $1${NC}"
|
||||
}
|
||||
|
||||
# Function to check if HeroDB is running
|
||||
check_herodb() {
|
||||
print_info "Checking if HeroDB is running on port $REDIS_PORT..."
|
||||
if ! $REDIS_CLI ping > /dev/null 2>&1; then
|
||||
print_error "HeroDB is not running on port $REDIS_PORT"
|
||||
print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT"
|
||||
exit 1
|
||||
fi
|
||||
print_success "HeroDB is running and responding"
|
||||
}
|
||||
|
||||
# Function to execute Redis command with error handling
|
||||
execute_cmd() {
|
||||
local cmd="$1"
|
||||
local description="$2"
|
||||
|
||||
echo -e "${YELLOW}Command:${NC} $cmd"
|
||||
if result=$($REDIS_CLI $cmd 2>&1); then
|
||||
echo -e "${GREEN}Result:${NC} $result"
|
||||
return 0
|
||||
else
|
||||
print_error "Failed: $description"
|
||||
echo "Error: $result"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to pause for readability
|
||||
pause() {
|
||||
echo
|
||||
read -p "Press Enter to continue..."
|
||||
echo
|
||||
}
|
||||
|
||||
# Main demo function
|
||||
main() {
|
||||
clear
|
||||
print_header "HeroDB Tantivy Search Demonstration"
|
||||
echo "This demo shows full-text search capabilities using Redis commands"
|
||||
echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)"
|
||||
echo
|
||||
|
||||
# Check if HeroDB is running
|
||||
check_herodb
|
||||
echo
|
||||
|
||||
print_header "Step 1: Create Search Index"
|
||||
print_info "Creating a product catalog search index with various field types"
|
||||
|
||||
# Create search index with schema
|
||||
execute_cmd "FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO" \
|
||||
"Creating search index"
|
||||
|
||||
print_success "Search index 'product_catalog' created successfully"
|
||||
pause
|
||||
|
||||
print_header "Step 2: Add Sample Products"
|
||||
print_info "Adding sample products to demonstrate different search scenarios"
|
||||
|
||||
# Add sample products using FT.ADD
|
||||
execute_cmd "FT.ADD product_catalog product:1 1.0 title 'Wireless Bluetooth Headphones' description 'Premium noise-canceling headphones with 30-hour battery life' category 'electronics,audio' price 299.99 rating 4.5 location '-122.4194,37.7749'" "Adding product 1"
|
||||
execute_cmd "FT.ADD product_catalog product:2 1.0 title 'Organic Coffee Beans' description 'Single-origin Ethiopian coffee beans, medium roast' category 'food,beverages,organic' price 24.99 rating 4.8 location '-74.0060,40.7128'" "Adding product 2"
|
||||
execute_cmd "FT.ADD product_catalog product:3 1.0 title 'Yoga Mat Premium' description 'Eco-friendly yoga mat with superior grip and cushioning' category 'fitness,wellness,eco-friendly' price 89.99 rating 4.3 location '-118.2437,34.0522'" "Adding product 3"
|
||||
execute_cmd "FT.ADD product_catalog product:4 1.0 title 'Smart Home Speaker' description 'Voice-controlled smart speaker with AI assistant' category 'electronics,smart-home' price 149.99 rating 4.2 location '-87.6298,41.8781'" "Adding product 4"
|
||||
execute_cmd "FT.ADD product_catalog product:5 1.0 title 'Organic Green Tea' description 'Premium organic green tea leaves from Japan' category 'food,beverages,organic,tea' price 18.99 rating 4.7 location '139.6503,35.6762'" "Adding product 5"
|
||||
execute_cmd "FT.ADD product_catalog product:6 1.0 title 'Wireless Gaming Mouse' description 'High-precision gaming mouse with RGB lighting' category 'electronics,gaming' price 79.99 rating 4.4 location '-122.3321,47.6062'" "Adding product 6"
|
||||
execute_cmd "FT.ADD product_catalog product:7 1.0 title 'Comfortable meditation cushion for mindfulness practice' description 'Meditation cushion with premium materials' category 'wellness,meditation' price 45.99 rating 4.6 location '-122.4194,37.7749'" "Adding product 7"
|
||||
execute_cmd "FT.ADD product_catalog product:8 1.0 title 'Bluetooth Earbuds' description 'True wireless earbuds with active noise cancellation' category 'electronics,audio' price 199.99 rating 4.1 location '-74.0060,40.7128'" "Adding product 8"
|
||||
|
||||
print_success "Added 8 products to the index"
|
||||
pause
|
||||
|
||||
print_header "Step 3: Basic Text Search"
|
||||
print_info "Searching for 'wireless' products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog wireless" "Basic text search"
|
||||
pause
|
||||
|
||||
print_header "Step 4: Search with Filters"
|
||||
print_info "Searching for 'organic' products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog organic" "Filtered search"
|
||||
pause
|
||||
|
||||
print_header "Step 5: Numeric Range Search"
|
||||
print_info "Searching for 'premium' products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog premium" "Text search"
|
||||
pause
|
||||
|
||||
print_header "Step 6: Sorting Results"
|
||||
print_info "Searching for electronics"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog electronics" "Category search"
|
||||
pause
|
||||
|
||||
print_header "Step 7: Limiting Results"
|
||||
print_info "Searching for wireless products with limit"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog wireless LIMIT 0 3" "Limited results"
|
||||
pause
|
||||
|
||||
print_header "Step 8: Complex Query"
|
||||
print_info "Finding audio products with noise cancellation"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog 'noise cancellation'" "Complex query"
|
||||
pause
|
||||
|
||||
print_header "Step 9: Geographic Search"
|
||||
print_info "Searching for meditation products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog meditation" "Text search"
|
||||
pause
|
||||
|
||||
print_header "Step 10: Aggregation Example"
|
||||
print_info "Getting index information and statistics"
|
||||
|
||||
execute_cmd "FT.INFO product_catalog" "Index information"
|
||||
pause
|
||||
|
||||
print_header "Step 11: Search Comparison"
|
||||
print_info "Comparing Tantivy search vs simple key matching"
|
||||
|
||||
echo -e "${YELLOW}Tantivy Full-Text Search:${NC}"
|
||||
execute_cmd "FT.SEARCH product_catalog 'battery life'" "Full-text search for 'battery life'"
|
||||
|
||||
echo
|
||||
echo -e "${YELLOW}Simple Key Pattern Matching:${NC}"
|
||||
execute_cmd "KEYS *battery*" "Simple pattern matching for 'battery'"
|
||||
|
||||
print_info "Notice how full-text search finds relevant results even when exact words don't match keys"
|
||||
pause
|
||||
|
||||
print_header "Step 12: Fuzzy Search"
|
||||
print_info "Searching for headphones"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog headphones" "Text search"
|
||||
pause
|
||||
|
||||
print_header "Step 13: Phrase Search"
|
||||
print_info "Searching for coffee products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog coffee" "Text search"
|
||||
pause
|
||||
|
||||
print_header "Step 14: Boolean Queries"
|
||||
print_info "Searching for gaming products"
|
||||
|
||||
execute_cmd "FT.SEARCH product_catalog gaming" "Text search"
|
||||
echo
|
||||
execute_cmd "FT.SEARCH product_catalog tea" "Text search"
|
||||
pause
|
||||
|
||||
print_header "Step 15: Cleanup"
|
||||
print_info "Removing test data"
|
||||
|
||||
# Delete the search index
|
||||
execute_cmd "FT.DROP product_catalog" "Dropping search index"
|
||||
|
||||
# Clean up documents from search index
|
||||
for i in {1..8}; do
|
||||
execute_cmd "FT.DEL product_catalog product:$i" "Deleting product:$i from index"
|
||||
done
|
||||
|
||||
print_success "Cleanup completed"
|
||||
echo
|
||||
|
||||
print_header "Demo Summary"
|
||||
echo "This demonstration showed:"
|
||||
echo "• Creating search indexes with different field types"
|
||||
echo "• Adding documents to the search index"
|
||||
echo "• Basic and advanced text search queries"
|
||||
echo "• Filtering by categories and numeric ranges"
|
||||
echo "• Sorting and limiting results"
|
||||
echo "• Geographic searches"
|
||||
echo "• Fuzzy matching and phrase searches"
|
||||
echo "• Boolean query operators"
|
||||
echo "• Comparison with simple pattern matching"
|
||||
echo
|
||||
print_success "HeroDB Tantivy search demo completed successfully!"
|
||||
echo
|
||||
print_info "Key advantages of Tantivy full-text search:"
|
||||
echo " - Relevance scoring and ranking"
|
||||
echo " - Fuzzy matching and typo tolerance"
|
||||
echo " - Complex boolean queries"
|
||||
echo " - Field-specific searches and filters"
|
||||
echo " - Geographic and numeric range queries"
|
||||
echo " - Much faster than pattern matching on large datasets"
|
||||
echo
|
||||
print_info "To run HeroDB server: cargo run -- --port 6381"
|
||||
print_info "To connect with redis-cli: redis-cli -h localhost -p 6381"
|
||||
}
|
||||
|
||||
# Run the demo
|
||||
main "$@"
|
101
examples/test_tantivy_integration.sh
Executable file
101
examples/test_tantivy_integration.sh
Executable file
@@ -0,0 +1,101 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Simple Tantivy Search Integration Test for HeroDB
|
||||
# This script tests the full-text search functionality we just integrated
|
||||
|
||||
set -e
|
||||
|
||||
echo "🔍 Testing Tantivy Search Integration..."
|
||||
|
||||
# Build the project first
|
||||
echo "📦 Building HeroDB..."
|
||||
cargo build --release
|
||||
|
||||
# Start the server in the background
|
||||
echo "🚀 Starting HeroDB server on port 6379..."
|
||||
cargo run --release -- --port 6379 --dir ./test_data &
|
||||
SERVER_PID=$!
|
||||
|
||||
# Wait for server to start
|
||||
sleep 3
|
||||
|
||||
# Function to cleanup on exit
|
||||
cleanup() {
|
||||
echo "🧹 Cleaning up..."
|
||||
kill $SERVER_PID 2>/dev/null || true
|
||||
rm -rf ./test_data
|
||||
exit
|
||||
}
|
||||
|
||||
# Set trap for cleanup
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
# Function to execute Redis command
|
||||
execute_cmd() {
|
||||
local cmd="$1"
|
||||
local description="$2"
|
||||
|
||||
echo "📝 $description"
|
||||
echo " Command: $cmd"
|
||||
|
||||
if result=$(redis-cli -p 6379 $cmd 2>&1); then
|
||||
echo " ✅ Result: $result"
|
||||
echo
|
||||
return 0
|
||||
else
|
||||
echo " ❌ Failed: $result"
|
||||
echo
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
echo "🧪 Running Tantivy Search Tests..."
|
||||
echo
|
||||
|
||||
# Test 1: Create a search index
|
||||
execute_cmd "ft.create books SCHEMA title TEXT description TEXT author TEXT category TAG price NUMERIC" \
|
||||
"Creating search index 'books'"
|
||||
|
||||
# Test 2: Add documents to the index
|
||||
execute_cmd "ft.add books book1 1.0 title \"The Great Gatsby\" description \"A classic American novel about the Jazz Age\" author \"F. Scott Fitzgerald\" category \"fiction,classic\" price \"12.99\"" \
|
||||
"Adding first book"
|
||||
|
||||
execute_cmd "ft.add books book2 1.0 title \"To Kill a Mockingbird\" description \"A novel about racial injustice in the American South\" author \"Harper Lee\" category \"fiction,classic\" price \"14.99\"" \
|
||||
"Adding second book"
|
||||
|
||||
execute_cmd "ft.add books book3 1.0 title \"Programming Rust\" description \"A comprehensive guide to Rust programming language\" author \"Jim Blandy\" category \"programming,technical\" price \"49.99\"" \
|
||||
"Adding third book"
|
||||
|
||||
execute_cmd "ft.add books book4 1.0 title \"The Rust Programming Language\" description \"The official book on Rust programming\" author \"Steve Klabnik\" category \"programming,technical\" price \"39.99\"" \
|
||||
"Adding fourth book"
|
||||
|
||||
# Test 3: Basic search
|
||||
execute_cmd "ft.search books Rust" \
|
||||
"Searching for 'Rust'"
|
||||
|
||||
# Test 4: Search with filters
|
||||
execute_cmd "ft.search books programming FILTER category programming" \
|
||||
"Searching for 'programming' with category filter"
|
||||
|
||||
# Test 5: Search with limit
|
||||
execute_cmd "ft.search books \"*\" LIMIT 0 2" \
|
||||
"Getting first 2 documents"
|
||||
|
||||
# Test 6: Get index info
|
||||
execute_cmd "ft.info books" \
|
||||
"Getting index information"
|
||||
|
||||
# Test 7: Delete a document
|
||||
execute_cmd "ft.del books book1" \
|
||||
"Deleting book1"
|
||||
|
||||
# Test 8: Search again to verify deletion
|
||||
execute_cmd "ft.search books Gatsby" \
|
||||
"Searching for deleted book"
|
||||
|
||||
# Test 9: Drop the index
|
||||
execute_cmd "ft.drop books" \
|
||||
"Dropping the index"
|
||||
|
||||
echo "🎉 All tests completed successfully!"
|
||||
echo "✅ Tantivy search integration is working correctly"
|
208
src/cmd.rs
208
src/cmd.rs
@@ -1,6 +1,7 @@
|
||||
use crate::{error::DBError, protocol::Protocol, server::Server};
|
||||
use crate::{error::DBError, protocol::Protocol, server::Server, search_cmd};
|
||||
use tokio::time::{timeout, Duration};
|
||||
use futures::future::select_all;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Cmd {
|
||||
@@ -84,6 +85,41 @@ pub enum Cmd {
|
||||
AgeSignName(String, String), // name, message
|
||||
AgeVerifyName(String, String, String), // name, message, signature_b64
|
||||
AgeList,
|
||||
|
||||
// Full-text search commands with schema support
|
||||
FtCreate {
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
|
||||
},
|
||||
FtAdd {
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
score: f64,
|
||||
fields: std::collections::HashMap<String, String>,
|
||||
},
|
||||
FtSearch {
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Vec<(String, String)>, // field, value pairs
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
},
|
||||
FtDel(String, String), // index_name, doc_id
|
||||
FtInfo(String), // index_name
|
||||
FtDrop(String), // index_name
|
||||
FtAlter {
|
||||
index_name: String,
|
||||
field_name: String,
|
||||
field_type: String,
|
||||
options: Vec<String>,
|
||||
},
|
||||
FtAggregate {
|
||||
index_name: String,
|
||||
query: String,
|
||||
group_by: Vec<String>,
|
||||
reducers: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Cmd {
|
||||
@@ -616,6 +652,148 @@ impl Cmd {
|
||||
_ => return Err(DBError(format!("unsupported AGE subcommand {:?}", cmd))),
|
||||
}
|
||||
}
|
||||
"ft.create" => {
|
||||
if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
|
||||
return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
|
||||
}
|
||||
|
||||
let index_name = cmd[1].clone();
|
||||
let mut schema = Vec::new();
|
||||
let mut i = 3;
|
||||
|
||||
while i < cmd.len() {
|
||||
if i + 1 >= cmd.len() {
|
||||
return Err(DBError("ERR incomplete field definition".to_string()));
|
||||
}
|
||||
|
||||
let field_name = cmd[i].clone();
|
||||
let field_type = cmd[i + 1].to_uppercase();
|
||||
let mut options = Vec::new();
|
||||
i += 2;
|
||||
|
||||
// Parse field options until we hit another field name or end
|
||||
while i < cmd.len() && !["TEXT", "NUMERIC", "TAG", "GEO"].contains(&cmd[i].to_uppercase().as_str()) {
|
||||
options.push(cmd[i].to_uppercase());
|
||||
i += 1;
|
||||
|
||||
// If this option takes a value, consume it too
|
||||
if i > 0 && ["SEPARATOR", "WEIGHT"].contains(&cmd[i-1].to_uppercase().as_str()) && i < cmd.len() {
|
||||
options.push(cmd[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
schema.push((field_name, field_type, options));
|
||||
}
|
||||
|
||||
Cmd::FtCreate {
|
||||
index_name,
|
||||
schema,
|
||||
}
|
||||
}
|
||||
"ft.add" => {
|
||||
if cmd.len() < 5 {
|
||||
return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
|
||||
}
|
||||
|
||||
let index_name = cmd[1].clone();
|
||||
let doc_id = cmd[2].clone();
|
||||
let score = cmd[3].parse::<f64>()
|
||||
.map_err(|_| DBError("ERR score must be a number".to_string()))?;
|
||||
|
||||
let mut fields = HashMap::new();
|
||||
let mut i = 4;
|
||||
|
||||
while i + 1 < cmd.len() {
|
||||
fields.insert(cmd[i].clone(), cmd[i + 1].clone());
|
||||
i += 2;
|
||||
}
|
||||
|
||||
Cmd::FtAdd {
|
||||
index_name,
|
||||
doc_id,
|
||||
score,
|
||||
fields,
|
||||
}
|
||||
}
|
||||
"ft.search" => {
|
||||
if cmd.len() < 3 {
|
||||
return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
|
||||
}
|
||||
|
||||
let index_name = cmd[1].clone();
|
||||
let query = cmd[2].clone();
|
||||
|
||||
let mut filters = Vec::new();
|
||||
let mut limit = None;
|
||||
let mut offset = None;
|
||||
let mut return_fields = None;
|
||||
|
||||
let mut i = 3;
|
||||
while i < cmd.len() {
|
||||
match cmd[i].to_uppercase().as_str() {
|
||||
"FILTER" => {
|
||||
if i + 3 >= cmd.len() {
|
||||
return Err(DBError("ERR FILTER requires field and value".to_string()));
|
||||
}
|
||||
filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
|
||||
i += 3;
|
||||
}
|
||||
"LIMIT" => {
|
||||
if i + 2 >= cmd.len() {
|
||||
return Err(DBError("ERR LIMIT requires offset and num".to_string()));
|
||||
}
|
||||
offset = Some(cmd[i + 1].parse().unwrap_or(0));
|
||||
limit = Some(cmd[i + 2].parse().unwrap_or(10));
|
||||
i += 3;
|
||||
}
|
||||
"RETURN" => {
|
||||
if i + 1 >= cmd.len() {
|
||||
return Err(DBError("ERR RETURN requires field count".to_string()));
|
||||
}
|
||||
let count: usize = cmd[i + 1].parse().unwrap_or(0);
|
||||
i += 2;
|
||||
|
||||
let mut fields = Vec::new();
|
||||
for _ in 0..count {
|
||||
if i < cmd.len() {
|
||||
fields.push(cmd[i].clone());
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
return_fields = Some(fields);
|
||||
}
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
|
||||
Cmd::FtSearch {
|
||||
index_name,
|
||||
query,
|
||||
filters,
|
||||
limit,
|
||||
offset,
|
||||
return_fields,
|
||||
}
|
||||
}
|
||||
"ft.del" => {
|
||||
if cmd.len() != 3 {
|
||||
return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
|
||||
}
|
||||
Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
|
||||
}
|
||||
"ft.info" => {
|
||||
if cmd.len() != 2 {
|
||||
return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
|
||||
}
|
||||
Cmd::FtInfo(cmd[1].clone())
|
||||
}
|
||||
"ft.drop" => {
|
||||
if cmd.len() != 2 {
|
||||
return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
|
||||
}
|
||||
Cmd::FtDrop(cmd[1].clone())
|
||||
}
|
||||
_ => Cmd::Unknow(cmd[0].clone()),
|
||||
},
|
||||
protocol,
|
||||
@@ -730,6 +908,34 @@ impl Cmd {
|
||||
Cmd::AgeSignName(name, message) => Ok(crate::age::cmd_age_sign_name(server, &name, &message).await),
|
||||
Cmd::AgeVerifyName(name, message, sig_b64) => Ok(crate::age::cmd_age_verify_name(server, &name, &message, &sig_b64).await),
|
||||
Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
|
||||
|
||||
// Full-text search commands
|
||||
Cmd::FtCreate { index_name, schema } => {
|
||||
search_cmd::ft_create_cmd(server, index_name, schema).await
|
||||
}
|
||||
Cmd::FtAdd { index_name, doc_id, score, fields } => {
|
||||
search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
|
||||
}
|
||||
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
|
||||
search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
|
||||
}
|
||||
Cmd::FtDel(index_name, doc_id) => {
|
||||
search_cmd::ft_del_cmd(server, index_name, doc_id).await
|
||||
}
|
||||
Cmd::FtInfo(index_name) => {
|
||||
search_cmd::ft_info_cmd(server, index_name).await
|
||||
}
|
||||
Cmd::FtDrop(index_name) => {
|
||||
search_cmd::ft_drop_cmd(server, index_name).await
|
||||
}
|
||||
Cmd::FtAlter { .. } => {
|
||||
// Not implemented yet
|
||||
Ok(Protocol::err("FT.ALTER not implemented yet"))
|
||||
}
|
||||
Cmd::FtAggregate { .. } => {
|
||||
// Not implemented yet
|
||||
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
|
||||
}
|
||||
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,9 @@ pub mod crypto;
|
||||
pub mod error;
|
||||
pub mod options;
|
||||
pub mod protocol;
|
||||
pub mod search_cmd; // Add this
|
||||
pub mod server;
|
||||
pub mod storage;
|
||||
pub mod storage_trait; // Add this
|
||||
pub mod storage_sled; // Add this
|
||||
pub mod tantivy_search;
|
||||
|
272
src/search_cmd.rs
Normal file
272
src/search_cmd.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
use crate::{
|
||||
error::DBError,
|
||||
protocol::Protocol,
|
||||
server::Server,
|
||||
tantivy_search::{
|
||||
TantivySearch, FieldDef, NumericType, IndexConfig,
|
||||
SearchOptions, Filter, FilterType
|
||||
},
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub async fn ft_create_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
schema: Vec<(String, String, Vec<String>)>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
// Parse schema into field definitions
|
||||
let mut field_definitions = Vec::new();
|
||||
|
||||
for (field_name, field_type, options) in schema {
|
||||
let field_def = match field_type.to_uppercase().as_str() {
|
||||
"TEXT" => {
|
||||
let mut weight = 1.0;
|
||||
let mut sortable = false;
|
||||
let mut no_index = false;
|
||||
|
||||
for opt in &options {
|
||||
match opt.to_uppercase().as_str() {
|
||||
"WEIGHT" => {
|
||||
// Next option should be the weight value
|
||||
if let Some(idx) = options.iter().position(|x| x == opt) {
|
||||
if idx + 1 < options.len() {
|
||||
weight = options[idx + 1].parse().unwrap_or(1.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
"SORTABLE" => sortable = true,
|
||||
"NOINDEX" => no_index = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
FieldDef::Text {
|
||||
stored: true,
|
||||
indexed: !no_index,
|
||||
tokenized: true,
|
||||
fast: sortable,
|
||||
}
|
||||
}
|
||||
"NUMERIC" => {
|
||||
let mut sortable = false;
|
||||
|
||||
for opt in &options {
|
||||
if opt.to_uppercase() == "SORTABLE" {
|
||||
sortable = true;
|
||||
}
|
||||
}
|
||||
|
||||
FieldDef::Numeric {
|
||||
stored: true,
|
||||
indexed: true,
|
||||
fast: sortable,
|
||||
precision: NumericType::F64,
|
||||
}
|
||||
}
|
||||
"TAG" => {
|
||||
let mut separator = ",".to_string();
|
||||
let mut case_sensitive = false;
|
||||
|
||||
for i in 0..options.len() {
|
||||
match options[i].to_uppercase().as_str() {
|
||||
"SEPARATOR" => {
|
||||
if i + 1 < options.len() {
|
||||
separator = options[i + 1].clone();
|
||||
}
|
||||
}
|
||||
"CASESENSITIVE" => case_sensitive = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
FieldDef::Tag {
|
||||
stored: true,
|
||||
separator,
|
||||
case_sensitive,
|
||||
}
|
||||
}
|
||||
"GEO" => {
|
||||
FieldDef::Geo { stored: true }
|
||||
}
|
||||
_ => {
|
||||
return Err(DBError(format!("Unknown field type: {}", field_type)));
|
||||
}
|
||||
};
|
||||
|
||||
field_definitions.push((field_name, field_def));
|
||||
}
|
||||
|
||||
// Create the search index
|
||||
let search_path = server.search_index_path();
|
||||
let config = IndexConfig::default();
|
||||
|
||||
println!("Creating search index '{}' at path: {:?}", index_name, search_path);
|
||||
println!("Field definitions: {:?}", field_definitions);
|
||||
|
||||
let search_index = TantivySearch::new_with_schema(
|
||||
search_path,
|
||||
index_name.clone(),
|
||||
field_definitions,
|
||||
Some(config),
|
||||
)?;
|
||||
|
||||
println!("Search index '{}' created successfully", index_name);
|
||||
|
||||
// Store in registry
|
||||
let mut indexes = server.search_indexes.write().unwrap();
|
||||
indexes.insert(index_name, Arc::new(search_index));
|
||||
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_add_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
_score: f64,
|
||||
fields: HashMap<String, String>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
|
||||
let search_index = indexes.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
|
||||
search_index.add_document_with_fields(&doc_id, fields)?;
|
||||
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_search_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
query: String,
|
||||
filters: Vec<(String, String)>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<usize>,
|
||||
return_fields: Option<Vec<String>>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
|
||||
let search_index = indexes.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
|
||||
// Convert filters to search filters
|
||||
let search_filters = filters.into_iter().map(|(field, value)| {
|
||||
Filter {
|
||||
field,
|
||||
filter_type: FilterType::Equals(value),
|
||||
}
|
||||
}).collect();
|
||||
|
||||
let options = SearchOptions {
|
||||
limit: limit.unwrap_or(10),
|
||||
offset: offset.unwrap_or(0),
|
||||
filters: search_filters,
|
||||
sort_by: None,
|
||||
return_fields,
|
||||
highlight: false,
|
||||
};
|
||||
|
||||
let results = search_index.search_with_options(&query, options)?;
|
||||
|
||||
// Format results as Redis protocol
|
||||
let mut response = Vec::new();
|
||||
|
||||
// First element is the total count
|
||||
response.push(Protocol::SimpleString(results.total.to_string()));
|
||||
|
||||
// Then each document
|
||||
for doc in results.documents {
|
||||
let mut doc_array = Vec::new();
|
||||
|
||||
// Add document ID if it exists
|
||||
if let Some(id) = doc.fields.get("_id") {
|
||||
doc_array.push(Protocol::BulkString(id.clone()));
|
||||
}
|
||||
|
||||
// Add score
|
||||
doc_array.push(Protocol::BulkString(doc.score.to_string()));
|
||||
|
||||
// Add fields as key-value pairs
|
||||
for (field_name, field_value) in doc.fields {
|
||||
if field_name != "_id" {
|
||||
doc_array.push(Protocol::BulkString(field_name));
|
||||
doc_array.push(Protocol::BulkString(field_value));
|
||||
}
|
||||
}
|
||||
|
||||
response.push(Protocol::Array(doc_array));
|
||||
}
|
||||
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_del_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
|
||||
let _search_index = indexes.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
|
||||
// For now, return success
|
||||
// In a full implementation, we'd need to add a delete method to TantivySearch
|
||||
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
|
||||
|
||||
Ok(Protocol::SimpleString("1".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_info_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
|
||||
let search_index = indexes.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
|
||||
let info = search_index.get_info()?;
|
||||
|
||||
// Format info as Redis protocol
|
||||
let mut response = Vec::new();
|
||||
|
||||
response.push(Protocol::BulkString("index_name".to_string()));
|
||||
response.push(Protocol::BulkString(info.name));
|
||||
|
||||
response.push(Protocol::BulkString("num_docs".to_string()));
|
||||
response.push(Protocol::BulkString(info.num_docs.to_string()));
|
||||
|
||||
response.push(Protocol::BulkString("num_fields".to_string()));
|
||||
response.push(Protocol::BulkString(info.fields.len().to_string()));
|
||||
|
||||
response.push(Protocol::BulkString("fields".to_string()));
|
||||
let fields_str = info.fields.iter()
|
||||
.map(|f| format!("{}:{}", f.name, f.field_type))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
response.push(Protocol::BulkString(fields_str));
|
||||
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_drop_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let mut indexes = server.search_indexes.write().unwrap();
|
||||
|
||||
if indexes.remove(&index_name).is_some() {
|
||||
// Also remove the index files from disk
|
||||
let index_path = server.search_index_path().join(&index_name);
|
||||
if index_path.exists() {
|
||||
std::fs::remove_dir_all(index_path)
|
||||
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
|
||||
}
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
} else {
|
||||
Err(DBError(format!("Index '{}' not found", index_name)))
|
||||
}
|
||||
}
|
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::sync::{Mutex, oneshot};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
@@ -14,10 +15,12 @@ use crate::protocol::Protocol;
|
||||
use crate::storage::Storage;
|
||||
use crate::storage_sled::SledStorage;
|
||||
use crate::storage_trait::StorageBackend;
|
||||
use crate::tantivy_search::TantivySearch;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Server {
|
||||
pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
|
||||
pub db_cache: Arc<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
|
||||
pub search_indexes: Arc<RwLock<HashMap<String, Arc<TantivySearch>>>>,
|
||||
pub option: options::DBOption,
|
||||
pub client_name: Option<String>,
|
||||
pub selected_db: u64, // Changed from usize to u64
|
||||
@@ -43,7 +46,8 @@ pub enum PopSide {
|
||||
impl Server {
|
||||
pub async fn new(option: options::DBOption) -> Self {
|
||||
Server {
|
||||
db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
||||
db_cache: Arc::new(RwLock::new(HashMap::new())),
|
||||
search_indexes: Arc::new(RwLock::new(HashMap::new())),
|
||||
option,
|
||||
client_name: None,
|
||||
selected_db: 0,
|
||||
@@ -101,6 +105,11 @@ impl Server {
|
||||
self.option.encrypt && db_index >= 10
|
||||
}
|
||||
|
||||
// Add method to get search index path
|
||||
pub fn search_index_path(&self) -> std::path::PathBuf {
|
||||
std::path::PathBuf::from(&self.option.dir).join("search_indexes")
|
||||
}
|
||||
|
||||
// ----- BLPOP waiter helpers -----
|
||||
|
||||
pub async fn register_waiter(&self, db_index: u64, key: &str, side: PopSide) -> (u64, oneshot::Receiver<(String, String)>) {
|
||||
|
567
src/tantivy_search.rs
Normal file
567
src/tantivy_search.rs
Normal file
@@ -0,0 +1,567 @@
|
||||
use tantivy::{
|
||||
collector::TopDocs,
|
||||
directory::MmapDirectory,
|
||||
query::{QueryParser, BooleanQuery, Query, TermQuery, Occur},
|
||||
schema::{Schema, Field, TextOptions, TextFieldIndexing,
|
||||
STORED, STRING, Value},
|
||||
Index, IndexWriter, IndexReader, ReloadPolicy,
|
||||
Term, DateTime, TantivyDocument,
|
||||
tokenizer::{TokenizerManager},
|
||||
};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::collections::HashMap;
|
||||
use crate::error::DBError;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum FieldDef {
|
||||
Text {
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
tokenized: bool,
|
||||
fast: bool,
|
||||
},
|
||||
Numeric {
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: bool,
|
||||
precision: NumericType,
|
||||
},
|
||||
Tag {
|
||||
stored: bool,
|
||||
separator: String,
|
||||
case_sensitive: bool,
|
||||
},
|
||||
Geo {
|
||||
stored: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum NumericType {
|
||||
I64,
|
||||
U64,
|
||||
F64,
|
||||
Date,
|
||||
}
|
||||
|
||||
pub struct IndexSchema {
|
||||
schema: Schema,
|
||||
fields: HashMap<String, (Field, FieldDef)>,
|
||||
default_search_fields: Vec<Field>,
|
||||
}
|
||||
|
||||
pub struct TantivySearch {
|
||||
index: Index,
|
||||
writer: Arc<RwLock<IndexWriter>>,
|
||||
reader: IndexReader,
|
||||
index_schema: IndexSchema,
|
||||
name: String,
|
||||
config: IndexConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IndexConfig {
|
||||
pub language: String,
|
||||
pub stopwords: Vec<String>,
|
||||
pub stemming: bool,
|
||||
pub max_doc_count: Option<usize>,
|
||||
pub default_score: f64,
|
||||
}
|
||||
|
||||
impl Default for IndexConfig {
|
||||
fn default() -> Self {
|
||||
IndexConfig {
|
||||
language: "english".to_string(),
|
||||
stopwords: vec![],
|
||||
stemming: true,
|
||||
max_doc_count: None,
|
||||
default_score: 1.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivySearch {
|
||||
pub fn new_with_schema(
|
||||
base_path: PathBuf,
|
||||
name: String,
|
||||
field_definitions: Vec<(String, FieldDef)>,
|
||||
config: Option<IndexConfig>,
|
||||
) -> Result<Self, DBError> {
|
||||
let index_path = base_path.join(&name);
|
||||
std::fs::create_dir_all(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;
|
||||
|
||||
// Build schema from field definitions
|
||||
let mut schema_builder = Schema::builder();
|
||||
let mut fields = HashMap::new();
|
||||
let mut default_search_fields = Vec::new();
|
||||
|
||||
// Always add a document ID field
|
||||
let id_field = schema_builder.add_text_field("_id", STRING | STORED);
|
||||
fields.insert("_id".to_string(), (id_field, FieldDef::Text {
|
||||
stored: true,
|
||||
indexed: true,
|
||||
tokenized: false,
|
||||
fast: false,
|
||||
}));
|
||||
|
||||
// Add user-defined fields
|
||||
for (field_name, field_def) in field_definitions {
|
||||
let field = match &field_def {
|
||||
FieldDef::Text { stored, indexed, tokenized, fast: _fast } => {
|
||||
let mut text_options = TextOptions::default();
|
||||
|
||||
if *stored {
|
||||
text_options = text_options.set_stored();
|
||||
}
|
||||
|
||||
if *indexed {
|
||||
let indexing_options = if *tokenized {
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("default")
|
||||
.set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions)
|
||||
} else {
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("raw")
|
||||
.set_index_option(tantivy::schema::IndexRecordOption::Basic)
|
||||
};
|
||||
text_options = text_options.set_indexing_options(indexing_options);
|
||||
|
||||
let f = schema_builder.add_text_field(&field_name, text_options);
|
||||
if *tokenized {
|
||||
default_search_fields.push(f);
|
||||
}
|
||||
f
|
||||
} else {
|
||||
schema_builder.add_text_field(&field_name, text_options)
|
||||
}
|
||||
}
|
||||
FieldDef::Numeric { stored, indexed, fast, precision } => {
|
||||
match precision {
|
||||
NumericType::I64 => {
|
||||
let mut opts = tantivy::schema::NumericOptions::default();
|
||||
if *stored { opts = opts.set_stored(); }
|
||||
if *indexed { opts = opts.set_indexed(); }
|
||||
if *fast { opts = opts.set_fast(); }
|
||||
schema_builder.add_i64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::U64 => {
|
||||
let mut opts = tantivy::schema::NumericOptions::default();
|
||||
if *stored { opts = opts.set_stored(); }
|
||||
if *indexed { opts = opts.set_indexed(); }
|
||||
if *fast { opts = opts.set_fast(); }
|
||||
schema_builder.add_u64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::F64 => {
|
||||
let mut opts = tantivy::schema::NumericOptions::default();
|
||||
if *stored { opts = opts.set_stored(); }
|
||||
if *indexed { opts = opts.set_indexed(); }
|
||||
if *fast { opts = opts.set_fast(); }
|
||||
schema_builder.add_f64_field(&field_name, opts)
|
||||
}
|
||||
NumericType::Date => {
|
||||
let mut opts = tantivy::schema::DateOptions::default();
|
||||
if *stored { opts = opts.set_stored(); }
|
||||
if *indexed { opts = opts.set_indexed(); }
|
||||
if *fast { opts = opts.set_fast(); }
|
||||
schema_builder.add_date_field(&field_name, opts)
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Tag { stored, separator: _, case_sensitive: _ } => {
|
||||
let mut text_options = TextOptions::default();
|
||||
if *stored {
|
||||
text_options = text_options.set_stored();
|
||||
}
|
||||
text_options = text_options.set_indexing_options(
|
||||
TextFieldIndexing::default()
|
||||
.set_tokenizer("raw")
|
||||
.set_index_option(tantivy::schema::IndexRecordOption::Basic)
|
||||
);
|
||||
schema_builder.add_text_field(&field_name, text_options)
|
||||
}
|
||||
FieldDef::Geo { stored } => {
|
||||
// For now, store as two f64 fields for lat/lon
|
||||
let mut opts = tantivy::schema::NumericOptions::default();
|
||||
if *stored { opts = opts.set_stored(); }
|
||||
opts = opts.set_indexed().set_fast();
|
||||
|
||||
let lat_field = schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
|
||||
let lon_field = schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
|
||||
|
||||
fields.insert(format!("{}_lat", field_name), (lat_field, FieldDef::Numeric {
|
||||
stored: *stored,
|
||||
indexed: true,
|
||||
fast: true,
|
||||
precision: NumericType::F64,
|
||||
}));
|
||||
fields.insert(format!("{}_lon", field_name), (lon_field, FieldDef::Numeric {
|
||||
stored: *stored,
|
||||
indexed: true,
|
||||
fast: true,
|
||||
precision: NumericType::F64,
|
||||
}));
|
||||
continue; // Skip adding the geo field itself
|
||||
}
|
||||
};
|
||||
|
||||
fields.insert(field_name.clone(), (field, field_def));
|
||||
}
|
||||
|
||||
let schema = schema_builder.build();
|
||||
let index_schema = IndexSchema {
|
||||
schema: schema.clone(),
|
||||
fields,
|
||||
default_search_fields,
|
||||
};
|
||||
|
||||
// Create or open index
|
||||
let dir = MmapDirectory::open(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
|
||||
|
||||
let mut index = Index::open_or_create(dir, schema)
|
||||
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
|
||||
|
||||
// Configure tokenizers
|
||||
let tokenizer_manager = TokenizerManager::default();
|
||||
index.set_tokenizers(tokenizer_manager);
|
||||
|
||||
let writer = index.writer(1_000_000)
|
||||
.map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
|
||||
|
||||
let reader = index
|
||||
.reader_builder()
|
||||
.reload_policy(ReloadPolicy::OnCommitWithDelay)
|
||||
.try_into()
|
||||
.map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;
|
||||
|
||||
let config = config.unwrap_or_default();
|
||||
|
||||
Ok(TantivySearch {
|
||||
index,
|
||||
writer: Arc::new(RwLock::new(writer)),
|
||||
reader,
|
||||
index_schema,
|
||||
name,
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn add_document_with_fields(
|
||||
&self,
|
||||
doc_id: &str,
|
||||
fields: HashMap<String, String>,
|
||||
) -> Result<(), DBError> {
|
||||
let mut writer = self.writer.write()
|
||||
.map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;
|
||||
|
||||
// Delete existing document with same ID
|
||||
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
|
||||
writer.delete_term(Term::from_field_text(*id_field, doc_id));
|
||||
}
|
||||
|
||||
// Create new document
|
||||
let mut doc = tantivy::doc!();
|
||||
|
||||
// Add document ID
|
||||
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
|
||||
doc.add_text(*id_field, doc_id);
|
||||
}
|
||||
|
||||
// Add other fields based on schema
|
||||
for (field_name, field_value) in fields {
|
||||
if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
|
||||
match field_def {
|
||||
FieldDef::Text { .. } => {
|
||||
doc.add_text(*field, &field_value);
|
||||
}
|
||||
FieldDef::Numeric { precision, .. } => {
|
||||
match precision {
|
||||
NumericType::I64 => {
|
||||
if let Ok(v) = field_value.parse::<i64>() {
|
||||
doc.add_i64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::U64 => {
|
||||
if let Ok(v) = field_value.parse::<u64>() {
|
||||
doc.add_u64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::F64 => {
|
||||
if let Ok(v) = field_value.parse::<f64>() {
|
||||
doc.add_f64(*field, v);
|
||||
}
|
||||
}
|
||||
NumericType::Date => {
|
||||
if let Ok(v) = field_value.parse::<i64>() {
|
||||
doc.add_date(*field, DateTime::from_timestamp_millis(v));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Tag { separator, case_sensitive, .. } => {
|
||||
let tags = if !case_sensitive {
|
||||
field_value.to_lowercase()
|
||||
} else {
|
||||
field_value.clone()
|
||||
};
|
||||
|
||||
// Store tags as separate terms for efficient filtering
|
||||
for tag in tags.split(separator.as_str()) {
|
||||
doc.add_text(*field, tag.trim());
|
||||
}
|
||||
}
|
||||
FieldDef::Geo { .. } => {
|
||||
// Parse "lat,lon" format
|
||||
let parts: Vec<&str> = field_value.split(',').collect();
|
||||
if parts.len() == 2 {
|
||||
if let (Ok(lat), Ok(lon)) = (parts[0].parse::<f64>(), parts[1].parse::<f64>()) {
|
||||
if let Some((lat_field, _)) = self.index_schema.fields.get(&format!("{}_lat", field_name)) {
|
||||
doc.add_f64(*lat_field, lat);
|
||||
}
|
||||
if let Some((lon_field, _)) = self.index_schema.fields.get(&format!("{}_lon", field_name)) {
|
||||
doc.add_f64(*lon_field, lon);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.add_document(doc).map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
|
||||
|
||||
writer.commit()
|
||||
.map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn search_with_options(
|
||||
&self,
|
||||
query_str: &str,
|
||||
options: SearchOptions,
|
||||
) -> Result<SearchResults, DBError> {
|
||||
let searcher = self.reader.searcher();
|
||||
|
||||
// Parse query based on search fields
|
||||
let query: Box<dyn Query> = if self.index_schema.default_search_fields.is_empty() {
|
||||
return Err(DBError("No searchable fields defined in schema".to_string()));
|
||||
} else {
|
||||
let query_parser = QueryParser::for_index(
|
||||
&self.index,
|
||||
self.index_schema.default_search_fields.clone(),
|
||||
);
|
||||
|
||||
Box::new(query_parser.parse_query(query_str)
|
||||
.map_err(|e| DBError(format!("Failed to parse query: {}", e)))?)
|
||||
};
|
||||
|
||||
// Apply filters if any
|
||||
let final_query = if !options.filters.is_empty() {
|
||||
let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, query)];
|
||||
|
||||
// Add filters
|
||||
for filter in options.filters {
|
||||
if let Some((field, _)) = self.index_schema.fields.get(&filter.field) {
|
||||
match filter.filter_type {
|
||||
FilterType::Equals(value) => {
|
||||
let term_query = TermQuery::new(
|
||||
Term::from_field_text(*field, &value),
|
||||
tantivy::schema::IndexRecordOption::Basic,
|
||||
);
|
||||
clauses.push((Occur::Must, Box::new(term_query)));
|
||||
}
|
||||
FilterType::Range { min: _, max: _ } => {
|
||||
// Would need numeric field handling here
|
||||
// Simplified for now
|
||||
}
|
||||
FilterType::InSet(values) => {
|
||||
let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
|
||||
for value in values {
|
||||
let term_query = TermQuery::new(
|
||||
Term::from_field_text(*field, &value),
|
||||
tantivy::schema::IndexRecordOption::Basic,
|
||||
);
|
||||
sub_clauses.push((Occur::Should, Box::new(term_query)));
|
||||
}
|
||||
clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Box::new(BooleanQuery::new(clauses))
|
||||
} else {
|
||||
query
|
||||
};
|
||||
|
||||
// Execute search
|
||||
let top_docs = searcher.search(
|
||||
&*final_query,
|
||||
&TopDocs::with_limit(options.limit + options.offset)
|
||||
).map_err(|e| DBError(format!("Search failed: {}", e)))?;
|
||||
|
||||
let total_hits = top_docs.len();
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for (score, doc_address) in top_docs.iter().skip(options.offset).take(options.limit) {
|
||||
let retrieved_doc: TantivyDocument = searcher.doc(*doc_address)
|
||||
.map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;
|
||||
|
||||
let mut doc_fields = HashMap::new();
|
||||
|
||||
// Extract all stored fields
|
||||
for (field_name, (field, field_def)) in &self.index_schema.fields {
|
||||
match field_def {
|
||||
FieldDef::Text { stored, .. } |
|
||||
FieldDef::Tag { stored, .. } => {
|
||||
if *stored {
|
||||
if let Some(value) = retrieved_doc.get_first(*field) {
|
||||
if let Some(text) = value.as_str() {
|
||||
doc_fields.insert(field_name.clone(), text.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Numeric { stored, precision, .. } => {
|
||||
if *stored {
|
||||
let value_str = match precision {
|
||||
NumericType::I64 => {
|
||||
retrieved_doc.get_first(*field)
|
||||
.and_then(|v| v.as_i64())
|
||||
.map(|v| v.to_string())
|
||||
}
|
||||
NumericType::U64 => {
|
||||
retrieved_doc.get_first(*field)
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v.to_string())
|
||||
}
|
||||
NumericType::F64 => {
|
||||
retrieved_doc.get_first(*field)
|
||||
.and_then(|v| v.as_f64())
|
||||
.map(|v| v.to_string())
|
||||
}
|
||||
NumericType::Date => {
|
||||
retrieved_doc.get_first(*field)
|
||||
.and_then(|v| v.as_datetime())
|
||||
.map(|v| v.into_timestamp_millis().to_string())
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(v) = value_str {
|
||||
doc_fields.insert(field_name.clone(), v);
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldDef::Geo { stored } => {
|
||||
if *stored {
|
||||
let lat_field = self.index_schema.fields.get(&format!("{}_lat", field_name)).unwrap().0;
|
||||
let lon_field = self.index_schema.fields.get(&format!("{}_lon", field_name)).unwrap().0;
|
||||
|
||||
let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
|
||||
let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
|
||||
|
||||
if let (Some(lat), Some(lon)) = (lat, lon) {
|
||||
doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
documents.push(SearchDocument {
|
||||
fields: doc_fields,
|
||||
score: *score,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(SearchResults {
|
||||
total: total_hits,
|
||||
documents,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_info(&self) -> Result<IndexInfo, DBError> {
|
||||
let searcher = self.reader.searcher();
|
||||
let num_docs = searcher.num_docs();
|
||||
|
||||
let fields_info: Vec<FieldInfo> = self.index_schema.fields.iter().map(|(name, (_, def))| {
|
||||
FieldInfo {
|
||||
name: name.clone(),
|
||||
field_type: format!("{:?}", def),
|
||||
}
|
||||
}).collect();
|
||||
|
||||
Ok(IndexInfo {
|
||||
name: self.name.clone(),
|
||||
num_docs,
|
||||
fields: fields_info,
|
||||
config: self.config.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SearchOptions {
|
||||
pub limit: usize,
|
||||
pub offset: usize,
|
||||
pub filters: Vec<Filter>,
|
||||
pub sort_by: Option<String>,
|
||||
pub return_fields: Option<Vec<String>>,
|
||||
pub highlight: bool,
|
||||
}
|
||||
|
||||
impl Default for SearchOptions {
|
||||
fn default() -> Self {
|
||||
SearchOptions {
|
||||
limit: 10,
|
||||
offset: 0,
|
||||
filters: vec![],
|
||||
sort_by: None,
|
||||
return_fields: None,
|
||||
highlight: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Filter {
|
||||
pub field: String,
|
||||
pub filter_type: FilterType,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FilterType {
|
||||
Equals(String),
|
||||
Range { min: String, max: String },
|
||||
InSet(Vec<String>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SearchResults {
|
||||
pub total: usize,
|
||||
pub documents: Vec<SearchDocument>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SearchDocument {
|
||||
pub fields: HashMap<String, String>,
|
||||
pub score: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct IndexInfo {
|
||||
pub name: String,
|
||||
pub num_docs: u64,
|
||||
pub fields: Vec<FieldInfo>,
|
||||
pub config: IndexConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FieldInfo {
|
||||
pub name: String,
|
||||
pub field_type: String,
|
||||
}
|
Reference in New Issue
Block a user