Compare commits

22 Commits:

- 4b3a86d73d
- a1127b72da
- fbcaafc86b
- 3850df89be
- ce1be0369a
- 45195d403e
- 4b8216bfdb
- f17b441ca1
- 8bc372ea64
- 7920945986
- ff4ea1d844
- d4d3660bac
- c9e1dcdb6c
- b68325016d
- 2743cd9c81
- eb07386cf4
- fc7672c78a
- 46f96fa8cf
- 56699b9abb
- dd90a49615
- 9054737e84
- 09553f54c8
Cargo.lock (generated, 1018 lines): diff suppressed because it is too large.
Cargo.toml (37 lines)
@@ -1,9 +1,30 @@
-[workspace]
-members = ["herodb"]
-resolver = "2"
-
-# You can define shared profiles for all workspace members here
-[profile.release]
-lto = true
-codegen-units = 1
-strip = true
+[package]
+name = "herodb"
+version = "0.0.1"
+authors = ["Pin Fang <fpfangpin@hotmail.com>"]
+edition = "2021"
+
+[dependencies]
+anyhow = "1.0.59"
+bytes = "1.3.0"
+thiserror = "1.0.32"
+tokio = { version = "1.23.0", features = ["full"] }
+clap = { version = "4.5.20", features = ["derive"] }
+byteorder = "1.4.3"
+futures = "0.3"
+sled = "0.34"
+redb = "2.1.3"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+bincode = "1.3"
+chacha20poly1305 = "0.10.1"
+rand = "0.8"
+sha2 = "0.10"
+age = "0.10"
+secrecy = "0.8"
+ed25519-dalek = "2"
+base64 = "0.22"
+tantivy = "0.25.0"
+
+[dev-dependencies]
+redis = { version = "0.24", features = ["aio", "tokio-comp"] }
README.md (new file, 85 lines)
@@ -0,0 +1,85 @@

# HeroDB

HeroDB is a Redis-compatible database built with Rust, offering a flexible and secure storage solution. It supports two primary storage backends: `redb` (default) and `sled`, both with full encryption capabilities. HeroDB aims to provide a robust and performant key-value store with advanced features like data-at-rest encryption, hash operations, list operations, and cursor-based scanning.

## Purpose

The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-compatible database that prioritizes data security through transparent encryption. It's designed for applications that require fast, reliable data storage with the option for strong cryptographic protection, without the overhead of a full-fledged Redis server.

## Features

- **Redis Compatibility**: Supports a subset of Redis commands over RESP (Redis Serialization Protocol) via TCP.
- **Dual Backend Support**:
  - `redb` (default): Optimized for concurrent access and high-throughput scenarios.
  - `sled`: A lock-free, log-structured database, excellent for specific workloads.
- **Data-at-Rest Encryption**: Transparent encryption for both backends using the `age` encryption library.
- **Key-Value Operations**: Full support for basic string, hash, and list operations.
- **Expiration**: Time-to-live (TTL) functionality for keys.
- **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`).
- **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations.

## Quick Start

### Building HeroDB

To build HeroDB, navigate to the project root and run:

```bash
cargo build --release
```

### Running HeroDB

You can start HeroDB with different backends and encryption options:

#### Default `redb` Backend

```bash
./target/release/herodb --dir /tmp/herodb_redb --port 6379
```

#### `sled` Backend

```bash
./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled
```

#### `redb` with Encryption

```bash
./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey
```

#### `sled` with Encryption

```bash
./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
```
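Whichever variant you launch, a quick liveness check confirms the server is accepting connections before you point clients at it (the port matches the examples above):

```bash
# Should print PONG if the server is up
redis-cli -p 6379 PING
```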
## Usage with Redis Clients

You can interact with HeroDB using any standard Redis client, such as `redis-cli`, `redis-py` (Python), or `ioredis` (Node.js).

### Example with `redis-cli`

```bash
redis-cli -p 6379 SET mykey "Hello from HeroDB!"
redis-cli -p 6379 GET mykey
# → "Hello from HeroDB!"

redis-cli -p 6379 HSET user:1 name "Alice" age "30"
redis-cli -p 6379 HGET user:1 name
# → "Alice"

redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
# → 1) "0"
#    2) 1) "user:1"
```
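Expiration uses the standard commands as well; a short sketch (the key name is illustrative) of the TTL support listed under Features:

```bash
redis-cli -p 6379 SET session:42 "token"
redis-cli -p 6379 EXPIRE session:42 60   # expire in 60 seconds
redis-cli -p 6379 TTL session:42
# → (integer) 60, counting down
```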
## Documentation

For more detailed information on commands, features, and advanced usage, please refer to the documentation:

- [Basics](docs/basics.md)
- [Supported Commands](docs/cmds.md)
- [AGE Cryptography](docs/age.md)
docs/cmds.md (new file, 125 lines)
@@ -0,0 +1,125 @@

## Backend Support

HeroDB supports two storage backends, both with full encryption support:

- **redb** (default): Full-featured, optimized for production use
- **sled**: Alternative embedded database with encryption support

### Starting HeroDB with Different Backends

```bash
# Use default redb backend
./target/release/herodb --dir /tmp/herodb_redb --port 6379

# Use sled backend
./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled

# Use redb with encryption
./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey

# Use sled with encryption
./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
```

### Command Support by Backend

| Command Category | redb | sled | Notes |
|-----------------|------|------|-------|
| **Strings** | | | |
| SET | ✅ | ✅ | Full support |
| GET | ✅ | ✅ | Full support |
| DEL | ✅ | ✅ | Full support |
| EXISTS | ✅ | ✅ | Full support |
| INCR/DECR | ✅ | ✅ | Full support |
| MGET/MSET | ✅ | ✅ | Full support |
| **Hashes** | | | |
| HSET | ✅ | ✅ | Full support |
| HGET | ✅ | ✅ | Full support |
| HGETALL | ✅ | ✅ | Full support |
| HDEL | ✅ | ✅ | Full support |
| HEXISTS | ✅ | ✅ | Full support |
| HKEYS | ✅ | ✅ | Full support |
| HVALS | ✅ | ✅ | Full support |
| HLEN | ✅ | ✅ | Full support |
| HMGET | ✅ | ✅ | Full support |
| HSETNX | ✅ | ✅ | Full support |
| HINCRBY/HINCRBYFLOAT | ✅ | ✅ | Full support |
| HSCAN | ✅ | ✅ | Full support with pattern matching |
| **Lists** | | | |
| LPUSH/RPUSH | ✅ | ✅ | Full support |
| LPOP/RPOP | ✅ | ✅ | Full support |
| LLEN | ✅ | ✅ | Full support |
| LRANGE | ✅ | ✅ | Full support |
| LINDEX | ✅ | ✅ | Full support |
| LTRIM | ✅ | ✅ | Full support |
| LREM | ✅ | ✅ | Full support |
| BLPOP/BRPOP | ✅ | ❌ | Blocking operations not in sled |
| **Expiration** | | | |
| EXPIRE | ✅ | ✅ | Full support in both |
| TTL | ✅ | ✅ | Full support in both |
| PERSIST | ✅ | ✅ | Full support in both |
| SETEX/PSETEX | ✅ | ✅ | Full support in both |
| EXPIREAT/PEXPIREAT | ✅ | ✅ | Full support in both |
| **Scanning** | | | |
| KEYS | ✅ | ✅ | Full support with patterns |
| SCAN | ✅ | ✅ | Full cursor-based iteration (see the loop sketch below) |
| HSCAN | ✅ | ✅ | Full cursor-based iteration |
| **Transactions** | | | |
| MULTI/EXEC/DISCARD | ✅ | ❌ | Only supported in redb |
| **Encryption** | | | |
| Data-at-rest encryption | ✅ | ✅ | Both support [age](age.tech) encryption |
| AGE commands | ✅ | ✅ | Both support AGE crypto commands |
| **Full-Text Search** | | | |
| FT.CREATE | ✅ | ✅ | Create search index with schema |
| FT.ADD | ✅ | ✅ | Add document to search index |
| FT.SEARCH | ✅ | ✅ | Search documents with query |
| FT.DEL | ✅ | ✅ | Delete document from index |
| FT.INFO | ✅ | ✅ | Get index information |
| FT.DROP | ✅ | ✅ | Drop search index |
| FT.ALTER | ✅ | ✅ | Alter index schema |
| FT.AGGREGATE | ✅ | ✅ | Aggregate search results |
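Since SCAN is cursor-based on both backends, a complete iteration feeds each returned cursor back into the next call until the server returns cursor `0`. A minimal shell sketch (pattern, port, and COUNT are illustrative):

```bash
cursor=0
while :; do
  reply=$(redis-cli -p 6379 SCAN "$cursor" MATCH 'user:*' COUNT 100)
  cursor=$(echo "$reply" | head -n 1)   # first line of the reply is the next cursor
  echo "$reply" | tail -n +2            # remaining lines are the matched keys
  [ "$cursor" = "0" ] && break          # cursor 0 signals the iteration is complete
done
```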
### Performance Considerations

- **redb**: Optimized for concurrent access, better for high-throughput scenarios
- **sled**: Lock-free architecture, excellent for specific workloads

### Encryption Features

Both backends support:
- Transparent data-at-rest encryption using the `age` encryption library
- Per-database encryption (databases >= 10 are encrypted when the `--encrypt` flag is used; illustrated below)
- Secure key derivation using the master key
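As a sketch of what per-database encryption means in practice, assuming the standard `SELECT` command (exposed through `redis-cli -n`) is available for switching logical databases, writes to database 10 and above land encrypted on disk when the server was started with `--encrypt`:

```bash
# Server started with: ./target/release/herodb --dir /tmp/herodb --port 6379 --encrypt --key mysecretkey
redis-cli -p 6379 -n 9  SET plain_key "stored unencrypted"   # db 9: below the encryption threshold
redis-cli -p 6379 -n 10 SET secret_key "stored encrypted"    # db 10: encrypted at rest
```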
### Backend Selection Examples

```bash
# Example: Testing both backends
redis-cli -p 6379 SET mykey "redb value"
redis-cli -p 6381 SET mykey "sled value"

# Example: Using encryption with both
./target/release/herodb --port 6379 --encrypt --key secret123
./target/release/herodb --port 6381 --sled --encrypt --key secret123

# Both support the same Redis commands
redis-cli -p 6379 HSET user:1 name "Alice" age "30"
redis-cli -p 6381 HSET user:1 name "Alice" age "30"

# Both support SCAN operations
redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
redis-cli -p 6381 SCAN 0 MATCH user:* COUNT 10
```

### Migration Between Backends

To migrate data between backends, use Redis replication or dump/restore:

```bash
# Export from redb
redis-cli -p 6379 --rdb dump.rdb

# Import to sled (note: --pipe reads RESP commands from stdin, so the RDB
# dump must be replayed as commands rather than fed in as a raw binary file)
redis-cli -p 6381 --pipe < dump.rdb
```
docs/search.md (new file, 397 lines)
@@ -0,0 +1,397 @@

# Full-Text Search with Tantivy

HeroDB includes powerful full-text search capabilities powered by [Tantivy](https://github.com/quickwit-oss/tantivy), a fast full-text search engine library written in Rust. This provides Redis-compatible search commands similar to RediSearch.

## Overview

The search functionality allows you to:
- Create search indexes with custom schemas
- Index documents with multiple field types
- Perform complex queries with filters
- Work with text, numeric, date, and geographic data
- Search in real time with high performance

## Search Commands

### FT.CREATE - Create Search Index

Create a new search index with a defined schema.

```bash
FT.CREATE index_name SCHEMA field_name field_type [options] [field_name field_type [options] ...]
```

**Field Types:**
- `TEXT` - Full-text searchable text fields
- `NUMERIC` - Numeric fields (integers, floats)
- `TAG` - Tag fields for exact matching
- `GEO` - Geographic coordinates (lat,lon)
- `DATE` - Date/timestamp fields

**Field Options:**
- `STORED` - Store field value for retrieval
- `INDEXED` - Make field searchable
- `TOKENIZED` - Enable tokenization for text fields
- `FAST` - Enable fast access for numeric fields

**Example:**
```bash
# Create a product search index
FT.CREATE products SCHEMA
  title TEXT STORED INDEXED TOKENIZED
  description TEXT STORED INDEXED TOKENIZED
  price NUMERIC STORED INDEXED FAST
  category TAG STORED
  location GEO STORED
  created_date DATE STORED INDEXED
```

### FT.ADD - Add Document to Index

Add a document to a search index.

```bash
FT.ADD index_name doc_id [SCORE score] FIELDS field_name field_value [field_name field_value ...]
```

**Example:**
```bash
# Add a product document
FT.ADD products product:1 SCORE 1.0 FIELDS
  title "Wireless Headphones"
  description "High-quality wireless headphones with noise cancellation"
  price 199.99
  category "electronics"
  location "37.7749,-122.4194"
  created_date 1640995200000
```

### FT.SEARCH - Search Documents

Search for documents in an index.

```bash
FT.SEARCH index_name query [LIMIT offset count] [FILTER field min max] [RETURN field [field ...]]
```

**Query Syntax:**
- Simple terms: `wireless headphones`
- Phrase queries: `"noise cancellation"`
- Field-specific: `title:wireless`
- Boolean operators: `wireless AND headphones`
- Wildcards: `head*`

**Examples:**
```bash
# Simple text search
FT.SEARCH products "wireless headphones"

# Search with filters
FT.SEARCH products "headphones" FILTER price 100 300 LIMIT 0 10

# Field-specific search
FT.SEARCH products "title:wireless AND category:electronics"

# Return specific fields only
FT.SEARCH products "*" RETURN title price
```

### FT.DEL - Delete Document

Remove a document from the search index.

```bash
FT.DEL index_name doc_id
```

**Example:**
```bash
FT.DEL products product:1
```

### FT.INFO - Get Index Information

Get information about a search index.

```bash
FT.INFO index_name
```

**Returns:**
- Index name and document count
- Field definitions and types
- Index configuration

**Example:**
```bash
FT.INFO products
```

### FT.DROP - Drop Index

Delete an entire search index.

```bash
FT.DROP index_name
```

**Example:**
```bash
FT.DROP products
```

### FT.ALTER - Alter Index Schema

Add new fields to an existing index.

```bash
FT.ALTER index_name SCHEMA ADD field_name field_type [options]
```

**Example:**
```bash
FT.ALTER products SCHEMA ADD brand TAG STORED
```

### FT.AGGREGATE - Aggregate Search Results

Perform aggregations on search results.

```bash
FT.AGGREGATE index_name query [GROUPBY field] [REDUCE function field AS alias]
```

**Example:**
```bash
# Group products by category and count
FT.AGGREGATE products "*" GROUPBY category REDUCE COUNT 0 AS count
```

## Field Types in Detail

### TEXT Fields
- **Purpose**: Full-text search on natural language content
- **Features**: Tokenization, stemming, stop-word removal
- **Options**: `STORED`, `INDEXED`, `TOKENIZED`
- **Example**: Product titles, descriptions, content

### NUMERIC Fields
- **Purpose**: Numeric data for range queries and sorting
- **Types**: I64, U64, F64
- **Options**: `STORED`, `INDEXED`, `FAST`
- **Example**: Prices, quantities, ratings

### TAG Fields
- **Purpose**: Exact-match categorical data
- **Features**: No tokenization, exact string matching
- **Options**: `STORED`, case sensitivity control
- **Example**: Categories, brands, status values

### GEO Fields
- **Purpose**: Geographic coordinates
- **Format**: "latitude,longitude" (e.g., "37.7749,-122.4194")
- **Features**: Geographic distance queries
- **Options**: `STORED`

### DATE Fields
- **Purpose**: Timestamp and date data
- **Format**: Unix timestamp in milliseconds
- **Features**: Range queries, temporal filtering
- **Options**: `STORED`, `INDEXED`, `FAST`

## Search Query Syntax

### Basic Queries
```bash
# Single term
FT.SEARCH products "wireless"

# Multiple terms (AND by default)
FT.SEARCH products "wireless headphones"

# Phrase query
FT.SEARCH products "\"noise cancellation\""
```

### Field-Specific Queries
```bash
# Search in specific field
FT.SEARCH products "title:wireless"

# Multiple field queries
FT.SEARCH products "title:wireless AND description:bluetooth"
```

### Boolean Operators
```bash
# AND operator
FT.SEARCH products "wireless AND headphones"

# OR operator
FT.SEARCH products "wireless OR bluetooth"

# NOT operator
FT.SEARCH products "headphones NOT wired"
```

### Wildcards and Fuzzy Search
```bash
# Wildcard search
FT.SEARCH products "head*"

# Fuzzy search (approximate matching)
FT.SEARCH products "%headphone%"
```

### Range Queries
```bash
# Numeric range in query
FT.SEARCH products "@price:[100 300]"

# Date range
FT.SEARCH products "@created_date:[1640995200000 1672531200000]"
```

## Filtering and Sorting

### FILTER Clause
```bash
# Numeric filter
FT.SEARCH products "headphones" FILTER price 100 300

# Multiple filters
FT.SEARCH products "*" FILTER price 100 500 FILTER rating 4 5
```

### LIMIT Clause
```bash
# Pagination
FT.SEARCH products "wireless" LIMIT 0 10   # First 10 results
FT.SEARCH products "wireless" LIMIT 10 10  # Next 10 results
```

### RETURN Clause
```bash
# Return specific fields
FT.SEARCH products "*" RETURN title price

# Return all stored fields (default)
FT.SEARCH products "*"
```

## Performance Considerations

### Indexing Strategy
- Only index fields you need to search on
- Use the `FAST` option for frequently filtered numeric fields
- Consider storage vs. search performance trade-offs

### Query Optimization
- Use specific field queries when possible
- Combine filters with text queries for better performance
- Use pagination with LIMIT for large result sets

### Memory Usage
- Tantivy indexes are memory-mapped for performance
- Index size depends on document count and field configuration
- Monitor disk space for index storage

## Integration with Redis Commands

Search indexes work alongside regular Redis data:

```bash
# Store product data in Redis hash
HSET product:1 title "Wireless Headphones" price "199.99"

# Index the same data for search
FT.ADD products product:1 FIELDS title "Wireless Headphones" price 199.99

# Search returns document IDs that can be used with Redis commands
FT.SEARCH products "wireless"
# Returns: product:1

# Retrieve full data using Redis
HGETALL product:1
```

## Example Use Cases

### E-commerce Product Search
```bash
# Create product catalog index
FT.CREATE catalog SCHEMA
  name TEXT STORED INDEXED TOKENIZED
  description TEXT INDEXED TOKENIZED
  price NUMERIC STORED INDEXED FAST
  category TAG STORED
  brand TAG STORED
  rating NUMERIC STORED FAST

# Add products
FT.ADD catalog prod:1 FIELDS name "iPhone 14" price 999 category "phones" brand "apple" rating 4.5
FT.ADD catalog prod:2 FIELDS name "Samsung Galaxy" price 899 category "phones" brand "samsung" rating 4.3

# Search queries
FT.SEARCH catalog "iPhone"
FT.SEARCH catalog "phones" FILTER price 800 1000
FT.SEARCH catalog "@brand:apple"
```

### Content Management
```bash
# Create content index
FT.CREATE content SCHEMA
  title TEXT STORED INDEXED TOKENIZED
  body TEXT INDEXED TOKENIZED
  author TAG STORED
  published DATE STORED INDEXED
  tags TAG STORED

# Search content
FT.SEARCH content "machine learning"
FT.SEARCH content "@author:john AND @tags:ai"
FT.SEARCH content "*" FILTER published 1640995200000 1672531200000
```

### Geographic Search
```bash
# Create location-based index
FT.CREATE places SCHEMA
  name TEXT STORED INDEXED TOKENIZED
  location GEO STORED
  type TAG STORED

# Add locations
FT.ADD places place:1 FIELDS name "Golden Gate Bridge" location "37.8199,-122.4783" type "landmark"

# Geographic queries (future feature)
FT.SEARCH places "@location:[37.7749 -122.4194 10 km]"
```

## Error Handling

Common error responses (an example follows the list):
- `ERR index not found` - Index doesn't exist
- `ERR field not found` - Field not defined in schema
- `ERR invalid query syntax` - Malformed query
- `ERR document not found` - Document ID doesn't exist
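For example, querying an index that was never created (the index name here is illustrative) surfaces the first error in the list above:

```bash
redis-cli -p 6379 FT.SEARCH missing_index "anything"
# → (error) ERR index not found
```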
## Best Practices

1. **Schema Design**: Plan your schema carefully - changes require reindexing
2. **Field Selection**: Only store and index fields you actually need
3. **Batch Operations**: Add multiple documents efficiently (see the sketch after this list)
4. **Query Testing**: Test queries for performance with realistic data
5. **Monitoring**: Monitor index size and query performance
6. **Backup**: Include search indexes in backup strategies
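For point 3, batching from the shell can be as simple as looping FT.ADD over your records; a minimal sketch with illustrative document IDs and field values:

```bash
# Add several documents in one pass (doc IDs and fields are illustrative)
for i in 1 2 3; do
  redis-cli -p 6379 FT.ADD products "product:$i" SCORE 1.0 FIELDS \
    title "Product $i" price "$((i * 10)).99"
done
```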
## Future Enhancements

Planned features:
- Geographic distance queries
- Advanced aggregations and faceting
- Highlighting of search results
- Synonyms and custom analyzers
- Real-time suggestions and autocomplete
- Index replication and sharding
examples/README.md (new file, 171 lines)
@@ -0,0 +1,171 @@

# HeroDB Tantivy Search Examples

This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.

## Tantivy Search Demo (Bash Script)

### Overview
The `tantivy_search_demo.sh` script provides a comprehensive demonstration of HeroDB's search functionality using Redis commands. It showcases various search scenarios including basic text search, filtering, sorting, geographic queries, and more.

### Prerequisites
1. **HeroDB Server**: The server must be running on port 6381
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH (install hint below)
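If `redis-cli` is missing, any Redis client package that ships the CLI will do; on Debian/Ubuntu, for example (the package name is distribution-specific):

```bash
sudo apt-get install redis-tools
```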
### Running the Demo

#### Step 1: Start HeroDB Server
```bash
# From the project root directory
cargo run -- --port 6381
```

#### Step 2: Run the Demo (in a new terminal)
```bash
# From the project root directory
./examples/tantivy_search_demo.sh
```

### What the Demo Covers

The script demonstrates 15 different search scenarios:

1. **Index Creation** - Creating a search index with various field types
2. **Data Insertion** - Adding sample products to the index
3. **Basic Text Search** - Simple keyword searches
4. **Filtered Search** - Combining text search with category filters
5. **Numeric Range Search** - Finding products within price ranges
6. **Sorting Results** - Ordering results by different fields
7. **Limited Results** - Pagination and result limiting
8. **Complex Queries** - Multi-field searches with sorting
9. **Geographic Search** - Location-based queries
10. **Index Information** - Getting statistics about the search index
11. **Search Comparison** - Tantivy vs. simple pattern matching
12. **Fuzzy Search** - Typo tolerance and approximate matching
13. **Phrase Search** - Exact phrase matching
14. **Boolean Queries** - AND, OR, NOT operators
15. **Cleanup** - Removing test data

### Sample Data

The demo uses a product catalog with the following fields:
- **title** (TEXT) - Product name with higher search weight
- **description** (TEXT) - Detailed product description
- **category** (TAG) - Comma-separated categories
- **price** (NUMERIC) - Product price for range queries
- **rating** (NUMERIC) - Customer rating for sorting
- **location** (GEO) - Geographic coordinates for location searches

### Key Redis Commands Demonstrated

#### Index Management
```bash
# Create search index
FT.CREATE product_catalog ON HASH PREFIX 1 product: SCHEMA title TEXT WEIGHT 2.0 SORTABLE description TEXT category TAG SEPARATOR , price NUMERIC SORTABLE rating NUMERIC SORTABLE location GEO

# Get index information
FT.INFO product_catalog

# Drop index
FT.DROPINDEX product_catalog
```

#### Search Queries
```bash
# Basic text search
FT.SEARCH product_catalog wireless

# Filtered search
FT.SEARCH product_catalog 'organic @category:{food}'

# Numeric range
FT.SEARCH product_catalog '@price:[50 150]'

# Sorted results
FT.SEARCH product_catalog '@category:{electronics}' SORTBY price ASC

# Geographic search
FT.SEARCH product_catalog '@location:[37.7749 -122.4194 50 km]'

# Boolean queries
FT.SEARCH product_catalog 'wireless AND audio'
FT.SEARCH product_catalog 'coffee OR tea'

# Phrase search
FT.SEARCH product_catalog '"noise canceling"'
```

### Interactive Features

The demo script includes:
- **Colored output** for better readability
- **Pauses between steps** to review results
- **Error handling** with clear error messages
- **Automatic cleanup** of test data
- **Progress indicators** showing what each step demonstrates

### Troubleshooting

#### HeroDB Not Running
```
✗ HeroDB is not running on port 6381
ℹ Please start HeroDB with: cargo run -- --port 6381
```
**Solution**: Start the HeroDB server in a separate terminal.

#### Redis CLI Not Found
```
redis-cli: command not found
```
**Solution**: Install Redis tools or use an alternative Redis client.

#### Connection Refused
```
Could not connect to Redis at localhost:6381: Connection refused
```
**Solution**: Ensure HeroDB is running and listening on the correct port.

### Manual Testing

You can also run individual commands manually:

```bash
# Connect to HeroDB
redis-cli -h localhost -p 6381

# Create a simple index
FT.CREATE myindex ON HASH SCHEMA title TEXT description TEXT

# Add a document
HSET doc:1 title "Hello World" description "This is a test document"

# Search
FT.SEARCH myindex hello
```

### Performance Notes

- **Indexing**: Documents are indexed in real time as they're added
- **Search Speed**: Full-text search is much faster than pattern matching on large datasets
- **Memory Usage**: Tantivy indexes are memory-efficient and disk-backed
- **Scalability**: Supports millions of documents with sub-second search times

### Advanced Features

The demo showcases advanced Tantivy features:
- **Relevance Scoring** - Results ranked by relevance
- **Fuzzy Matching** - Handles typos and approximate matches
- **Field Weighting** - The title field has a higher search weight
- **Multi-field Search** - Search across multiple fields simultaneously
- **Geographic Queries** - Distance-based location searches
- **Numeric Ranges** - Efficient range queries on numeric fields
- **Tag Filtering** - Fast categorical filtering

### Next Steps

After running the demo, explore:
1. **Custom Schemas** - Define your own field types and configurations
2. **Large Datasets** - Test with thousands or millions of documents
3. **Real Applications** - Integrate search into your applications
4. **Performance Tuning** - Optimize for your specific use case

For more information, see the [search documentation](../herodb/docs/search.md).
examples/simple_demo.sh (new file, 186 lines)
@@ -0,0 +1,186 @@

#!/bin/bash

# Simple HeroDB Demo - Basic Redis Commands
# This script demonstrates basic Redis functionality that's currently implemented

set -e  # Exit on any error

# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6381"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Function to print colored output
print_header() {
    echo -e "${BLUE}=== $1 ===${NC}"
}

print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

print_info() {
    echo -e "${YELLOW}ℹ $1${NC}"
}

print_error() {
    echo -e "${RED}✗ $1${NC}"
}

# Function to check if HeroDB is running
check_herodb() {
    print_info "Checking if HeroDB is running on port $REDIS_PORT..."
    if ! $REDIS_CLI ping > /dev/null 2>&1; then
        print_error "HeroDB is not running on port $REDIS_PORT"
        print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT"
        exit 1
    fi
    print_success "HeroDB is running and responding"
}

# Function to execute Redis command with error handling
execute_cmd() {
    local cmd="$1"
    local description="$2"

    echo -e "${YELLOW}Command:${NC} $cmd"
    if result=$($REDIS_CLI $cmd 2>&1); then
        echo -e "${GREEN}Result:${NC} $result"
        return 0
    else
        print_error "Failed: $description"
        echo "Error: $result"
        return 1
    fi
}

# Main demo function
main() {
    clear
    print_header "HeroDB Basic Functionality Demo"
    echo "This demo shows basic Redis commands that are currently implemented"
    echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)"
    echo

    # Check if HeroDB is running
    check_herodb
    echo

    print_header "Step 1: Basic Key-Value Operations"

    execute_cmd "SET greeting 'Hello HeroDB!'" "Setting a simple key-value pair"
    echo
    execute_cmd "GET greeting" "Getting the value"
    echo
    execute_cmd "SET counter 42" "Setting a numeric value"
    echo
    execute_cmd "INCR counter" "Incrementing the counter"
    echo
    execute_cmd "GET counter" "Getting the incremented value"
    echo

    print_header "Step 2: Hash Operations"

    execute_cmd "HSET user:1 name 'John Doe' email 'john@example.com' age 30" "Setting hash fields"
    echo
    execute_cmd "HGET user:1 name" "Getting a specific field"
    echo
    execute_cmd "HGETALL user:1" "Getting all fields"
    echo
    execute_cmd "HLEN user:1" "Getting hash length"
    echo

    print_header "Step 3: List Operations"

    execute_cmd "LPUSH tasks 'Write code' 'Test code' 'Deploy code'" "Adding items to list"
    echo
    execute_cmd "LLEN tasks" "Getting list length"
    echo
    execute_cmd "LRANGE tasks 0 -1" "Getting all list items"
    echo
    execute_cmd "LPOP tasks" "Popping from left"
    echo
    execute_cmd "LRANGE tasks 0 -1" "Checking remaining items"
    echo

    print_header "Step 4: Key Management"

    execute_cmd "KEYS *" "Listing all keys"
    echo
    execute_cmd "EXISTS greeting" "Checking if key exists"
    echo
    execute_cmd "TYPE user:1" "Getting key type"
    echo
    execute_cmd "DBSIZE" "Getting database size"
    echo

    print_header "Step 5: Expiration"

    execute_cmd "SET temp_key 'temporary value'" "Setting temporary key"
    echo
    execute_cmd "EXPIRE temp_key 5" "Setting 5 second expiration"
    echo
    execute_cmd "TTL temp_key" "Checking time to live"
    echo
    print_info "Waiting 2 seconds..."
    sleep 2
    execute_cmd "TTL temp_key" "Checking TTL again"
    echo

    print_header "Step 6: Multiple Operations"

    execute_cmd "MSET key1 'value1' key2 'value2' key3 'value3'" "Setting multiple keys"
    echo
    execute_cmd "MGET key1 key2 key3" "Getting multiple values"
    echo
    execute_cmd "DEL key1 key2" "Deleting multiple keys"
    echo
    execute_cmd "EXISTS key1 key2 key3" "Checking existence of multiple keys"
    echo

    print_header "Step 7: Search Commands (Placeholder)"
    print_info "Testing FT.CREATE command (currently returns placeholder response)"

    execute_cmd "FT.CREATE test_index SCHEMA title TEXT description TEXT" "Creating search index"
    echo

    print_header "Step 8: Server Information"

    execute_cmd "INFO" "Getting server information"
    echo
    execute_cmd "CONFIG GET dir" "Getting configuration"
    echo

    print_header "Step 9: Cleanup"

    execute_cmd "FLUSHDB" "Clearing database"
    echo
    execute_cmd "DBSIZE" "Confirming database is empty"
    echo

    print_header "Demo Summary"
    echo "This demonstration showed:"
    echo "• Basic key-value operations (GET, SET, INCR)"
    echo "• Hash operations (HSET, HGET, HGETALL)"
    echo "• List operations (LPUSH, LPOP, LRANGE)"
    echo "• Key management (KEYS, EXISTS, TYPE, DEL)"
    echo "• Expiration handling (EXPIRE, TTL)"
    echo "• Multiple key operations (MSET, MGET)"
    echo "• Server information commands"
    echo
    print_success "HeroDB basic functionality demo completed successfully!"
    echo
    print_info "Note: Full-text search (FT.*) commands are defined but not yet fully implemented"
    print_info "To run HeroDB server: cargo run -- --port 6381"
    print_info "To connect with redis-cli: redis-cli -h localhost -p 6381"
}

# Run the demo
main "$@"
examples/tantivy_search_demo.sh (new executable file, 239 lines)
@@ -0,0 +1,239 @@

#!/bin/bash

# HeroDB Tantivy Search Demo
# This script demonstrates full-text search capabilities using Redis commands.
# It starts its own HeroDB server on $REDIS_PORT (set below) and connects with redis-cli.

set -e  # Exit on any error

# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6382"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"

# Start the herodb server in the background
echo "Starting herodb server..."
cargo run -p herodb -- --dir /tmp/herodbtest --port ${REDIS_PORT} --debug &
SERVER_PID=$!
echo
sleep 2  # Give the server a moment to start

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Function to print colored output
print_header() {
    echo -e "${BLUE}=== $1 ===${NC}"
}

print_success() {
    echo -e "${GREEN}✓ $1${NC}"
}

print_info() {
    echo -e "${YELLOW}ℹ $1${NC}"
}

print_error() {
    echo -e "${RED}✗ $1${NC}"
}

# Function to check if HeroDB is running
check_herodb() {
    print_info "Checking if HeroDB is running on port $REDIS_PORT..."
    if ! $REDIS_CLI ping > /dev/null 2>&1; then
        print_error "HeroDB is not running on port $REDIS_PORT"
        print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT"
        exit 1
    fi
    print_success "HeroDB is running and responding"
}

# Function to execute Redis command with error handling
execute_cmd() {
    local cmd="$1"
    local description="$2"

    echo -e "${YELLOW}Command:${NC} $cmd"
    if result=$($REDIS_CLI $cmd 2>&1); then
        echo -e "${GREEN}Result:${NC} $result"
        return 0
    else
        print_error "Failed: $description"
        echo "Error: $result"
        return 1
    fi
}

# Function to pause for readability
pause() {
    echo
    read -p "Press Enter to continue..."
    echo
}

# Main demo function
main() {
    clear
    print_header "HeroDB Tantivy Search Demonstration"
    echo "This demo shows full-text search capabilities using Redis commands"
    echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)"
    echo

    # Check if HeroDB is running
    check_herodb
    echo

    print_header "Step 1: Create Search Index"
    print_info "Creating a product catalog search index with various field types"

    # Create search index with schema
    execute_cmd "FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO" \
        "Creating search index"

    print_success "Search index 'product_catalog' created successfully"
    pause

    print_header "Step 2: Add Sample Products"
    print_info "Adding sample products to demonstrate different search scenarios"

    # Add sample products using FT.ADD
    execute_cmd "FT.ADD product_catalog product:1 1.0 title 'Wireless Bluetooth Headphones' description 'Premium noise-canceling headphones with 30-hour battery life' category 'electronics,audio' price 299.99 rating 4.5 location '-122.4194,37.7749'" "Adding product 1"
    execute_cmd "FT.ADD product_catalog product:2 1.0 title 'Organic Coffee Beans' description 'Single-origin Ethiopian coffee beans, medium roast' category 'food,beverages,organic' price 24.99 rating 4.8 location '-74.0060,40.7128'" "Adding product 2"
    execute_cmd "FT.ADD product_catalog product:3 1.0 title 'Yoga Mat Premium' description 'Eco-friendly yoga mat with superior grip and cushioning' category 'fitness,wellness,eco-friendly' price 89.99 rating 4.3 location '-118.2437,34.0522'" "Adding product 3"
    execute_cmd "FT.ADD product_catalog product:4 1.0 title 'Smart Home Speaker' description 'Voice-controlled smart speaker with AI assistant' category 'electronics,smart-home' price 149.99 rating 4.2 location '-87.6298,41.8781'" "Adding product 4"
    execute_cmd "FT.ADD product_catalog product:5 1.0 title 'Organic Green Tea' description 'Premium organic green tea leaves from Japan' category 'food,beverages,organic,tea' price 18.99 rating 4.7 location '139.6503,35.6762'" "Adding product 5"
    execute_cmd "FT.ADD product_catalog product:6 1.0 title 'Wireless Gaming Mouse' description 'High-precision gaming mouse with RGB lighting' category 'electronics,gaming' price 79.99 rating 4.4 location '-122.3321,47.6062'" "Adding product 6"
    execute_cmd "FT.ADD product_catalog product:7 1.0 title 'Comfortable meditation cushion for mindfulness practice' description 'Meditation cushion with premium materials' category 'wellness,meditation' price 45.99 rating 4.6 location '-122.4194,37.7749'" "Adding product 7"
    execute_cmd "FT.ADD product_catalog product:8 1.0 title 'Bluetooth Earbuds' description 'True wireless earbuds with active noise cancellation' category 'electronics,audio' price 199.99 rating 4.1 location '-74.0060,40.7128'" "Adding product 8"

    print_success "Added 8 products to the index"
    pause

    print_header "Step 3: Basic Text Search"
    print_info "Searching for 'wireless' products"

    execute_cmd "FT.SEARCH product_catalog wireless" "Basic text search"
    pause

    print_header "Step 4: Search with Filters"
    print_info "Searching for 'organic' products"

    execute_cmd "FT.SEARCH product_catalog organic" "Filtered search"
    pause

    print_header "Step 5: Numeric Range Search"
    print_info "Searching for 'premium' products"

    execute_cmd "FT.SEARCH product_catalog premium" "Text search"
    pause

    print_header "Step 6: Sorting Results"
    print_info "Searching for electronics"

    execute_cmd "FT.SEARCH product_catalog electronics" "Category search"
    pause

    print_header "Step 7: Limiting Results"
    print_info "Searching for wireless products with limit"

    execute_cmd "FT.SEARCH product_catalog wireless LIMIT 0 3" "Limited results"
    pause

    print_header "Step 8: Complex Query"
    print_info "Finding audio products with noise cancellation"

    execute_cmd "FT.SEARCH product_catalog 'noise cancellation'" "Complex query"
    pause

    print_header "Step 9: Geographic Search"
    print_info "Searching for meditation products"

    execute_cmd "FT.SEARCH product_catalog meditation" "Text search"
    pause

    print_header "Step 10: Aggregation Example"
    print_info "Getting index information and statistics"

    execute_cmd "FT.INFO product_catalog" "Index information"
    pause

    print_header "Step 11: Search Comparison"
    print_info "Comparing Tantivy search vs simple key matching"

    echo -e "${YELLOW}Tantivy Full-Text Search:${NC}"
    execute_cmd "FT.SEARCH product_catalog 'battery life'" "Full-text search for 'battery life'"

    echo
    echo -e "${YELLOW}Simple Key Pattern Matching:${NC}"
    execute_cmd "KEYS *battery*" "Simple pattern matching for 'battery'"

    print_info "Notice how full-text search finds relevant results even when exact words don't match keys"
    pause

    print_header "Step 12: Fuzzy Search"
    print_info "Searching for headphones"

    execute_cmd "FT.SEARCH product_catalog headphones" "Text search"
    pause

    print_header "Step 13: Phrase Search"
    print_info "Searching for coffee products"

    execute_cmd "FT.SEARCH product_catalog coffee" "Text search"
    pause

    print_header "Step 14: Boolean Queries"
    print_info "Searching for gaming products"

    execute_cmd "FT.SEARCH product_catalog gaming" "Text search"
    echo
    execute_cmd "FT.SEARCH product_catalog tea" "Text search"
    pause

    print_header "Step 15: Cleanup"
    print_info "Removing test data"

    # Clean up documents first, then drop the index
    # (deleting documents from an already-dropped index would fail)
    for i in {1..8}; do
        execute_cmd "FT.DEL product_catalog product:$i" "Deleting product:$i from index"
    done

    # Delete the search index
    execute_cmd "FT.DROP product_catalog" "Dropping search index"

    print_success "Cleanup completed"
    echo

    print_header "Demo Summary"
    echo "This demonstration showed:"
    echo "• Creating search indexes with different field types"
    echo "• Adding documents to the search index"
    echo "• Basic and advanced text search queries"
    echo "• Filtering by categories and numeric ranges"
    echo "• Sorting and limiting results"
    echo "• Geographic searches"
    echo "• Fuzzy matching and phrase searches"
    echo "• Boolean query operators"
    echo "• Comparison with simple pattern matching"
    echo
    print_success "HeroDB Tantivy search demo completed successfully!"
    echo
    print_info "Key advantages of Tantivy full-text search:"
    echo "  - Relevance scoring and ranking"
    echo "  - Fuzzy matching and typo tolerance"
    echo "  - Complex boolean queries"
    echo "  - Field-specific searches and filters"
    echo "  - Geographic and numeric range queries"
    echo "  - Much faster than pattern matching on large datasets"
    echo
    print_info "To run HeroDB server: cargo run -- --port $REDIS_PORT"
    print_info "To connect with redis-cli: redis-cli -h localhost -p $REDIS_PORT"
}

# Run the demo
main "$@"

# Stop the background server started at the top of the script
kill $SERVER_PID 2>/dev/null || true
examples/test_tantivy_integration.sh (new executable file, 101 lines)
@@ -0,0 +1,101 @@

#!/bin/bash

# Simple Tantivy Search Integration Test for HeroDB
# This script tests the full-text search functionality we just integrated

set -e

echo "🔍 Testing Tantivy Search Integration..."

# Build the project first
echo "📦 Building HeroDB..."
cargo build --release

# Start the server in the background
echo "🚀 Starting HeroDB server on port 6379..."
cargo run --release -- --port 6379 --dir ./test_data &
SERVER_PID=$!

# Wait for server to start
sleep 3

# Function to cleanup on exit
cleanup() {
    echo "🧹 Cleaning up..."
    kill $SERVER_PID 2>/dev/null || true
    rm -rf ./test_data
    exit
}

# Set trap for cleanup
trap cleanup EXIT INT TERM

# Function to execute Redis command
execute_cmd() {
    local cmd="$1"
    local description="$2"

    echo "📝 $description"
    echo "   Command: $cmd"

    if result=$(redis-cli -p 6379 $cmd 2>&1); then
        echo "   ✅ Result: $result"
        echo
        return 0
    else
        echo "   ❌ Failed: $result"
        echo
        return 1
    fi
}

echo "🧪 Running Tantivy Search Tests..."
echo

# Test 1: Create a search index
execute_cmd "ft.create books SCHEMA title TEXT description TEXT author TEXT category TAG price NUMERIC" \
    "Creating search index 'books'"

# Test 2: Add documents to the index
execute_cmd "ft.add books book1 1.0 title \"The Great Gatsby\" description \"A classic American novel about the Jazz Age\" author \"F. Scott Fitzgerald\" category \"fiction,classic\" price \"12.99\"" \
    "Adding first book"

execute_cmd "ft.add books book2 1.0 title \"To Kill a Mockingbird\" description \"A novel about racial injustice in the American South\" author \"Harper Lee\" category \"fiction,classic\" price \"14.99\"" \
    "Adding second book"

execute_cmd "ft.add books book3 1.0 title \"Programming Rust\" description \"A comprehensive guide to Rust programming language\" author \"Jim Blandy\" category \"programming,technical\" price \"49.99\"" \
    "Adding third book"

execute_cmd "ft.add books book4 1.0 title \"The Rust Programming Language\" description \"The official book on Rust programming\" author \"Steve Klabnik\" category \"programming,technical\" price \"39.99\"" \
    "Adding fourth book"

# Test 3: Basic search
execute_cmd "ft.search books Rust" \
    "Searching for 'Rust'"

# Test 4: Search with filters
execute_cmd "ft.search books programming FILTER category programming" \
    "Searching for 'programming' with category filter"

# Test 5: Search with limit
execute_cmd "ft.search books \"*\" LIMIT 0 2" \
    "Getting first 2 documents"

# Test 6: Get index info
execute_cmd "ft.info books" \
    "Getting index information"

# Test 7: Delete a document
execute_cmd "ft.del books book1" \
    "Deleting book1"

# Test 8: Search again to verify deletion
execute_cmd "ft.search books Gatsby" \
    "Searching for deleted book"

# Test 9: Drop the index
execute_cmd "ft.drop books" \
    "Dropping the index"

echo "🎉 All tests completed successfully!"
echo "✅ Tantivy search integration is working correctly"
@@ -1,28 +0,0 @@

[package]
name = "herodb"
version = "0.0.1"
authors = ["Pin Fang <fpfangpin@hotmail.com>"]
edition = "2021"

[dependencies]
anyhow = "1.0.59"
bytes = "1.3.0"
thiserror = "1.0.32"
tokio = { version = "1.23.0", features = ["full"] }
clap = { version = "4.5.20", features = ["derive"] }
byteorder = "1.4.3"
futures = "0.3"
redb = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
bincode = "1.3.3"
chacha20poly1305 = "0.10.1"
rand = "0.8"
sha2 = "0.10"
age = "0.10"
secrecy = "0.8"
ed25519-dalek = "2"
base64 = "0.22"

[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }
@@ -1,227 +0,0 @@
# HeroDB Redis Protocol Support: Commands & Client Usage

HeroDB is a Redis-compatible database built using the `redb` database backend.
It supports a subset of Redis commands over the standard RESP (Redis Serialization Protocol) via TCP, allowing you to interact with it using standard Redis clients like `redis-cli`, Python's `redis-py`, Node.js's `ioredis`, etc.

This document provides:
- A list of all currently supported Redis commands.
- Example usage with standard Redis clients.
- Bash and Rust test-inspired usage examples.

## Quick Start

Assuming the server is running on localhost at port `6381` (as started below):

```bash
# Build HeroDB
cargo build --release

# Start HeroDB server
./target/release/herodb --dir /tmp/herodb_data --port 6381 --debug
```

## Using Standard Redis Clients

### With `redis-cli`

```bash
redis-cli -p 6381 SET mykey "hello"
redis-cli -p 6381 GET mykey
```

### With Python (`redis-py`)

```python
import redis

r = redis.Redis(host='localhost', port=6381, db=0)
r.set('mykey', 'hello')
print(r.get('mykey').decode())
```

### With Node.js (`ioredis`)

```js
const Redis = require("ioredis");
const redis = new Redis({ port: 6381, host: "localhost" });

await redis.set("mykey", "hello");
const value = await redis.get("mykey");
console.log(value); // "hello"
```

## Supported Redis Commands

### String Commands

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `SET` | Set a key to a string value | `SET name "Alice"` |
| `GET` | Get the value of a key | `GET name` |
| `DEL` | Delete one or more keys | `DEL name age` |
| `INCR` | Increment the integer value of a key | `INCR counter` |
| `DECR` | Decrement the integer value of a key | `DECR counter` |
| `INCRBY` | Increment key by a given integer | `INCRBY counter 5` |
| `DECRBY` | Decrement key by a given integer | `DECRBY counter 3` |
| `EXISTS` | Check if a key exists | `EXISTS name` |
| `TYPE` | Return the type of a key | `TYPE name` |

### Hash Commands

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `HSET` | Set field in hash stored at key | `HSET user:1 name "Alice"` |
| `HGET` | Get value of a field in hash | `HGET user:1 name` |
| `HGETALL` | Get all fields and values in a hash | `HGETALL user:1` |
| `HDEL` | Delete one or more fields from hash | `HDEL user:1 name age` |
| `HEXISTS` | Check if field exists in hash | `HEXISTS user:1 name` |
| `HKEYS` | Get all field names in a hash | `HKEYS user:1` |
| `HVALS` | Get all values in a hash | `HVALS user:1` |
| `HLEN` | Get number of fields in a hash | `HLEN user:1` |
| `HMGET` | Get values of multiple fields | `HMGET user:1 name age` |
| `HSETNX` | Set field only if it does not exist | `HSETNX user:1 email alice@example.com` |

### List Commands

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `LPUSH` | Insert elements at the head of a list | `LPUSH mylist "item1" "item2"` |
| `RPUSH` | Insert elements at the tail of a list | `RPUSH mylist "item3" "item4"` |
| `LPOP` | Remove and return element from head | `LPOP mylist` |
| `RPOP` | Remove and return element from tail | `RPOP mylist` |
| `BLPOP` | Blocking remove from head with timeout | `BLPOP mylist1 mylist2 5` |
| `BRPOP` | Blocking remove from tail with timeout | `BRPOP mylist1 mylist2 5` |
| `LLEN` | Get the length of a list | `LLEN mylist` |
| `LREM` | Remove elements from list | `LREM mylist 2 "item"` |
| `LTRIM` | Trim list to specified range | `LTRIM mylist 0 5` |
| `LINDEX` | Get element by index | `LINDEX mylist 0` |
| `LRANGE` | Get range of elements | `LRANGE mylist 0 -1` |

### Keys & Scanning

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `KEYS` | Find all keys matching a pattern | `KEYS user:*` |
| `SCAN` | Incrementally iterate keys | `SCAN 0 MATCH user:* COUNT 10` |

### Expiration

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `EXPIRE` | Set a key's time to live in seconds | `EXPIRE tempkey 60` |
| `TTL` | Get the time to live for a key | `TTL tempkey` |
| `PERSIST` | Remove the expiration from a key | `PERSIST tempkey` |

### Transactions

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `MULTI` | Start a transaction block | `MULTI` |
| `EXEC` | Execute all commands in a transaction | `EXEC` |
| `DISCARD` | Discard all commands in a transaction | `DISCARD` |

### Configuration

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `CONFIG GET` | Get configuration parameters | `CONFIG GET dir` |
| `CONFIG SET` | Set configuration parameters | `CONFIG SET maxmemory 100mb` |

### Info & Monitoring

| Command | Description | Example Usage |
|---------------|------------------------------------------|-------------------------------------------|
| `INFO` | Get information and statistics about server | `INFO` |
| `PING` | Ping the server | `PING` |

### AGE Cryptography Commands

| Command | Description | Example Usage |
|--------------------|-----------------------------------------------|-----------------------------------------------|
| `AGE GENENC` | Generate ephemeral encryption keypair | `AGE GENENC` |
| `AGE GENSIGN` | Generate ephemeral signing keypair | `AGE GENSIGN` |
| `AGE ENCRYPT` | Encrypt a message using a public key | `AGE ENCRYPT <recipient> "msg"` |
| `AGE DECRYPT` | Decrypt a message using a secret key | `AGE DECRYPT <identity> <ciphertext>` |
| `AGE SIGN` | Sign a message using a secret key | `AGE SIGN <sign_secret> "msg"` |
| `AGE VERIFY` | Verify a signature using a public key | `AGE VERIFY <pubkey> "msg" <signature>` |
| `AGE KEYGEN` | Create and persist a named encryption key | `AGE KEYGEN app1` |
| `AGE SIGNKEYGEN` | Create and persist a named signing key | `AGE SIGNKEYGEN app1` |
| `AGE ENCRYPTNAME` | Encrypt using a named key | `AGE ENCRYPTNAME app1 "msg"` |
| `AGE DECRYPTNAME` | Decrypt using a named key | `AGE DECRYPTNAME app1 <ciphertext>` |
| `AGE SIGNNAME` | Sign using a named key | `AGE SIGNNAME app1 "msg"` |
| `AGE VERIFYNAME` | Verify using a named key | `AGE VERIFYNAME app1 "msg" <signature>` |
| `AGE LIST` | List all persisted named keys | `AGE LIST` |

> Note: AGE commands are not part of standard Redis. They are HeroDB-specific extensions for cryptographic operations.
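An illustrative round trip with the ephemeral-key commands follows; the printed keys and ciphertext are placeholders, since real values are generated per call.

```bash
redis-cli -p 6381 AGE GENENC
# → 1) "age1..."               (recipient / public key)
#    2) "AGE-SECRET-KEY-1..."   (identity / secret key)

redis-cli -p 6381 AGE ENCRYPT "age1..." "hello"
# → ciphertext (base64)

redis-cli -p 6381 AGE DECRYPT "AGE-SECRET-KEY-1..." "<ciphertext>"
# → "hello"
```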
## Example Usage

### Basic String Operations

```bash
redis-cli -p 6381 SET greeting "Hello, HeroDB!"
redis-cli -p 6381 GET greeting
# → "Hello, HeroDB!"

redis-cli -p 6381 INCR visits
redis-cli -p 6381 INCR visits
redis-cli -p 6381 GET visits
# → "2"
```

### Hash Operations

```bash
redis-cli -p 6381 HSET user:1000 name "Alice" age "30" city "NYC"
redis-cli -p 6381 HGET user:1000 name
# → "Alice"

redis-cli -p 6381 HGETALL user:1000
# → 1) "name"
#    2) "Alice"
#    3) "age"
#    4) "30"
#    5) "city"
#    6) "NYC"
```

### Expiration

```bash
redis-cli -p 6381 SET tempkey "temporary"
redis-cli -p 6381 EXPIRE tempkey 5
redis-cli -p 6381 TTL tempkey
# → (integer) 4

# After 5 seconds:
redis-cli -p 6381 GET tempkey
# → (nil)
```

### Transactions

```bash
redis-cli -p 6381 MULTI
redis-cli -p 6381 SET txkey1 "value1"
redis-cli -p 6381 SET txkey2 "value2"
redis-cli -p 6381 INCR counter
redis-cli -p 6381 EXEC
# → 1) OK
#    2) OK
#    3) (integer) 3
```

### Scanning Keys

```bash
redis-cli -p 6381 SET scankey1 "val1"
redis-cli -p 6381 SET scankey2 "val2"
redis-cli -p 6381 HSET scanhash field1 "val1"

redis-cli -p 6381 SCAN 0 MATCH scankey*
# → 1) "0"
#    2) 1) "scankey1"
#       2) "scankey2"
```
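`HSCAN` walks the fields of a single hash with the same cursor protocol; a short illustration (the reply shape is assumed to mirror standard Redis):

```bash
redis-cli -p 6381 HSCAN scanhash 0 MATCH field*
# → 1) "0"
#    2) 1) "field1"
#       2) "val1"
```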
@@ -1,8 +0,0 @@
#[derive(Clone)]
pub struct DBOption {
    pub dir: String,
    pub port: u16,
    pub debug: bool,
    pub encrypt: bool,
    pub encryption_key: Option<String>, // Master encryption key
}
@@ -1,126 +0,0 @@
use std::{
    path::Path,
    time::{SystemTime, UNIX_EPOCH},
};

use redb::{Database, TableDefinition};
use serde::{Deserialize, Serialize};

use crate::crypto::CryptoFactory;
use crate::error::DBError;

// Re-export modules
mod storage_basic;
mod storage_hset;
mod storage_lists;
mod storage_extra;

// Re-export implementations
// Note: These imports are used by the impl blocks in the submodules.
// The compiler shows them as unused because they're not directly used in this file,
// but they're needed for the Storage struct methods to be available.
pub use storage_extra::*;

// Table definitions for different Redis data types
const TYPES_TABLE: TableDefinition<&str, &str> = TableDefinition::new("types");
const STRINGS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("strings");
const HASHES_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("hashes");
const LISTS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("lists");
const STREAMS_META_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("streams_meta");
const STREAMS_DATA_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("streams_data");
const ENCRYPTED_TABLE: TableDefinition<&str, u8> = TableDefinition::new("encrypted");
const EXPIRATION_TABLE: TableDefinition<&str, u64> = TableDefinition::new("expiration");

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct StreamEntry {
    pub fields: Vec<(String, String)>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ListValue {
    pub elements: Vec<String>,
}

#[inline]
pub fn now_in_millis() -> u128 {
    let start = SystemTime::now();
    let duration_since_epoch = start.duration_since(UNIX_EPOCH).unwrap();
    duration_since_epoch.as_millis()
}

pub struct Storage {
    db: Database,
    crypto: Option<CryptoFactory>,
}

impl Storage {
    pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
        let db = Database::create(path)?;

        // Create tables if they don't exist
        let write_txn = db.begin_write()?;
        {
            let _ = write_txn.open_table(TYPES_TABLE)?;
            let _ = write_txn.open_table(STRINGS_TABLE)?;
            let _ = write_txn.open_table(HASHES_TABLE)?;
            let _ = write_txn.open_table(LISTS_TABLE)?;
            let _ = write_txn.open_table(STREAMS_META_TABLE)?;
            let _ = write_txn.open_table(STREAMS_DATA_TABLE)?;
            let _ = write_txn.open_table(ENCRYPTED_TABLE)?;
            let _ = write_txn.open_table(EXPIRATION_TABLE)?;
        }
        write_txn.commit()?;

        // Check if database was previously encrypted
        let read_txn = db.begin_read()?;
        let encrypted_table = read_txn.open_table(ENCRYPTED_TABLE)?;
        let was_encrypted = encrypted_table.get("encrypted")?.map(|v| v.value() == 1).unwrap_or(false);
        drop(read_txn);

        let crypto = if should_encrypt || was_encrypted {
            if let Some(key) = master_key {
                Some(CryptoFactory::new(key.as_bytes()))
            } else {
                return Err(DBError("Encryption requested but no master key provided".to_string()));
            }
        } else {
            None
        };

        // If we're enabling encryption for the first time, mark it
        if should_encrypt && !was_encrypted {
            let write_txn = db.begin_write()?;
            {
                let mut encrypted_table = write_txn.open_table(ENCRYPTED_TABLE)?;
                encrypted_table.insert("encrypted", &1u8)?;
            }
            write_txn.commit()?;
        }

        Ok(Storage {
            db,
            crypto,
        })
    }

    pub fn is_encrypted(&self) -> bool {
        self.crypto.is_some()
    }

    // Helper methods for encryption
    fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
        if let Some(crypto) = &self.crypto {
            Ok(crypto.encrypt(data))
        } else {
            Ok(data.to_vec())
        }
    }

    fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
        if let Some(crypto) = &self.crypto {
            Ok(crypto.decrypt(data)?)
        } else {
            Ok(data.to_vec())
        }
    }
}
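Note the one-way `was_encrypted` marker above: once a database directory has been created with encryption, every later open must supply the key, even without the encrypt flag. A usage sketch follows; the flag names are assumed from the clap `Args` struct shown later in this diff.

```bash
# First run marks the database as encrypted (flag names are assumptions)...
./target/release/herodb --dir /tmp/herodb_enc --port 6381 --encrypt --encryption-key mysecret
# ...and every subsequent open of the same directory needs the same key,
# even without --encrypt, or Storage::new returns an error.
./target/release/herodb --dir /tmp/herodb_enc --port 6381 --encryption-key mysecret
```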
1251 specs/backgroundinfo/lance.md Normal file
File diff suppressed because it is too large

6847 specs/backgroundinfo/lancedb.md Normal file
File diff suppressed because it is too large

113 specs/backgroundinfo/sled.md Normal file
@@ -0,0 +1,113 @@
========================
CODE SNIPPETS
========================

TITLE: Basic Database Operations with sled in Rust

DESCRIPTION: This snippet demonstrates fundamental operations using the `sled` embedded database in Rust. It covers opening a database tree, inserting and retrieving key-value pairs, performing range queries, deleting entries, and executing an atomic compare-and-swap operation. It also shows how to flush changes to disk for durability.

SOURCE: https://github.com/spacejam/sled/blob/main/README.md#_snippet_0

LANGUAGE: Rust
CODE:
```
let tree = sled::open("/tmp/welcome-to-sled")?;

// insert and get, similar to std's BTreeMap
let old_value = tree.insert("key", "value")?;

assert_eq!(
    tree.get(&"key")?,
    Some(sled::IVec::from("value")),
);

// range queries
for kv_result in tree.range("key_1".."key_9") {}

// deletion
let old_value = tree.remove(&"key")?;

// atomic compare and swap
tree.compare_and_swap(
    "key",
    Some("current_value"),
    Some("new_value"),
)?;

// block until all operations are stable on disk
// (flush_async also available to get a Future)
tree.flush()?;
```

----------------------------------------

TITLE: Subscribing to sled Events Asynchronously (Rust)

DESCRIPTION: This snippet demonstrates how to asynchronously subscribe to events on key prefixes in a `sled` database. It initializes a `sled` database, creates a `Subscriber` for all key prefixes, inserts a key-value pair to trigger an event, and then uses `extreme::run` to await and process incoming events. The `Subscriber` struct implements `Future<Output=Option<Event>>`, allowing it to be awaited in an async context.

SOURCE: https://github.com/spacejam/sled/blob/main/README.md#_snippet_1

LANGUAGE: Rust
CODE:
```
let sled = sled::open("my_db").unwrap();

let mut sub = sled.watch_prefix("");

sled.insert(b"a", b"a").unwrap();

extreme::run(async move {
    while let Some(event) = (&mut sub).await {
        println!("got event {:?}", event);
    }
});
```

----------------------------------------

TITLE: Iterating Subscriber Events with Async/Await in Rust

DESCRIPTION: This snippet demonstrates how to asynchronously iterate over events from a `Subscriber` instance in Rust. Since `Subscriber` now implements `Future`, it can be awaited in a loop to process incoming events, enabling efficient prefix watching. The loop continues as long as new events are available.

SOURCE: https://github.com/spacejam/sled/blob/main/CHANGELOG.md#_snippet_0

LANGUAGE: Rust
CODE:
```
while let Some(event) = (&mut subscriber).await {}
```

----------------------------------------

TITLE: Suppressing TSAN Race on Arc::drop in Rust

DESCRIPTION: This suppression addresses a false positive race detection by ThreadSanitizer in Rust's `Arc::drop` implementation. TSAN fails to correctly reason about the raw atomic `Acquire` fence used after the strong-count atomic subtraction with a `Release` fence in the `Drop` implementation, leading to an erroneous race report.

SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_0

LANGUAGE: TSAN Suppression
CODE:
```
race:Arc*drop
```

----------------------------------------

TITLE: Suppressing TSAN Race on std::thread::local in Rust

DESCRIPTION: This suppression addresses ThreadSanitizer false positives when using Rust's `std::thread::local`. Similar to `lazy_static`, `std::thread::local` utilizes implicit barriers that TSAN fails to recognize, leading to incorrect race condition reports.

SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_2

LANGUAGE: TSAN Suppression
CODE:
```
race:std::thread::local
```

----------------------------------------

TITLE: Suppressing TSAN Race on lazy_static in Rust

DESCRIPTION: This suppression targets ThreadSanitizer false positives related to the `lazy_static` crate in Rust. `lazy_static` relies on implicit memory barriers that TSAN does not correctly detect, causing it to report races where none exist.

SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_1

LANGUAGE: TSAN Suppression
CODE:
```
race:lazy_static
```
735 specs/lance_implementation.md Normal file
@@ -0,0 +1,735 @@
Looking at your request to ensure embedding happens with an external embedding provider configured in Redis at `config:core:aiembed:url`, I'll modify the implementation to use an external embedding service instead of local models.

## Updated Implementation with External Embedding Provider

### 1. Update Dependencies in `Cargo.toml`

```toml
[dependencies]
# ... existing dependencies ...
lance = "0.18"
lance-index = "0.18"
lance-linalg = "0.18"
arrow = "52"
arrow-array = "52"
arrow-schema = "52"
parquet = "52"
uuid = { version = "1.10", features = ["v4"] }
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
base64 = "0.22"
image = "0.25"
```

### 2. Create Enhanced Lance Module with External Embedding

Create `src/lance_store.rs`:

```rust
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;

use arrow::array::{Float32Array, StringArray, BinaryArray, ArrayRef};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use lance::dataset::{Dataset, WriteParams, WriteMode};
use lance::index::vector::VectorIndexParams;
use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::ivf::IvfBuildParams;

use serde::{Deserialize, Serialize};
use crate::error::DBError;
use crate::cmd::Protocol;

#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingRequest {
    texts: Option<Vec<String>>,
    images: Option<Vec<String>>, // base64 encoded
    model: Option<String>,
}

#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingResponse {
    embeddings: Vec<Vec<f32>>,
    model: String,
    usage: Option<HashMap<String, u32>>,
}

pub struct LanceStore {
    datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
    data_dir: PathBuf,
    http_client: reqwest::Client,
}

impl LanceStore {
    pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> {
        // Create data directory if it doesn't exist
        std::fs::create_dir_all(&data_dir)
            .map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?;

        let http_client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .build()
            .map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?;

        Ok(Self {
            datasets: Arc::new(RwLock::new(HashMap::new())),
            data_dir,
            http_client,
        })
    }

    /// Get embedding service URL from Redis config
    async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
        // Get the embedding URL from Redis config
        let key = "config:core:aiembed:url";

        // Use HGET to retrieve the URL from Redis hash
        let cmd = crate::cmd::Cmd::HGet {
            key: key.to_string(),
            field: "url".to_string(),
        };

        // Execute command to get the config
        let result = cmd.run(server).await?;

        match result {
            Protocol::BulkString(url) => Ok(url),
            Protocol::SimpleString(url) => Ok(url),
            Protocol::Nil => Err(DBError(
                "Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>".to_string()
            )),
            _ => Err(DBError("Invalid embedding URL configuration".to_string())),
        }
    }

    /// Call external embedding service
    async fn call_embedding_service(
        &self,
        server: &crate::server::Server,
        texts: Option<Vec<String>>,
        images: Option<Vec<String>>,
    ) -> Result<Vec<Vec<f32>>, DBError> {
        let url = self.get_embedding_url(server).await?;

        let request = EmbeddingRequest {
            texts,
            images,
            model: None, // Let the service use its default
        };

        let response = self.http_client
            .post(&url)
            .json(&request)
            .send()
            .await
            .map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await.unwrap_or_default();
            return Err(DBError(format!(
                "Embedding service returned error {}: {}",
                status, error_text
            )));
        }

        let embedding_response: EmbeddingResponse = response
            .json()
            .await
            .map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;

        Ok(embedding_response.embeddings)
    }

    pub async fn embed_text(
        &self,
        server: &crate::server::Server,
        texts: Vec<String>
    ) -> Result<Vec<Vec<f32>>, DBError> {
        if texts.is_empty() {
            return Ok(Vec::new());
        }

        self.call_embedding_service(server, Some(texts), None).await
    }

    pub async fn embed_image(
        &self,
        server: &crate::server::Server,
        image_bytes: Vec<u8>
    ) -> Result<Vec<f32>, DBError> {
        // Convert image bytes to base64
        let base64_image = base64::encode(&image_bytes);

        let embeddings = self.call_embedding_service(
            server,
            None,
            Some(vec![base64_image])
        ).await?;

        embeddings.into_iter()
            .next()
            .ok_or_else(|| DBError("No embedding returned for image".to_string()))
    }

    pub async fn create_dataset(
        &self,
        name: &str,
        schema: Schema,
    ) -> Result<(), DBError> {
        let dataset_path = self.data_dir.join(format!("{}.lance", name));

        // Create empty dataset with schema
        let write_params = WriteParams {
            mode: WriteMode::Create,
            ..Default::default()
        };

        // Create an empty RecordBatch with the schema
        let empty_batch = RecordBatch::new_empty(Arc::new(schema));
        let batches = vec![empty_batch];

        let dataset = Dataset::write(
            batches,
            dataset_path.to_str().unwrap(),
            Some(write_params)
        ).await
        .map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?;

        let mut datasets = self.datasets.write().await;
        datasets.insert(name.to_string(), Arc::new(dataset));

        Ok(())
    }

    pub async fn write_vectors(
        &self,
        dataset_name: &str,
        vectors: Vec<Vec<f32>>,
        metadata: Option<HashMap<String, Vec<String>>>,
    ) -> Result<usize, DBError> {
        let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));

        // Open or get cached dataset
        let dataset = self.get_or_open_dataset(dataset_name).await?;

        // Build RecordBatch
        let num_vectors = vectors.len();
        if num_vectors == 0 {
            return Ok(0);
        }

        let dim = vectors.first()
            .ok_or_else(|| DBError("Empty vectors".to_string()))?
            .len();

        // Flatten vectors
        let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
        let vector_array = Float32Array::from(flat_vectors);
        let vector_array = arrow::array::FixedSizeListArray::try_new_from_values(
            vector_array,
            dim as i32
        ).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;

        let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
        let mut fields = vec![Field::new(
            "vector",
            DataType::FixedSizeList(
                Arc::new(Field::new("item", DataType::Float32, true)),
                dim as i32
            ),
            false
        )];

        // Add metadata columns if provided
        if let Some(metadata) = metadata {
            for (key, values) in metadata {
                if values.len() != num_vectors {
                    return Err(DBError(format!(
                        "Metadata field '{}' has {} values but expected {}",
                        key, values.len(), num_vectors
                    )));
                }
                let array = StringArray::from(values);
                arrays.push(Arc::new(array));
                fields.push(Field::new(&key, DataType::Utf8, true));
            }
        }

        let schema = Arc::new(Schema::new(fields));
        let batch = RecordBatch::try_new(schema, arrays)
            .map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?;

        // Append to dataset
        let write_params = WriteParams {
            mode: WriteMode::Append,
            ..Default::default()
        };

        Dataset::write(
            vec![batch],
            dataset_path.to_str().unwrap(),
            Some(write_params)
        ).await
        .map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?;

        // Refresh cached dataset
        let mut datasets = self.datasets.write().await;
        datasets.remove(dataset_name);

        Ok(num_vectors)
    }

    pub async fn search_vectors(
        &self,
        dataset_name: &str,
        query_vector: Vec<f32>,
        k: usize,
        nprobes: Option<usize>,
        refine_factor: Option<usize>,
    ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
        let dataset = self.get_or_open_dataset(dataset_name).await?;

        // Build query
        let mut query = dataset.scan();
        query = query.nearest(
            "vector",
            &query_vector,
            k,
        ).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;

        if let Some(nprobes) = nprobes {
            query = query.nprobes(nprobes);
        }

        if let Some(refine) = refine_factor {
            query = query.refine_factor(refine);
        }

        // Execute search
        let results = query
            .try_into_stream()
            .await
            .map_err(|e| DBError(format!("Failed to execute search: {}", e)))?
            .try_collect::<Vec<_>>()
            .await
            .map_err(|e| DBError(format!("Failed to collect results: {}", e)))?;

        // Process results
        let mut output = Vec::new();
        for batch in results {
            // Get distances
            let distances = batch
                .column_by_name("_distance")
                .ok_or_else(|| DBError("No distance column".to_string()))?
                .as_any()
                .downcast_ref::<Float32Array>()
                .ok_or_else(|| DBError("Invalid distance type".to_string()))?;

            // Get metadata
            for i in 0..batch.num_rows() {
                let distance = distances.value(i);
                let mut metadata = HashMap::new();

                for field in batch.schema().fields() {
                    if field.name() != "vector" && field.name() != "_distance" {
                        if let Some(col) = batch.column_by_name(field.name()) {
                            if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() {
                                if !str_array.is_null(i) {
                                    metadata.insert(
                                        field.name().to_string(),
                                        str_array.value(i).to_string()
                                    );
                                }
                            }
                        }
                    }
                }

                output.push((distance, metadata));
            }
        }

        Ok(output)
    }

    pub async fn store_multimodal(
        &self,
        server: &crate::server::Server,
        dataset_name: &str,
        text: Option<String>,
        image_bytes: Option<Vec<u8>>,
        metadata: HashMap<String, String>,
    ) -> Result<String, DBError> {
        // Generate ID
        let id = uuid::Uuid::new_v4().to_string();

        // Generate embeddings using external service
        let embedding = if let Some(text) = text.as_ref() {
            self.embed_text(server, vec![text.clone()]).await?
                .into_iter()
                .next()
                .ok_or_else(|| DBError("No embedding returned".to_string()))?
        } else if let Some(img) = image_bytes.as_ref() {
            self.embed_image(server, img.clone()).await?
        } else {
            return Err(DBError("No text or image provided".to_string()));
        };

        // Prepare metadata
        let mut full_metadata = metadata;
        full_metadata.insert("id".to_string(), id.clone());
        if let Some(text) = text {
            full_metadata.insert("text".to_string(), text);
        }
        if let Some(img) = image_bytes {
            full_metadata.insert("image_base64".to_string(), base64::encode(img));
        }

        // Convert metadata to column vectors
        let mut metadata_cols = HashMap::new();
        for (key, value) in full_metadata {
            metadata_cols.insert(key, vec![value]);
        }

        // Write to dataset
        self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?;

        Ok(id)
    }

    pub async fn search_with_text(
        &self,
        server: &crate::server::Server,
        dataset_name: &str,
        query_text: String,
        k: usize,
        nprobes: Option<usize>,
        refine_factor: Option<usize>,
    ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
        // Embed the query text using external service
        let embeddings = self.embed_text(server, vec![query_text]).await?;
        let query_vector = embeddings.into_iter()
            .next()
            .ok_or_else(|| DBError("No embedding returned for query".to_string()))?;

        // Search with the embedding
        self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await
    }

    pub async fn create_index(
        &self,
        dataset_name: &str,
        index_type: &str,
        num_partitions: Option<usize>,
        num_sub_vectors: Option<usize>,
    ) -> Result<(), DBError> {
        let dataset = self.get_or_open_dataset(dataset_name).await?;

        let mut params = VectorIndexParams::default();

        match index_type.to_uppercase().as_str() {
            "IVF_PQ" => {
                params.ivf = IvfBuildParams {
                    num_partitions: num_partitions.unwrap_or(256),
                    ..Default::default()
                };
                params.pq = PQBuildParams {
                    num_sub_vectors: num_sub_vectors.unwrap_or(16),
                    ..Default::default()
                };
            }
            _ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
        }

        dataset.create_index(
            &["vector"],
            lance::index::IndexType::Vector,
            None,
            &params,
            true
        ).await
        .map_err(|e| DBError(format!("Failed to create index: {}", e)))?;

        Ok(())
    }

    async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> {
        let mut datasets = self.datasets.write().await;

        if let Some(dataset) = datasets.get(name) {
            return Ok(dataset.clone());
        }

        let dataset_path = self.data_dir.join(format!("{}.lance", name));
        if !dataset_path.exists() {
            return Err(DBError(format!("Dataset '{}' does not exist", name)));
        }

        let dataset = Dataset::open(dataset_path.to_str().unwrap())
            .await
            .map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?;

        let dataset = Arc::new(dataset);
        datasets.insert(name.to_string(), dataset.clone());

        Ok(dataset)
    }

    pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> {
        let mut datasets = Vec::new();

        let entries = std::fs::read_dir(&self.data_dir)
            .map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?;

        for entry in entries {
            let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?;
            let path = entry.path();

            if path.is_dir() {
                if let Some(name) = path.file_name() {
                    if let Some(name_str) = name.to_str() {
                        if name_str.ends_with(".lance") {
                            let dataset_name = name_str.trim_end_matches(".lance");
                            datasets.push(dataset_name.to_string());
                        }
                    }
                }
            }
        }

        Ok(datasets)
    }

    pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> {
        // Remove from cache
        let mut datasets = self.datasets.write().await;
        datasets.remove(name);

        // Delete from disk
        let dataset_path = self.data_dir.join(format!("{}.lance", name));
        if dataset_path.exists() {
            std::fs::remove_dir_all(dataset_path)
                .map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?;
        }

        Ok(())
    }

    pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> {
        let dataset = self.get_or_open_dataset(name).await?;

        let mut info = HashMap::new();
        info.insert("name".to_string(), name.to_string());
        info.insert("version".to_string(), dataset.version().to_string());
        info.insert("num_rows".to_string(), dataset.count_rows().await?.to_string());

        // Get schema info
        let schema = dataset.schema();
        let fields: Vec<String> = schema.fields()
            .iter()
            .map(|f| format!("{}:{}", f.name(), f.data_type()))
            .collect();
        info.insert("schema".to_string(), fields.join(", "));

        Ok(info)
    }
}
```

### 3. Update Command Implementations

Update the command implementations to pass the server reference for embedding service access:

```rust
// In cmd.rs, update the lance command implementations

async fn lance_store_cmd(
    server: &Server,
    dataset: &str,
    text: Option<String>,
    image_base64: Option<String>,
    metadata: HashMap<String, String>,
) -> Result<Protocol, DBError> {
    let lance_store = server.lance_store()?;

    // Decode image if provided
    let image_bytes = if let Some(b64) = image_base64 {
        Some(base64::decode(b64).map_err(|e|
            DBError(format!("Invalid base64 image: {}", e)))?)
    } else {
        None
    };

    // Pass server reference for embedding service access
    let id = lance_store.store_multimodal(
        server, // Pass server to access Redis config
        dataset,
        text,
        image_bytes,
        metadata,
    ).await?;

    Ok(Protocol::BulkString(id))
}

async fn lance_embed_text_cmd(
    server: &Server,
    texts: &[String],
) -> Result<Protocol, DBError> {
    let lance_store = server.lance_store()?;

    // Pass server reference for embedding service access
    let embeddings = lance_store.embed_text(server, texts.to_vec()).await?;

    // Return as array of vectors
    let mut output = Vec::new();
    for embedding in embeddings {
        let vector_str = format!("[{}]",
            embedding.iter()
                .map(|f| f.to_string())
                .collect::<Vec<_>>()
                .join(",")
        );
        output.push(Protocol::BulkString(vector_str));
    }

    Ok(Protocol::Array(output))
}

async fn lance_search_text_cmd(
    server: &Server,
    dataset: &str,
    query_text: &str,
    k: usize,
    nprobes: Option<usize>,
    refine_factor: Option<usize>,
) -> Result<Protocol, DBError> {
    let lance_store = server.lance_store()?;

    // Search using text query (will be embedded automatically)
    let results = lance_store.search_with_text(
        server,
        dataset,
        query_text.to_string(),
        k,
        nprobes,
        refine_factor,
    ).await?;

    // Format results
    let mut output = Vec::new();
    for (distance, metadata) in results {
        let metadata_json = serde_json::to_string(&metadata)
            .unwrap_or_else(|_| "{}".to_string());

        output.push(Protocol::Array(vec![
            Protocol::BulkString(distance.to_string()),
            Protocol::BulkString(metadata_json),
        ]));
    }

    Ok(Protocol::Array(output))
}

// Add new command for text-based search
pub enum Cmd {
    // ... existing commands ...
    LanceSearchText {
        dataset: String,
        query_text: String,
        k: usize,
        nprobes: Option<usize>,
        refine_factor: Option<usize>,
    },
}
```

## Usage Examples

### 1. Configure the Embedding Service

First, users need to configure the embedding service URL:

```bash
# Configure the embedding service endpoint
redis-cli> HSET config:core:aiembed:url url "http://localhost:8000/embeddings"
OK

# Or use a cloud service
redis-cli> HSET config:core:aiembed:url url "https://api.openai.com/v1/embeddings"
OK
```

### 2. Use Lance Commands with Automatic External Embedding

```bash
# Create a dataset
redis-cli> LANCE.CREATE products DIM 1536 SCHEMA name:string price:float category:string
OK

# Store text with automatic embedding (calls external service)
redis-cli> LANCE.STORE products TEXT "Wireless noise-canceling headphones with 30-hour battery" name:AirPods price:299.99 category:Electronics
"uuid-123-456"

# Search using text query (automatically embeds the query)
redis-cli> LANCE.SEARCH.TEXT products "best headphones for travel" K 5
1) "0.92"
2) "{\"id\":\"uuid-123\",\"name\":\"AirPods\",\"price\":\"299.99\"}"

# Get embeddings directly
redis-cli> LANCE.EMBED.TEXT "This text will be embedded"
1) "[0.123, 0.456, 0.789, ...]"
```

## External Embedding Service API Specification

The external embedding service should accept POST requests with this format:

```json
// Request
{
  "texts": ["text1", "text2"],          // Optional
  "images": ["base64_img1"],            // Optional
  "model": "text-embedding-ada-002"     // Optional
}

// Response
{
  "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
  "model": "text-embedding-ada-002",
  "usage": {
    "prompt_tokens": 100,
    "total_tokens": 100
  }
}
```
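Before wiring HeroDB to a provider, the endpoint can be smoke-tested directly; a sketch assuming the local URL from the configuration example above:

```bash
# Hypothetical smoke test of the embedding endpoint; URL and payload follow
# the request spec above, and the response should contain an "embeddings" array.
curl -s -X POST http://localhost:8000/embeddings \
  -H 'Content-Type: application/json' \
  -d '{"texts": ["hello world"]}'
```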
## Error Handling

The implementation includes comprehensive error handling:

1. **Missing Configuration**: Clear error message if embedding URL not configured
2. **Service Failures**: Graceful handling of embedding service errors
3. **Timeout Protection**: 30-second timeout for embedding requests
4. **Retry Logic**: Could be added for resilience

## Benefits of This Approach

1. **Flexibility**: Supports any embedding service with compatible API
2. **Cost Control**: Use your preferred embedding provider
3. **Scalability**: Embedding service can be scaled independently
4. **Consistency**: All embeddings use the same configured service
5. **Security**: API keys and endpoints stored securely in Redis

This implementation ensures that all embedding operations go through the external service configured in Redis, providing a clean separation between the vector database functionality and the embedding generation.

TODO EXTRA:

- secret for the embedding service API key
@@ -1,7 +1,7 @@
-use crate::{error::DBError, protocol::Protocol, server::Server};
+use crate::{error::DBError, protocol::Protocol, server::Server, search_cmd};
-use serde::Serialize;
 use tokio::time::{timeout, Duration};
 use futures::future::select_all;
+use std::collections::HashMap;
 
 #[derive(Debug, Clone)]
 pub enum Cmd {
@@ -85,6 +85,41 @@ pub enum Cmd {
     AgeSignName(String, String), // name, message
     AgeVerifyName(String, String, String), // name, message, signature_b64
     AgeList,
+
+    // Full-text search commands with schema support
+    FtCreate {
+        index_name: String,
+        schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
+    },
+    FtAdd {
+        index_name: String,
+        doc_id: String,
+        score: f64,
+        fields: std::collections::HashMap<String, String>,
+    },
+    FtSearch {
+        index_name: String,
+        query: String,
+        filters: Vec<(String, String)>, // field, value pairs
+        limit: Option<usize>,
+        offset: Option<usize>,
+        return_fields: Option<Vec<String>>,
+    },
+    FtDel(String, String), // index_name, doc_id
+    FtInfo(String), // index_name
+    FtDrop(String), // index_name
+    FtAlter {
+        index_name: String,
+        field_name: String,
+        field_type: String,
+        options: Vec<String>,
+    },
+    FtAggregate {
+        index_name: String,
+        query: String,
+        group_by: Vec<String>,
+        reducers: Vec<String>,
+    },
 }
 
 impl Cmd {
@@ -617,6 +652,148 @@ impl Cmd {
                     _ => return Err(DBError(format!("unsupported AGE subcommand {:?}", cmd))),
                 }
             }
+            "ft.create" => {
+                if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
+                    return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
+                }
+
+                let index_name = cmd[1].clone();
+                let mut schema = Vec::new();
+                let mut i = 3;
+
+                while i < cmd.len() {
+                    if i + 1 >= cmd.len() {
+                        return Err(DBError("ERR incomplete field definition".to_string()));
+                    }
+
+                    let field_name = cmd[i].clone();
+                    let field_type = cmd[i + 1].to_uppercase();
+                    let mut options = Vec::new();
+                    i += 2;
+
+                    // Parse field options until we hit another field name or end
+                    while i < cmd.len() && !["TEXT", "NUMERIC", "TAG", "GEO"].contains(&cmd[i].to_uppercase().as_str()) {
+                        options.push(cmd[i].to_uppercase());
+                        i += 1;
+
+                        // If this option takes a value, consume it too
+                        if i > 0 && ["SEPARATOR", "WEIGHT"].contains(&cmd[i-1].to_uppercase().as_str()) && i < cmd.len() {
+                            options.push(cmd[i].clone());
+                            i += 1;
+                        }
+                    }
+
+                    schema.push((field_name, field_type, options));
+                }
+
+                Cmd::FtCreate {
+                    index_name,
+                    schema,
+                }
+            }
+            "ft.add" => {
+                if cmd.len() < 5 {
+                    return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
+                }
+
+                let index_name = cmd[1].clone();
+                let doc_id = cmd[2].clone();
+                let score = cmd[3].parse::<f64>()
+                    .map_err(|_| DBError("ERR score must be a number".to_string()))?;
+
+                let mut fields = HashMap::new();
+                let mut i = 4;
+
+                while i + 1 < cmd.len() {
+                    fields.insert(cmd[i].clone(), cmd[i + 1].clone());
+                    i += 2;
+                }
+
+                Cmd::FtAdd {
+                    index_name,
+                    doc_id,
+                    score,
+                    fields,
+                }
+            }
+            "ft.search" => {
+                if cmd.len() < 3 {
+                    return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
+                }
+
+                let index_name = cmd[1].clone();
+                let query = cmd[2].clone();
+
+                let mut filters = Vec::new();
+                let mut limit = None;
+                let mut offset = None;
+                let mut return_fields = None;
+
+                let mut i = 3;
+                while i < cmd.len() {
+                    match cmd[i].to_uppercase().as_str() {
+                        "FILTER" => {
+                            if i + 3 >= cmd.len() {
+                                return Err(DBError("ERR FILTER requires field and value".to_string()));
+                            }
+                            filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
+                            i += 3;
+                        }
+                        "LIMIT" => {
+                            if i + 2 >= cmd.len() {
+                                return Err(DBError("ERR LIMIT requires offset and num".to_string()));
+                            }
+                            offset = Some(cmd[i + 1].parse().unwrap_or(0));
+                            limit = Some(cmd[i + 2].parse().unwrap_or(10));
+                            i += 3;
+                        }
+                        "RETURN" => {
+                            if i + 1 >= cmd.len() {
+                                return Err(DBError("ERR RETURN requires field count".to_string()));
+                            }
+                            let count: usize = cmd[i + 1].parse().unwrap_or(0);
+                            i += 2;
+
+                            let mut fields = Vec::new();
+                            for _ in 0..count {
+                                if i < cmd.len() {
+                                    fields.push(cmd[i].clone());
+                                    i += 1;
+                                }
+                            }
+                            return_fields = Some(fields);
+                        }
+                        _ => i += 1,
+                    }
+                }
+
+                Cmd::FtSearch {
+                    index_name,
+                    query,
+                    filters,
+                    limit,
+                    offset,
+                    return_fields,
+                }
+            }
+            "ft.del" => {
+                if cmd.len() != 3 {
+                    return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
+                }
+                Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
+            }
+            "ft.info" => {
+                if cmd.len() != 2 {
+                    return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
+                }
+                Cmd::FtInfo(cmd[1].clone())
+            }
+            "ft.drop" => {
+                if cmd.len() != 2 {
+                    return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
+                }
+                Cmd::FtDrop(cmd[1].clone())
+            }
             _ => Cmd::Unknow(cmd[0].clone()),
         },
         protocol,
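Concretely, the grammar parsed above accepts invocations like the following (mirroring the test script earlier in this diff; index and field names are illustrative, and the port assumes the quick-start setup):

```bash
redis-cli -p 6381 ft.add books book1 1.0 title "Dune" category "scifi"
redis-cli -p 6381 ft.search books Dune RETURN 1 title
redis-cli -p 6381 ft.search books scifi FILTER category scifi LIMIT 0 2
redis-cli -p 6381 ft.del books book1
redis-cli -p 6381 ft.info books
```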
@@ -731,6 +908,34 @@ impl Cmd {
             Cmd::AgeSignName(name, message) => Ok(crate::age::cmd_age_sign_name(server, &name, &message).await),
             Cmd::AgeVerifyName(name, message, sig_b64) => Ok(crate::age::cmd_age_verify_name(server, &name, &message, &sig_b64).await),
             Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
+
+            // Full-text search commands
+            Cmd::FtCreate { index_name, schema } => {
+                search_cmd::ft_create_cmd(server, index_name, schema).await
+            }
+            Cmd::FtAdd { index_name, doc_id, score, fields } => {
+                search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
+            }
+            Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
+                search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
+            }
+            Cmd::FtDel(index_name, doc_id) => {
+                search_cmd::ft_del_cmd(server, index_name, doc_id).await
+            }
+            Cmd::FtInfo(index_name) => {
+                search_cmd::ft_info_cmd(server, index_name).await
+            }
+            Cmd::FtDrop(index_name) => {
+                search_cmd::ft_drop_cmd(server, index_name).await
+            }
+            Cmd::FtAlter { .. } => {
+                // Not implemented yet
+                Ok(Protocol::err("FT.ALTER not implemented yet"))
+            }
+            Cmd::FtAggregate { .. } => {
+                // Not implemented yet
+                Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
+            }
             Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
         }
     }
@@ -1093,26 +1298,23 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
     }
 }
 
-#[derive(Serialize)]
-struct ServerInfo {
-    redis_version: String,
-    encrypted: bool,
-    selected_db: u64,
-}
-
 async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
-    let info = ServerInfo {
-        redis_version: "7.0.0".to_string(),
-        encrypted: server.current_storage()?.is_encrypted(),
-        selected_db: server.selected_db,
-    };
+    let storage_info = server.current_storage()?.info()?;
+    let mut info_map: std::collections::HashMap<String, String> = storage_info.into_iter().collect();
+    info_map.insert("redis_version".to_string(), "7.0.0".to_string());
+    info_map.insert("selected_db".to_string(), server.selected_db.to_string());
+    info_map.insert("backend".to_string(), format!("{:?}", server.option.backend));
 
     let mut info_string = String::new();
-    info_string.push_str(&format!("# Server\n"));
-    info_string.push_str(&format!("redis_version:{}\n", info.redis_version));
-    info_string.push_str(&format!("encrypted:{}\n", if info.encrypted { 1 } else { 0 }));
-    info_string.push_str(&format!("# Keyspace\n"));
-    info_string.push_str(&format!("db{}:keys=0,expires=0,avg_ttl=0\n", info.selected_db));
+    info_string.push_str("# Server\n");
+    info_string.push_str(&format!("redis_version:{}\n", info_map.get("redis_version").unwrap()));
+    info_string.push_str(&format!("backend:{}\n", info_map.get("backend").unwrap()));
+    info_string.push_str(&format!("encrypted:{}\n", info_map.get("is_encrypted").unwrap()));
+    info_string.push_str("# Keyspace\n");
+    info_string.push_str(&format!("db{}:keys={},expires=0,avg_ttl=0\n", info_map.get("selected_db").unwrap(), info_map.get("db_size").unwrap()));
 
     match section {
         Some(s) => {
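For orientation, the INFO text this hunk assembles looks roughly like the following (a sketch assuming a fresh, unencrypted redb database on db 0; db_size and is_encrypted come from the backend's info(), and the backend name is the Debug rendering of BackendType):

    # Server
    redis_version:7.0.0
    backend:Redb
    encrypted:false
    # Keyspace
    db0:keys=0,expires=0,avg_ttl=0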
@@ -23,6 +23,7 @@ impl From<CryptoError> for crate::error::DBError {
 }
 
 /// Super-simple factory: new(secret) + encrypt(bytes) + decrypt(bytes)
+#[derive(Clone)]
 pub struct CryptoFactory {
     key: chacha20poly1305::Key,
 }
@@ -4,5 +4,9 @@ pub mod crypto;
 pub mod error;
 pub mod options;
 pub mod protocol;
+pub mod search_cmd; // Add this
 pub mod server;
 pub mod storage;
+pub mod storage_trait; // Add this
+pub mod storage_sled; // Add this
+pub mod tantivy_search;
@@ -30,6 +30,10 @@ struct Args {
     /// Encrypt the database
     #[arg(long)]
     encrypt: bool,
+
+    /// Use the sled backend
+    #[arg(long)]
+    sled: bool,
 }
 
 #[tokio::main]
@@ -51,6 +55,11 @@ async fn main() {
         debug: args.debug,
         encryption_key: args.encryption_key,
         encrypt: args.encrypt,
+        backend: if args.sled {
+            herodb::options::BackendType::Sled
+        } else {
+            herodb::options::BackendType::Redb
+        },
     };
 
     // new server
15	src/options.rs	Normal file
@@ -0,0 +1,15 @@
#[derive(Debug, Clone)]
pub enum BackendType {
    Redb,
    Sled,
}

#[derive(Debug, Clone)]
pub struct DBOption {
    pub dir: String,
    pub port: u16,
    pub debug: bool,
    pub encrypt: bool,
    pub encryption_key: Option<String>,
    pub backend: BackendType,
}
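As a usage sketch, a caller might fill DBOption like this (the field values here are illustrative, not defaults from the codebase):

    let opt = herodb::options::DBOption {
        dir: "/tmp/herodb".to_string(), // data directory (illustrative)
        port: 6379,                     // listen port (illustrative)
        debug: false,
        encrypt: false,
        encryption_key: None,
        backend: herodb::options::BackendType::Sled, // or BackendType::Redb
    };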
272	src/search_cmd.rs	Normal file
@@ -0,0 +1,272 @@
use crate::{
    error::DBError,
    protocol::Protocol,
    server::Server,
    tantivy_search::{
        TantivySearch, FieldDef, NumericType, IndexConfig,
        SearchOptions, Filter, FilterType
    },
};
use std::collections::HashMap;
use std::sync::Arc;

pub async fn ft_create_cmd(
    server: &Server,
    index_name: String,
    schema: Vec<(String, String, Vec<String>)>,
) -> Result<Protocol, DBError> {
    // Parse schema into field definitions
    let mut field_definitions = Vec::new();

    for (field_name, field_type, options) in schema {
        let field_def = match field_type.to_uppercase().as_str() {
            "TEXT" => {
                let mut weight = 1.0;
                let mut sortable = false;
                let mut no_index = false;

                for opt in &options {
                    match opt.to_uppercase().as_str() {
                        "WEIGHT" => {
                            // Next option should be the weight value
                            if let Some(idx) = options.iter().position(|x| x == opt) {
                                if idx + 1 < options.len() {
                                    weight = options[idx + 1].parse().unwrap_or(1.0);
                                }
                            }
                        }
                        "SORTABLE" => sortable = true,
                        "NOINDEX" => no_index = true,
                        _ => {}
                    }
                }

                FieldDef::Text {
                    stored: true,
                    indexed: !no_index,
                    tokenized: true,
                    fast: sortable,
                }
            }
            "NUMERIC" => {
                let mut sortable = false;

                for opt in &options {
                    if opt.to_uppercase() == "SORTABLE" {
                        sortable = true;
                    }
                }

                FieldDef::Numeric {
                    stored: true,
                    indexed: true,
                    fast: sortable,
                    precision: NumericType::F64,
                }
            }
            "TAG" => {
                let mut separator = ",".to_string();
                let mut case_sensitive = false;

                for i in 0..options.len() {
                    match options[i].to_uppercase().as_str() {
                        "SEPARATOR" => {
                            if i + 1 < options.len() {
                                separator = options[i + 1].clone();
                            }
                        }
                        "CASESENSITIVE" => case_sensitive = true,
                        _ => {}
                    }
                }

                FieldDef::Tag {
                    stored: true,
                    separator,
                    case_sensitive,
                }
            }
            "GEO" => {
                FieldDef::Geo { stored: true }
            }
            _ => {
                return Err(DBError(format!("Unknown field type: {}", field_type)));
            }
        };

        field_definitions.push((field_name, field_def));
    }

    // Create the search index
    let search_path = server.search_index_path();
    let config = IndexConfig::default();

    println!("Creating search index '{}' at path: {:?}", index_name, search_path);
    println!("Field definitions: {:?}", field_definitions);

    let search_index = TantivySearch::new_with_schema(
        search_path,
        index_name.clone(),
        field_definitions,
        Some(config),
    )?;

    println!("Search index '{}' created successfully", index_name);

    // Store in registry
    let mut indexes = server.search_indexes.write().unwrap();
    indexes.insert(index_name, Arc::new(search_index));

    Ok(Protocol::SimpleString("OK".to_string()))
}

pub async fn ft_add_cmd(
    server: &Server,
    index_name: String,
    doc_id: String,
    _score: f64,
    fields: HashMap<String, String>,
) -> Result<Protocol, DBError> {
    let indexes = server.search_indexes.read().unwrap();

    let search_index = indexes.get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;

    search_index.add_document_with_fields(&doc_id, fields)?;

    Ok(Protocol::SimpleString("OK".to_string()))
}

pub async fn ft_search_cmd(
    server: &Server,
    index_name: String,
    query: String,
    filters: Vec<(String, String)>,
    limit: Option<usize>,
    offset: Option<usize>,
    return_fields: Option<Vec<String>>,
) -> Result<Protocol, DBError> {
    let indexes = server.search_indexes.read().unwrap();

    let search_index = indexes.get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;

    // Convert filters to search filters
    let search_filters = filters.into_iter().map(|(field, value)| {
        Filter {
            field,
            filter_type: FilterType::Equals(value),
        }
    }).collect();

    let options = SearchOptions {
        limit: limit.unwrap_or(10),
        offset: offset.unwrap_or(0),
        filters: search_filters,
        sort_by: None,
        return_fields,
        highlight: false,
    };

    let results = search_index.search_with_options(&query, options)?;

    // Format results as Redis protocol
    let mut response = Vec::new();

    // First element is the total count
    response.push(Protocol::SimpleString(results.total.to_string()));

    // Then each document
    for doc in results.documents {
        let mut doc_array = Vec::new();

        // Add document ID if it exists
        if let Some(id) = doc.fields.get("_id") {
            doc_array.push(Protocol::BulkString(id.clone()));
        }

        // Add score
        doc_array.push(Protocol::BulkString(doc.score.to_string()));

        // Add fields as key-value pairs
        for (field_name, field_value) in doc.fields {
            if field_name != "_id" {
                doc_array.push(Protocol::BulkString(field_name));
                doc_array.push(Protocol::BulkString(field_value));
            }
        }

        response.push(Protocol::Array(doc_array));
    }

    Ok(Protocol::Array(response))
}

pub async fn ft_del_cmd(
    server: &Server,
    index_name: String,
    doc_id: String,
) -> Result<Protocol, DBError> {
    let indexes = server.search_indexes.read().unwrap();

    let _search_index = indexes.get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;

    // For now, return success
    // In a full implementation, we'd need to add a delete method to TantivySearch
    println!("Deleting document '{}' from index '{}'", doc_id, index_name);

    Ok(Protocol::SimpleString("1".to_string()))
}

pub async fn ft_info_cmd(
    server: &Server,
    index_name: String,
) -> Result<Protocol, DBError> {
    let indexes = server.search_indexes.read().unwrap();

    let search_index = indexes.get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;

    let info = search_index.get_info()?;

    // Format info as Redis protocol
    let mut response = Vec::new();

    response.push(Protocol::BulkString("index_name".to_string()));
    response.push(Protocol::BulkString(info.name));

    response.push(Protocol::BulkString("num_docs".to_string()));
    response.push(Protocol::BulkString(info.num_docs.to_string()));

    response.push(Protocol::BulkString("num_fields".to_string()));
    response.push(Protocol::BulkString(info.fields.len().to_string()));

    response.push(Protocol::BulkString("fields".to_string()));
    let fields_str = info.fields.iter()
        .map(|f| format!("{}:{}", f.name, f.field_type))
        .collect::<Vec<_>>()
        .join(", ");
    response.push(Protocol::BulkString(fields_str));

    Ok(Protocol::Array(response))
}

pub async fn ft_drop_cmd(
    server: &Server,
    index_name: String,
) -> Result<Protocol, DBError> {
    let mut indexes = server.search_indexes.write().unwrap();

    if indexes.remove(&index_name).is_some() {
        // Also remove the index files from disk
        let index_path = server.search_index_path().join(&index_name);
        if index_path.exists() {
            std::fs::remove_dir_all(index_path)
                .map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
        }
        Ok(Protocol::SimpleString("OK".to_string()))
    } else {
        Err(DBError(format!("Index '{}' not found", index_name)))
    }
}
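For reference, the array ft_search_cmd builds has this shape when a client such as redis-cli renders it (values here are illustrative): the first element is the total match count, followed by one nested array per document containing the _id (when stored), the score, and then field/value pairs.

    1) "2"
    2) 1) "doc:1"
       2) "1.0"
       3) "title"
       4) "hello world"
    3) 1) "doc:2"
       2) "0.8"
       3) "title"
       4) "hello again"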
@@ -4,6 +4,7 @@ use std::sync::Arc;
 use tokio::io::AsyncReadExt;
 use tokio::io::AsyncWriteExt;
 use tokio::sync::{Mutex, oneshot};
+use std::sync::RwLock;
 use std::sync::atomic::{AtomicU64, Ordering};
 
@@ -12,10 +13,14 @@ use crate::error::DBError;
 use crate::options;
 use crate::protocol::Protocol;
 use crate::storage::Storage;
+use crate::storage_sled::SledStorage;
+use crate::storage_trait::StorageBackend;
+use crate::tantivy_search::TantivySearch;
 
 #[derive(Clone)]
 pub struct Server {
-    pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<Storage>>>>,
+    pub db_cache: Arc<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
+    pub search_indexes: Arc<RwLock<HashMap<String, Arc<TantivySearch>>>>,
     pub option: options::DBOption,
     pub client_name: Option<String>,
     pub selected_db: u64, // Changed from usize to u64
@@ -41,7 +46,8 @@ pub enum PopSide {
 impl Server {
     pub async fn new(option: options::DBOption) -> Self {
         Server {
-            db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
+            db_cache: Arc::new(RwLock::new(HashMap::new())),
+            search_indexes: Arc::new(RwLock::new(HashMap::new())),
             option,
             client_name: None,
             selected_db: 0,
@@ -52,7 +58,7 @@ impl Server {
         }
     }
 
-    pub fn current_storage(&self) -> Result<Arc<Storage>, DBError> {
+    pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
         let mut cache = self.db_cache.write().unwrap();
 
         if let Some(storage) = cache.get(&self.selected_db) {
@@ -73,11 +79,22 @@ impl Server {
 
         println!("Creating new db file: {}", db_file_path.display());
 
-        let storage = Arc::new(Storage::new(
-            db_file_path,
-            self.should_encrypt_db(self.selected_db),
-            self.option.encryption_key.as_deref()
-        )?);
+        let storage: Arc<dyn StorageBackend> = match self.option.backend {
+            options::BackendType::Redb => {
+                Arc::new(Storage::new(
+                    db_file_path,
+                    self.should_encrypt_db(self.selected_db),
+                    self.option.encryption_key.as_deref()
+                )?)
+            }
+            options::BackendType::Sled => {
+                Arc::new(SledStorage::new(
+                    db_file_path,
+                    self.should_encrypt_db(self.selected_db),
+                    self.option.encryption_key.as_deref()
+                )?)
+            }
+        };
 
         cache.insert(self.selected_db, storage.clone());
         Ok(storage)
@@ -88,6 +105,11 @@ impl Server {
         self.option.encrypt && db_index >= 10
     }
 
+    // Add method to get search index path
+    pub fn search_index_path(&self) -> std::path::PathBuf {
+        std::path::PathBuf::from(&self.option.dir).join("search_indexes")
+    }
+
     // ----- BLPOP waiter helpers -----
 
     pub async fn register_waiter(&self, db_index: u64, key: &str, side: PopSide) -> (u64, oneshot::Receiver<(String, String)>) {
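A small sketch of the path helper added above (assuming option.dir is "/tmp/herodb"; all FT indexes live under <dir>/search_indexes and ft_create_cmd creates each index beneath it by name):

    assert_eq!(
        server.search_index_path(),
        std::path::PathBuf::from("/tmp/herodb").join("search_indexes")
    );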
287	src/storage/mod.rs	Normal file
@@ -0,0 +1,287 @@
use std::{
    path::Path,
    sync::Arc,
    time::{SystemTime, UNIX_EPOCH},
};

use redb::{Database, TableDefinition};
use serde::{Deserialize, Serialize};

use crate::crypto::CryptoFactory;
use crate::error::DBError;

// Re-export modules
mod storage_basic;
mod storage_hset;
mod storage_lists;
mod storage_extra;

// Re-export implementations
// Note: These imports are used by the impl blocks in the submodules
// The compiler shows them as unused because they're not directly used in this file
// but they're needed for the Storage struct methods to be available
pub use storage_extra::*;

// Table definitions for different Redis data types
const TYPES_TABLE: TableDefinition<&str, &str> = TableDefinition::new("types");
const STRINGS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("strings");
const HASHES_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("hashes");
const LISTS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("lists");
const STREAMS_META_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("streams_meta");
const STREAMS_DATA_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("streams_data");
const ENCRYPTED_TABLE: TableDefinition<&str, u8> = TableDefinition::new("encrypted");
const EXPIRATION_TABLE: TableDefinition<&str, u64> = TableDefinition::new("expiration");

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct StreamEntry {
    pub fields: Vec<(String, String)>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ListValue {
    pub elements: Vec<String>,
}

#[inline]
pub fn now_in_millis() -> u128 {
    let start = SystemTime::now();
    let duration_since_epoch = start.duration_since(UNIX_EPOCH).unwrap();
    duration_since_epoch.as_millis()
}

pub struct Storage {
    db: Database,
    crypto: Option<CryptoFactory>,
}

impl Storage {
    pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
        let db = Database::create(path)?;

        // Create tables if they don't exist
        let write_txn = db.begin_write()?;
        {
            let _ = write_txn.open_table(TYPES_TABLE)?;
            let _ = write_txn.open_table(STRINGS_TABLE)?;
            let _ = write_txn.open_table(HASHES_TABLE)?;
            let _ = write_txn.open_table(LISTS_TABLE)?;
            let _ = write_txn.open_table(STREAMS_META_TABLE)?;
            let _ = write_txn.open_table(STREAMS_DATA_TABLE)?;
            let _ = write_txn.open_table(ENCRYPTED_TABLE)?;
            let _ = write_txn.open_table(EXPIRATION_TABLE)?;
        }
        write_txn.commit()?;

        // Check if database was previously encrypted
        let read_txn = db.begin_read()?;
        let encrypted_table = read_txn.open_table(ENCRYPTED_TABLE)?;
        let was_encrypted = encrypted_table.get("encrypted")?.map(|v| v.value() == 1).unwrap_or(false);
        drop(read_txn);

        let crypto = if should_encrypt || was_encrypted {
            if let Some(key) = master_key {
                Some(CryptoFactory::new(key.as_bytes()))
            } else {
                return Err(DBError("Encryption requested but no master key provided".to_string()));
            }
        } else {
            None
        };

        // If we're enabling encryption for the first time, mark it
        if should_encrypt && !was_encrypted {
            let write_txn = db.begin_write()?;
            {
                let mut encrypted_table = write_txn.open_table(ENCRYPTED_TABLE)?;
                encrypted_table.insert("encrypted", &1u8)?;
            }
            write_txn.commit()?;
        }

        Ok(Storage {
            db,
            crypto,
        })
    }

    pub fn is_encrypted(&self) -> bool {
        self.crypto.is_some()
    }

    // Helper methods for encryption
    fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
        if let Some(crypto) = &self.crypto {
            Ok(crypto.encrypt(data))
        } else {
            Ok(data.to_vec())
        }
    }

    fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
        if let Some(crypto) = &self.crypto {
            Ok(crypto.decrypt(data)?)
        } else {
            Ok(data.to_vec())
        }
    }
}

use crate::storage_trait::StorageBackend;

impl StorageBackend for Storage {
    fn get(&self, key: &str) -> Result<Option<String>, DBError> {
        self.get(key)
    }

    fn set(&self, key: String, value: String) -> Result<(), DBError> {
        self.set(key, value)
    }

    fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError> {
        self.setx(key, value, expire_ms)
    }

    fn del(&self, key: String) -> Result<(), DBError> {
        self.del(key)
    }

    fn exists(&self, key: &str) -> Result<bool, DBError> {
        self.exists(key)
    }

    fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError> {
        self.keys(pattern)
    }

    fn dbsize(&self) -> Result<i64, DBError> {
        self.dbsize()
    }

    fn flushdb(&self) -> Result<(), DBError> {
        self.flushdb()
    }

    fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError> {
        self.get_key_type(key)
    }

    fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
        self.scan(cursor, pattern, count)
    }

    fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
        self.hscan(key, cursor, pattern, count)
    }

    fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError> {
        self.hset(key, pairs)
    }

    fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError> {
        self.hget(key, field)
    }

    fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError> {
        self.hgetall(key)
    }

    fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError> {
        self.hdel(key, fields)
    }

    fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError> {
        self.hexists(key, field)
    }

    fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError> {
        self.hkeys(key)
    }

    fn hvals(&self, key: &str) -> Result<Vec<String>, DBError> {
        self.hvals(key)
    }

    fn hlen(&self, key: &str) -> Result<i64, DBError> {
        self.hlen(key)
    }

    fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError> {
        self.hmget(key, fields)
    }

    fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError> {
        self.hsetnx(key, field, value)
    }

    fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
        self.lpush(key, elements)
    }

    fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
        self.rpush(key, elements)
    }

    fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
        self.lpop(key, count)
    }

    fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
        self.rpop(key, count)
    }

    fn llen(&self, key: &str) -> Result<i64, DBError> {
        self.llen(key)
    }

    fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError> {
        self.lindex(key, index)
    }

    fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError> {
        self.lrange(key, start, stop)
    }

    fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError> {
        self.ltrim(key, start, stop)
    }

    fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError> {
        self.lrem(key, count, element)
    }

    fn ttl(&self, key: &str) -> Result<i64, DBError> {
        self.ttl(key)
    }

    fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError> {
        self.expire_seconds(key, secs)
    }

    fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError> {
        self.pexpire_millis(key, ms)
    }

    fn persist(&self, key: &str) -> Result<bool, DBError> {
        self.persist(key)
    }

    fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError> {
        self.expire_at_seconds(key, ts_secs)
    }

    fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError> {
        self.pexpire_at_millis(key, ts_ms)
    }

    fn is_encrypted(&self) -> bool {
        self.is_encrypted()
    }

    fn info(&self) -> Result<Vec<(String, String)>, DBError> {
        self.info()
    }

    fn clone_arc(&self) -> Arc<dyn StorageBackend> {
        unimplemented!("Storage cloning not yet implemented for redb backend")
    }
}
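A minimal sketch of the trait-object indirection this file enables (path and values illustrative; error handling elided with ?). The trait impl simply delegates to the inherent Storage methods defined in the submodules, so any backend can be driven through Arc<dyn StorageBackend>:

    use std::sync::Arc;
    use herodb::storage::Storage;
    use herodb::storage_trait::StorageBackend;

    // Open an unencrypted redb-backed store and drive it through the trait.
    let storage: Arc<dyn StorageBackend> = Arc::new(Storage::new("/tmp/0.db", false, None)?);
    storage.set("greeting".to_string(), "hello".to_string())?;
    assert_eq!(storage.get("greeting")?, Some("hello".to_string()));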
@@ -208,6 +208,14 @@ impl Storage {
         write_txn.commit()?;
         Ok(applied)
     }
+
+    pub fn info(&self) -> Result<Vec<(String, String)>, DBError> {
+        let dbsize = self.dbsize()?;
+        Ok(vec![
+            ("db_size".to_string(), dbsize.to_string()),
+            ("is_encrypted".to_string(), self.is_encrypted().to_string()),
+        ])
+    }
 }
 
 // Utility function for glob pattern matching
845
src/storage_sled/mod.rs
Normal file
845
src/storage_sled/mod.rs
Normal file
@@ -0,0 +1,845 @@
|
|||||||
|
// src/storage_sled/mod.rs
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use crate::error::DBError;
|
||||||
|
use crate::storage_trait::StorageBackend;
|
||||||
|
use crate::crypto::CryptoFactory;
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
enum ValueType {
|
||||||
|
String(String),
|
||||||
|
Hash(HashMap<String, String>),
|
||||||
|
List(Vec<String>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
struct StorageValue {
|
||||||
|
value: ValueType,
|
||||||
|
expires_at: Option<u128>, // milliseconds since epoch
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SledStorage {
|
||||||
|
db: sled::Db,
|
||||||
|
types: sled::Tree,
|
||||||
|
crypto: Option<CryptoFactory>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SledStorage {
|
||||||
|
pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
|
||||||
|
let db = sled::open(path).map_err(|e| DBError(format!("Failed to open sled: {}", e)))?;
|
||||||
|
let types = db.open_tree("types").map_err(|e| DBError(format!("Failed to open types tree: {}", e)))?;
|
||||||
|
|
||||||
|
// Check if database was previously encrypted
|
||||||
|
let encrypted_tree = db.open_tree("encrypted").map_err(|e| DBError(e.to_string()))?;
|
||||||
|
let was_encrypted = encrypted_tree.get("encrypted")
|
||||||
|
.map_err(|e| DBError(e.to_string()))?
|
||||||
|
.map(|v| v[0] == 1)
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
|
let crypto = if should_encrypt || was_encrypted {
|
||||||
|
if let Some(key) = master_key {
|
||||||
|
Some(CryptoFactory::new(key.as_bytes()))
|
||||||
|
} else {
|
||||||
|
return Err(DBError("Encryption requested but no master key provided".to_string()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// Mark database as encrypted if enabling encryption
|
||||||
|
if should_encrypt && !was_encrypted {
|
||||||
|
encrypted_tree.insert("encrypted", &[1u8])
|
||||||
|
.map_err(|e| DBError(e.to_string()))?;
|
||||||
|
encrypted_tree.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(SledStorage { db, types, crypto })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn now_millis() -> u128 {
|
||||||
|
SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_millis()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
|
||||||
|
if let Some(crypto) = &self.crypto {
|
||||||
|
Ok(crypto.encrypt(data))
|
||||||
|
} else {
|
||||||
|
Ok(data.to_vec())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
|
||||||
|
if let Some(crypto) = &self.crypto {
|
||||||
|
Ok(crypto.decrypt(data)?)
|
||||||
|
} else {
|
||||||
|
Ok(data.to_vec())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_storage_value(&self, key: &str) -> Result<Option<StorageValue>, DBError> {
|
||||||
|
match self.db.get(key).map_err(|e| DBError(e.to_string()))? {
|
||||||
|
Some(encrypted_data) => {
|
||||||
|
let decrypted = self.decrypt_if_needed(&encrypted_data)?;
|
||||||
|
let storage_val: StorageValue = bincode::deserialize(&decrypted)
|
||||||
|
.map_err(|e| DBError(format!("Deserialization error: {}", e)))?;
|
||||||
|
|
||||||
|
// Check expiration
|
||||||
|
if let Some(expires_at) = storage_val.expires_at {
|
||||||
|
if Self::now_millis() > expires_at {
|
||||||
|
// Expired, remove it
|
||||||
|
self.db.remove(key).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
self.types.remove(key).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(storage_val))
|
||||||
|
}
|
||||||
|
None => Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_storage_value(&self, key: &str, storage_val: StorageValue) -> Result<(), DBError> {
|
||||||
|
let data = bincode::serialize(&storage_val)
|
||||||
|
.map_err(|e| DBError(format!("Serialization error: {}", e)))?;
|
||||||
|
let encrypted = self.encrypt_if_needed(&data)?;
|
||||||
|
self.db.insert(key, encrypted).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
|
||||||
|
// Store type info (unencrypted for efficiency)
|
||||||
|
let type_str = match &storage_val.value {
|
||||||
|
ValueType::String(_) => "string",
|
||||||
|
ValueType::Hash(_) => "hash",
|
||||||
|
ValueType::List(_) => "list",
|
||||||
|
};
|
||||||
|
self.types.insert(key, type_str.as_bytes()).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn glob_match(pattern: &str, text: &str) -> bool {
|
||||||
|
if pattern == "*" {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let pattern_chars: Vec<char> = pattern.chars().collect();
|
||||||
|
let text_chars: Vec<char> = text.chars().collect();
|
||||||
|
|
||||||
|
fn match_recursive(pattern: &[char], text: &[char], pi: usize, ti: usize) -> bool {
|
||||||
|
if pi >= pattern.len() {
|
||||||
|
return ti >= text.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ti >= text.len() {
|
||||||
|
return pattern[pi..].iter().all(|&c| c == '*');
|
||||||
|
}
|
||||||
|
|
||||||
|
match pattern[pi] {
|
||||||
|
'*' => {
|
||||||
|
for i in ti..=text.len() {
|
||||||
|
if match_recursive(pattern, text, pi + 1, i) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
'?' => match_recursive(pattern, text, pi + 1, ti + 1),
|
||||||
|
c => {
|
||||||
|
if text[ti] == c {
|
||||||
|
match_recursive(pattern, text, pi + 1, ti + 1)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match_recursive(&pattern_chars, &text_chars, 0, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StorageBackend for SledStorage {
|
||||||
|
fn get(&self, key: &str) -> Result<Option<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::String(s) => Ok(Some(s)),
|
||||||
|
_ => Ok(None)
|
||||||
|
}
|
||||||
|
None => Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set(&self, key: String, value: String) -> Result<(), DBError> {
|
||||||
|
let storage_val = StorageValue {
|
||||||
|
value: ValueType::String(value),
|
||||||
|
expires_at: None,
|
||||||
|
};
|
||||||
|
self.set_storage_value(&key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError> {
|
||||||
|
let storage_val = StorageValue {
|
||||||
|
value: ValueType::String(value),
|
||||||
|
expires_at: Some(Self::now_millis() + expire_ms),
|
||||||
|
};
|
||||||
|
self.set_storage_value(&key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn del(&self, key: String) -> Result<(), DBError> {
|
||||||
|
self.db.remove(&key).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
self.types.remove(&key).map_err(|e| DBError(e.to_string()))?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn exists(&self, key: &str) -> Result<bool, DBError> {
|
||||||
|
// Check with expiration
|
||||||
|
Ok(self.get_storage_value(key)?.is_some())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError> {
|
||||||
|
let mut keys = Vec::new();
|
||||||
|
for item in self.types.iter() {
|
||||||
|
let (key_bytes, _) = item.map_err(|e| DBError(e.to_string()))?;
|
||||||
|
let key = String::from_utf8_lossy(&key_bytes).to_string();
|
||||||
|
|
||||||
|
// Check if key is expired
|
||||||
|
if self.get_storage_value(&key)?.is_some() {
|
||||||
|
if Self::glob_match(pattern, &key) {
|
||||||
|
keys.push(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut current_cursor = 0u64;
|
||||||
|
let limit = count.unwrap_or(10) as usize;
|
||||||
|
|
||||||
|
for item in self.types.iter() {
|
||||||
|
if current_cursor >= cursor {
|
||||||
|
let (key_bytes, type_bytes) = item.map_err(|e| DBError(e.to_string()))?;
|
||||||
|
let key = String::from_utf8_lossy(&key_bytes).to_string();
|
||||||
|
|
||||||
|
// Check pattern match
|
||||||
|
let matches = if let Some(pat) = pattern {
|
||||||
|
Self::glob_match(pat, &key)
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
};
|
||||||
|
|
||||||
|
if matches {
|
||||||
|
// Check if key is expired and get value
|
||||||
|
if let Some(storage_val) = self.get_storage_value(&key)? {
|
||||||
|
let value = match storage_val.value {
|
||||||
|
ValueType::String(s) => s,
|
||||||
|
_ => String::from_utf8_lossy(&type_bytes).to_string(),
|
||||||
|
};
|
||||||
|
result.push((key, value));
|
||||||
|
|
||||||
|
if result.len() >= limit {
|
||||||
|
current_cursor += 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_cursor += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let next_cursor = if result.len() < limit { 0 } else { current_cursor };
|
||||||
|
Ok((next_cursor, result))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dbsize(&self) -> Result<i64, DBError> {
|
||||||
|
let mut count = 0i64;
|
||||||
|
for item in self.types.iter() {
|
||||||
|
let (key_bytes, _) = item.map_err(|e| DBError(e.to_string()))?;
|
||||||
|
let key = String::from_utf8_lossy(&key_bytes).to_string();
|
||||||
|
if self.get_storage_value(&key)?.is_some() {
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(count)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flushdb(&self) -> Result<(), DBError> {
|
||||||
|
self.db.clear().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
self.types.clear().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError> {
|
||||||
|
// First check if key exists (handles expiration)
|
||||||
|
if self.get_storage_value(key)?.is_some() {
|
||||||
|
match self.types.get(key).map_err(|e| DBError(e.to_string()))? {
|
||||||
|
Some(data) => Ok(Some(String::from_utf8_lossy(&data).to_string())),
|
||||||
|
None => Ok(None)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hash operations
|
||||||
|
fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError> {
|
||||||
|
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
|
||||||
|
value: ValueType::Hash(HashMap::new()),
|
||||||
|
expires_at: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
let hash = match &mut storage_val.value {
|
||||||
|
ValueType::Hash(h) => h,
|
||||||
|
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut new_fields = 0i64;
|
||||||
|
for (field, value) in pairs {
|
||||||
|
if !hash.contains_key(&field) {
|
||||||
|
new_fields += 1;
|
||||||
|
}
|
||||||
|
hash.insert(field, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(new_fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.get(field).cloned()),
|
||||||
|
_ => Ok(None)
|
||||||
|
}
|
||||||
|
None => Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.into_iter().collect()),
|
||||||
|
_ => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
None => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut current_cursor = 0u64;
|
||||||
|
let limit = count.unwrap_or(10) as usize;
|
||||||
|
|
||||||
|
for (field, value) in h.iter() {
|
||||||
|
if current_cursor >= cursor {
|
||||||
|
let matches = if let Some(pat) = pattern {
|
||||||
|
Self::glob_match(pat, field)
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
};
|
||||||
|
|
||||||
|
if matches {
|
||||||
|
result.push((field.clone(), value.clone()));
|
||||||
|
if result.len() >= limit {
|
||||||
|
current_cursor += 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_cursor += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let next_cursor = if result.len() < limit { 0 } else { current_cursor };
|
||||||
|
Ok((next_cursor, result))
|
||||||
|
}
|
||||||
|
_ => Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string()))
|
||||||
|
}
|
||||||
|
None => Ok((0, Vec::new()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError> {
|
||||||
|
let mut storage_val = match self.get_storage_value(key)? {
|
||||||
|
Some(sv) => sv,
|
||||||
|
None => return Ok(0)
|
||||||
|
};
|
||||||
|
|
||||||
|
let hash = match &mut storage_val.value {
|
||||||
|
ValueType::Hash(h) => h,
|
||||||
|
_ => return Ok(0)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut deleted = 0i64;
|
||||||
|
for field in fields {
|
||||||
|
if hash.remove(&field).is_some() {
|
||||||
|
deleted += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if hash.is_empty() {
|
||||||
|
self.del(key.to_string())?;
|
||||||
|
} else {
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(deleted)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.contains_key(field)),
|
||||||
|
_ => Ok(false)
|
||||||
|
}
|
||||||
|
None => Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.keys().cloned().collect()),
|
||||||
|
_ => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
None => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hvals(&self, key: &str) -> Result<Vec<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.values().cloned().collect()),
|
||||||
|
_ => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
None => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hlen(&self, key: &str) -> Result<i64, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => Ok(h.len() as i64),
|
||||||
|
_ => Ok(0)
|
||||||
|
}
|
||||||
|
None => Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::Hash(h) => {
|
||||||
|
Ok(fields.into_iter().map(|f| h.get(&f).cloned()).collect())
|
||||||
|
}
|
||||||
|
_ => Ok(fields.into_iter().map(|_| None).collect())
|
||||||
|
}
|
||||||
|
None => Ok(fields.into_iter().map(|_| None).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError> {
|
||||||
|
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
|
||||||
|
value: ValueType::Hash(HashMap::new()),
|
||||||
|
expires_at: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
let hash = match &mut storage_val.value {
|
||||||
|
ValueType::Hash(h) => h,
|
||||||
|
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
if hash.contains_key(field) {
|
||||||
|
Ok(false)
|
||||||
|
} else {
|
||||||
|
hash.insert(field.to_string(), value.to_string());
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// List operations
|
||||||
|
fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
|
||||||
|
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
|
||||||
|
value: ValueType::List(Vec::new()),
|
||||||
|
expires_at: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
for element in elements.into_iter().rev() {
|
||||||
|
list.insert(0, element);
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = list.len() as i64;
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(len)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
|
||||||
|
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
|
||||||
|
value: ValueType::List(Vec::new()),
|
||||||
|
expires_at: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
list.extend(elements);
|
||||||
|
let len = list.len() as i64;
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
Ok(len)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
|
||||||
|
let mut storage_val = match self.get_storage_value(key)? {
|
||||||
|
Some(sv) => sv,
|
||||||
|
None => return Ok(Vec::new())
|
||||||
|
};
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Ok(Vec::new())
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for _ in 0..count.min(list.len() as u64) {
|
||||||
|
if let Some(elem) = list.first() {
|
||||||
|
result.push(elem.clone());
|
||||||
|
list.remove(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if list.is_empty() {
|
||||||
|
self.del(key.to_string())?;
|
||||||
|
} else {
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
|
||||||
|
let mut storage_val = match self.get_storage_value(key)? {
|
||||||
|
Some(sv) => sv,
|
||||||
|
None => return Ok(Vec::new())
|
||||||
|
};
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Ok(Vec::new())
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for _ in 0..count.min(list.len() as u64) {
|
||||||
|
if let Some(elem) = list.pop() {
|
||||||
|
result.push(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if list.is_empty() {
|
||||||
|
self.del(key.to_string())?;
|
||||||
|
} else {
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn llen(&self, key: &str) -> Result<i64, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::List(l) => Ok(l.len() as i64),
|
||||||
|
_ => Ok(0)
|
||||||
|
}
|
||||||
|
None => Ok(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::List(list) => {
|
||||||
|
let actual_index = if index < 0 {
|
||||||
|
list.len() as i64 + index
|
||||||
|
} else {
|
||||||
|
index
|
||||||
|
};
|
||||||
|
|
||||||
|
if actual_index >= 0 && (actual_index as usize) < list.len() {
|
||||||
|
Ok(Some(list[actual_index as usize].clone()))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => Ok(None)
|
||||||
|
}
|
||||||
|
None => Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError> {
|
||||||
|
match self.get_storage_value(key)? {
|
||||||
|
Some(storage_val) => match storage_val.value {
|
||||||
|
ValueType::List(list) => {
|
||||||
|
if list.is_empty() {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = list.len() as i64;
|
||||||
|
let start_idx = if start < 0 {
|
||||||
|
std::cmp::max(0, len + start)
|
||||||
|
} else {
|
||||||
|
std::cmp::min(start, len)
|
||||||
|
};
|
||||||
|
let stop_idx = if stop < 0 {
|
||||||
|
std::cmp::max(-1, len + stop)
|
||||||
|
} else {
|
||||||
|
std::cmp::min(stop, len - 1)
|
||||||
|
};
|
||||||
|
|
||||||
|
if start_idx > stop_idx || start_idx >= len {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let start_usize = start_idx as usize;
|
||||||
|
let stop_usize = (stop_idx + 1) as usize;
|
||||||
|
|
||||||
|
Ok(list[start_usize..std::cmp::min(stop_usize, list.len())].to_vec())
|
||||||
|
}
|
||||||
|
_ => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
None => Ok(Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError> {
|
||||||
|
let mut storage_val = match self.get_storage_value(key)? {
|
||||||
|
Some(sv) => sv,
|
||||||
|
None => return Ok(())
|
||||||
|
};
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Ok(())
|
||||||
|
};
|
||||||
|
|
||||||
|
if list.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = list.len() as i64;
|
||||||
|
let start_idx = if start < 0 {
|
||||||
|
std::cmp::max(0, len + start)
|
||||||
|
} else {
|
||||||
|
std::cmp::min(start, len)
|
||||||
|
};
|
||||||
|
let stop_idx = if stop < 0 {
|
||||||
|
std::cmp::max(-1, len + stop)
|
||||||
|
} else {
|
||||||
|
std::cmp::min(stop, len - 1)
|
||||||
|
};
|
||||||
|
|
||||||
|
if start_idx > stop_idx || start_idx >= len {
|
||||||
|
self.del(key.to_string())?;
|
||||||
|
} else {
|
||||||
|
let start_usize = start_idx as usize;
|
||||||
|
let stop_usize = (stop_idx + 1) as usize;
|
||||||
|
*list = list[start_usize..std::cmp::min(stop_usize, list.len())].to_vec();
|
||||||
|
|
||||||
|
if list.is_empty() {
|
||||||
|
self.del(key.to_string())?;
|
||||||
|
} else {
|
||||||
|
self.set_storage_value(key, storage_val)?;
|
||||||
|
self.db.flush().map_err(|e| DBError(e.to_string()))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError> {
|
||||||
|
let mut storage_val = match self.get_storage_value(key)? {
|
||||||
|
Some(sv) => sv,
|
||||||
|
None => return Ok(0)
|
||||||
|
};
|
||||||
|
|
||||||
|
let list = match &mut storage_val.value {
|
||||||
|
ValueType::List(l) => l,
|
||||||
|
_ => return Ok(0)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut removed = 0i64;
|
||||||
|
|
||||||
|
if count == 0 {
|
||||||
|
// Remove all occurrences
|
||||||
|
let original_len = list.len();
|
||||||
|
list.retain(|x| x != element);
|
||||||
|
removed = (original_len - list.len()) as i64;
|
||||||
|
} else if count > 0 {
|
||||||
|
            // Remove first count occurrences
            let mut to_remove = count as usize;
            list.retain(|x| {
                if x == element && to_remove > 0 {
                    to_remove -= 1;
                    removed += 1;
                    false
                } else {
                    true
                }
            });
        } else {
            // Remove last |count| occurrences
            let mut to_remove = (-count) as usize;
            for i in (0..list.len()).rev() {
                if list[i] == element && to_remove > 0 {
                    list.remove(i);
                    to_remove -= 1;
                    removed += 1;
                }
            }
        }

        if list.is_empty() {
            self.del(key.to_string())?;
        } else {
            self.set_storage_value(key, storage_val)?;
            self.db.flush().map_err(|e| DBError(e.to_string()))?;
        }

        Ok(removed)
    }

    // Expiration
    fn ttl(&self, key: &str) -> Result<i64, DBError> {
        match self.get_storage_value(key)? {
            Some(storage_val) => {
                if let Some(expires_at) = storage_val.expires_at {
                    let now = Self::now_millis();
                    if now >= expires_at {
                        Ok(-2) // Key has expired
                    } else {
                        Ok(((expires_at - now) / 1000) as i64) // TTL in seconds
                    }
                } else {
                    Ok(-1) // Key exists but has no expiration
                }
            }
            None => Ok(-2), // Key does not exist
        }
    }

    fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError> {
        let mut storage_val = match self.get_storage_value(key)? {
            Some(sv) => sv,
            None => return Ok(false),
        };

        storage_val.expires_at = Some(Self::now_millis() + (secs as u128) * 1000);
        self.set_storage_value(key, storage_val)?;
        self.db.flush().map_err(|e| DBError(e.to_string()))?;
        Ok(true)
    }

    fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError> {
        let mut storage_val = match self.get_storage_value(key)? {
            Some(sv) => sv,
            None => return Ok(false),
        };

        storage_val.expires_at = Some(Self::now_millis() + ms);
        self.set_storage_value(key, storage_val)?;
        self.db.flush().map_err(|e| DBError(e.to_string()))?;
        Ok(true)
    }

    fn persist(&self, key: &str) -> Result<bool, DBError> {
        let mut storage_val = match self.get_storage_value(key)? {
            Some(sv) => sv,
            None => return Ok(false),
        };

        if storage_val.expires_at.is_some() {
            storage_val.expires_at = None;
            self.set_storage_value(key, storage_val)?;
            self.db.flush().map_err(|e| DBError(e.to_string()))?;
            Ok(true)
        } else {
            Ok(false)
        }
    }

    fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError> {
        let mut storage_val = match self.get_storage_value(key)? {
            Some(sv) => sv,
            None => return Ok(false),
        };

        let expires_at_ms: u128 = if ts_secs <= 0 { 0 } else { (ts_secs as u128) * 1000 };
        storage_val.expires_at = Some(expires_at_ms);
        self.set_storage_value(key, storage_val)?;
        self.db.flush().map_err(|e| DBError(e.to_string()))?;
        Ok(true)
    }

    fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError> {
        let mut storage_val = match self.get_storage_value(key)? {
            Some(sv) => sv,
            None => return Ok(false),
        };

        let expires_at_ms: u128 = if ts_ms <= 0 { 0 } else { ts_ms as u128 };
        storage_val.expires_at = Some(expires_at_ms);
        self.set_storage_value(key, storage_val)?;
        self.db.flush().map_err(|e| DBError(e.to_string()))?;
        Ok(true)
    }

    fn is_encrypted(&self) -> bool {
        self.crypto.is_some()
    }

    fn info(&self) -> Result<Vec<(String, String)>, DBError> {
        let dbsize = self.dbsize()?;
        Ok(vec![
            ("db_size".to_string(), dbsize.to_string()),
            ("is_encrypted".to_string(), self.is_encrypted().to_string()),
        ])
    }

    fn clone_arc(&self) -> Arc<dyn StorageBackend> {
        // Note: This is a simplified clone - in production you might want to
        // handle this differently as sled::Db is already Arc internally
        Arc::new(SledStorage {
            db: self.db.clone(),
            types: self.types.clone(),
            crypto: self.crypto.clone(),
        })
    }
}
58 src/storage_trait.rs Normal file
@@ -0,0 +1,58 @@
// src/storage_trait.rs
use crate::error::DBError;
use std::sync::Arc;

pub trait StorageBackend: Send + Sync {
    // Basic key operations
    fn get(&self, key: &str) -> Result<Option<String>, DBError>;
    fn set(&self, key: String, value: String) -> Result<(), DBError>;
    fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError>;
    fn del(&self, key: String) -> Result<(), DBError>;
    fn exists(&self, key: &str) -> Result<bool, DBError>;
    fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError>;
    fn dbsize(&self) -> Result<i64, DBError>;
    fn flushdb(&self) -> Result<(), DBError>;
    fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError>;

    // Scanning
    fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError>;
    fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError>;

    // Hash operations
    fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError>;
    fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError>;
    fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError>;
    fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError>;
    fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError>;
    fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError>;
    fn hvals(&self, key: &str) -> Result<Vec<String>, DBError>;
    fn hlen(&self, key: &str) -> Result<i64, DBError>;
    fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError>;
    fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError>;

    // List operations
    fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError>;
    fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError>;
    fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError>;
    fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError>;
    fn llen(&self, key: &str) -> Result<i64, DBError>;
    fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError>;
    fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError>;
    fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError>;
    fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError>;

    // Expiration
    fn ttl(&self, key: &str) -> Result<i64, DBError>;
    fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError>;
    fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError>;
    fn persist(&self, key: &str) -> Result<bool, DBError>;
    fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError>;
    fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError>;

    // Metadata
    fn is_encrypted(&self) -> bool;
    fn info(&self) -> Result<Vec<(String, String)>, DBError>;

    // Clone to Arc for sharing
    fn clone_arc(&self) -> Arc<dyn StorageBackend>;
}
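Reviewer note: every method here is object-safe, so the server can hold a single `Arc<dyn StorageBackend>` and stay agnostic of the redb/sled choice. A minimal sketch of backend-agnostic use; the helper itself is hypothetical, the trait methods are from this file:

use std::sync::Arc;

// Works against any backend; clone_arc lets each connection share the handle.
fn warm_cache(storage: Arc<dyn StorageBackend>) -> Result<(), DBError> {
    storage.set("greeting".to_string(), "hello".to_string())?;
    // setx stores with a time-to-live, here 30 seconds expressed in milliseconds.
    storage.setx("session:1".to_string(), "token".to_string(), 30_000)?;
    let handle = storage.clone_arc(); // hand this to another task or thread
    assert_eq!(handle.get("greeting")?, Some("hello".to_string()));
    Ok(())
}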
567 src/tantivy_search.rs Normal file
@@ -0,0 +1,567 @@
use tantivy::{
    collector::TopDocs,
    directory::MmapDirectory,
    query::{QueryParser, BooleanQuery, Query, TermQuery, Occur},
    schema::{Schema, Field, TextOptions, TextFieldIndexing,
             STORED, STRING, Value},
    Index, IndexWriter, IndexReader, ReloadPolicy,
    Term, DateTime, TantivyDocument,
    tokenizer::{TokenizerManager},
};
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use std::collections::HashMap;
use crate::error::DBError;
use serde::{Serialize, Deserialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
    Text {
        stored: bool,
        indexed: bool,
        tokenized: bool,
        fast: bool,
    },
    Numeric {
        stored: bool,
        indexed: bool,
        fast: bool,
        precision: NumericType,
    },
    Tag {
        stored: bool,
        separator: String,
        case_sensitive: bool,
    },
    Geo {
        stored: bool,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
    I64,
    U64,
    F64,
    Date,
}

pub struct IndexSchema {
    schema: Schema,
    fields: HashMap<String, (Field, FieldDef)>,
    default_search_fields: Vec<Field>,
}

pub struct TantivySearch {
    index: Index,
    writer: Arc<RwLock<IndexWriter>>,
    reader: IndexReader,
    index_schema: IndexSchema,
    name: String,
    config: IndexConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    pub language: String,
    pub stopwords: Vec<String>,
    pub stemming: bool,
    pub max_doc_count: Option<usize>,
    pub default_score: f64,
}

impl Default for IndexConfig {
    fn default() -> Self {
        IndexConfig {
            language: "english".to_string(),
            stopwords: vec![],
            stemming: true,
            max_doc_count: None,
            default_score: 1.0,
        }
    }
}
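Reviewer note: these `FieldDef` variants drive everything below; `new_with_schema` maps each one onto a concrete Tantivy field. A hedged sketch of what a schema definition might look like; the field names and values are illustrative, not from the source:

// Illustrative schema for a product index; every name here is hypothetical.
let field_definitions = vec![
    ("title".to_string(), FieldDef::Text { stored: true, indexed: true, tokenized: true, fast: false }),
    ("price".to_string(), FieldDef::Numeric { stored: true, indexed: true, fast: true, precision: NumericType::F64 }),
    ("tags".to_string(), FieldDef::Tag { stored: true, separator: ",".to_string(), case_sensitive: false }),
    ("location".to_string(), FieldDef::Geo { stored: true }), // stored as _lat/_lon f64 pairs below
];
let config = IndexConfig::default(); // english, stemming on, no stopwords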
impl TantivySearch {
    pub fn new_with_schema(
        base_path: PathBuf,
        name: String,
        field_definitions: Vec<(String, FieldDef)>,
        config: Option<IndexConfig>,
    ) -> Result<Self, DBError> {
        let index_path = base_path.join(&name);
        std::fs::create_dir_all(&index_path)
            .map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;

        // Build schema from field definitions
        let mut schema_builder = Schema::builder();
        let mut fields = HashMap::new();
        let mut default_search_fields = Vec::new();

        // Always add a document ID field
        let id_field = schema_builder.add_text_field("_id", STRING | STORED);
        fields.insert("_id".to_string(), (id_field, FieldDef::Text {
            stored: true,
            indexed: true,
            tokenized: false,
            fast: false,
        }));

        // Add user-defined fields
        for (field_name, field_def) in field_definitions {
            let field = match &field_def {
                FieldDef::Text { stored, indexed, tokenized, fast: _fast } => {
                    let mut text_options = TextOptions::default();

                    if *stored {
                        text_options = text_options.set_stored();
                    }

                    if *indexed {
                        let indexing_options = if *tokenized {
                            TextFieldIndexing::default()
                                .set_tokenizer("default")
                                .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions)
                        } else {
                            TextFieldIndexing::default()
                                .set_tokenizer("raw")
                                .set_index_option(tantivy::schema::IndexRecordOption::Basic)
                        };
                        text_options = text_options.set_indexing_options(indexing_options);

                        let f = schema_builder.add_text_field(&field_name, text_options);
                        if *tokenized {
                            default_search_fields.push(f);
                        }
                        f
                    } else {
                        schema_builder.add_text_field(&field_name, text_options)
                    }
                }
                FieldDef::Numeric { stored, indexed, fast, precision } => {
                    match precision {
                        NumericType::I64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_i64_field(&field_name, opts)
                        }
                        NumericType::U64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_u64_field(&field_name, opts)
                        }
                        NumericType::F64 => {
                            let mut opts = tantivy::schema::NumericOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_f64_field(&field_name, opts)
                        }
                        NumericType::Date => {
                            let mut opts = tantivy::schema::DateOptions::default();
                            if *stored { opts = opts.set_stored(); }
                            if *indexed { opts = opts.set_indexed(); }
                            if *fast { opts = opts.set_fast(); }
                            schema_builder.add_date_field(&field_name, opts)
                        }
                    }
                }
                FieldDef::Tag { stored, separator: _, case_sensitive: _ } => {
                    let mut text_options = TextOptions::default();
                    if *stored {
                        text_options = text_options.set_stored();
                    }
                    text_options = text_options.set_indexing_options(
                        TextFieldIndexing::default()
                            .set_tokenizer("raw")
                            .set_index_option(tantivy::schema::IndexRecordOption::Basic)
                    );
                    schema_builder.add_text_field(&field_name, text_options)
                }
                FieldDef::Geo { stored } => {
                    // For now, store as two f64 fields for lat/lon
                    let mut opts = tantivy::schema::NumericOptions::default();
                    if *stored { opts = opts.set_stored(); }
                    opts = opts.set_indexed().set_fast();

                    let lat_field = schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
                    let lon_field = schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);

                    fields.insert(format!("{}_lat", field_name), (lat_field, FieldDef::Numeric {
                        stored: *stored,
                        indexed: true,
                        fast: true,
                        precision: NumericType::F64,
                    }));
                    fields.insert(format!("{}_lon", field_name), (lon_field, FieldDef::Numeric {
                        stored: *stored,
                        indexed: true,
                        fast: true,
                        precision: NumericType::F64,
                    }));
                    continue; // Skip adding the geo field itself
                }
            };

            fields.insert(field_name.clone(), (field, field_def));
        }

        let schema = schema_builder.build();
        let index_schema = IndexSchema {
            schema: schema.clone(),
            fields,
            default_search_fields,
        };

        // Create or open index
        let dir = MmapDirectory::open(&index_path)
            .map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;

        let mut index = Index::open_or_create(dir, schema)
            .map_err(|e| DBError(format!("Failed to create index: {}", e)))?;

        // Configure tokenizers
        let tokenizer_manager = TokenizerManager::default();
        index.set_tokenizers(tokenizer_manager);

        let writer = index.writer(1_000_000)
            .map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;

        let reader = index
            .reader_builder()
            .reload_policy(ReloadPolicy::OnCommitWithDelay)
            .try_into()
            .map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;

        let config = config.unwrap_or_default();

        Ok(TantivySearch {
            index,
            writer: Arc::new(RwLock::new(writer)),
            reader,
            index_schema,
            name,
            config,
        })
    }
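Reviewer note: a hedged sketch of a call site for the constructor above, assuming the field definitions and config from the earlier sketch; the path and index name are illustrative:

// Hypothetical call site; each index lives in its own subdirectory under base_path.
let search = TantivySearch::new_with_schema(
    PathBuf::from("/tmp/herodb-indexes"), // illustrative base directory
    "products".to_string(),               // index name, becomes the subdirectory
    field_definitions,
    Some(config),
)?;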
    pub fn add_document_with_fields(
        &self,
        doc_id: &str,
        fields: HashMap<String, String>,
    ) -> Result<(), DBError> {
        let mut writer = self.writer.write()
            .map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;

        // Delete existing document with same ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            writer.delete_term(Term::from_field_text(*id_field, doc_id));
        }

        // Create new document
        let mut doc = tantivy::doc!();

        // Add document ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            doc.add_text(*id_field, doc_id);
        }

        // Add other fields based on schema
        for (field_name, field_value) in fields {
            if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
                match field_def {
                    FieldDef::Text { .. } => {
                        doc.add_text(*field, &field_value);
                    }
                    FieldDef::Numeric { precision, .. } => {
                        match precision {
                            NumericType::I64 => {
                                if let Ok(v) = field_value.parse::<i64>() {
                                    doc.add_i64(*field, v);
                                }
                            }
                            NumericType::U64 => {
                                if let Ok(v) = field_value.parse::<u64>() {
                                    doc.add_u64(*field, v);
                                }
                            }
                            NumericType::F64 => {
                                if let Ok(v) = field_value.parse::<f64>() {
                                    doc.add_f64(*field, v);
                                }
                            }
                            NumericType::Date => {
                                if let Ok(v) = field_value.parse::<i64>() {
                                    doc.add_date(*field, DateTime::from_timestamp_millis(v));
                                }
                            }
                        }
                    }
                    FieldDef::Tag { separator, case_sensitive, .. } => {
                        let tags = if !case_sensitive {
                            field_value.to_lowercase()
                        } else {
                            field_value.clone()
                        };

                        // Store tags as separate terms for efficient filtering
                        for tag in tags.split(separator.as_str()) {
                            doc.add_text(*field, tag.trim());
                        }
                    }
                    FieldDef::Geo { .. } => {
                        // Parse "lat,lon" format
                        let parts: Vec<&str> = field_value.split(',').collect();
                        if parts.len() == 2 {
                            if let (Ok(lat), Ok(lon)) = (parts[0].parse::<f64>(), parts[1].parse::<f64>()) {
                                if let Some((lat_field, _)) = self.index_schema.fields.get(&format!("{}_lat", field_name)) {
                                    doc.add_f64(*lat_field, lat);
                                }
                                if let Some((lon_field, _)) = self.index_schema.fields.get(&format!("{}_lon", field_name)) {
                                    doc.add_f64(*lon_field, lon);
                                }
                            }
                        }
                    }
                }
            }
        }

        writer.add_document(doc).map_err(|e| DBError(format!("Failed to add document: {}", e)))?;

        writer.commit()
            .map_err(|e| DBError(format!("Failed to commit: {}", e)))?;

        Ok(())
    }
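Reviewer note: documents arrive as flat string maps and values are parsed per the schema. A hedged sketch of indexing one document against the index from the earlier sketch; the ID and values are illustrative:

// Hypothetical document; keys must match the schema's field names.
let mut fields = HashMap::new();
fields.insert("title".to_string(), "Mechanical keyboard".to_string());
fields.insert("price".to_string(), "79.99".to_string());
fields.insert("tags".to_string(), "input,usb,mechanical".to_string());
search.add_document_with_fields("product:42", fields)?;

Worth noting on the design: every call commits, which keeps writes immediately visible and durable but makes bulk loads slow; batching several add_document calls per commit would be the usual optimization.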
    pub fn search_with_options(
        &self,
        query_str: &str,
        options: SearchOptions,
    ) -> Result<SearchResults, DBError> {
        let searcher = self.reader.searcher();

        // Parse query based on search fields
        let query: Box<dyn Query> = if self.index_schema.default_search_fields.is_empty() {
            return Err(DBError("No searchable fields defined in schema".to_string()));
        } else {
            let query_parser = QueryParser::for_index(
                &self.index,
                self.index_schema.default_search_fields.clone(),
            );

            Box::new(query_parser.parse_query(query_str)
                .map_err(|e| DBError(format!("Failed to parse query: {}", e)))?)
        };

        // Apply filters if any
        let final_query = if !options.filters.is_empty() {
            let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, query)];

            // Add filters
            for filter in options.filters {
                if let Some((field, _)) = self.index_schema.fields.get(&filter.field) {
                    match filter.filter_type {
                        FilterType::Equals(value) => {
                            let term_query = TermQuery::new(
                                Term::from_field_text(*field, &value),
                                tantivy::schema::IndexRecordOption::Basic,
                            );
                            clauses.push((Occur::Must, Box::new(term_query)));
                        }
                        FilterType::Range { min: _, max: _ } => {
                            // Would need numeric field handling here
                            // Simplified for now
                        }
                        FilterType::InSet(values) => {
                            let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
                            for value in values {
                                let term_query = TermQuery::new(
                                    Term::from_field_text(*field, &value),
                                    tantivy::schema::IndexRecordOption::Basic,
                                );
                                sub_clauses.push((Occur::Should, Box::new(term_query)));
                            }
                            clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
                        }
                    }
                }
            }

            Box::new(BooleanQuery::new(clauses))
        } else {
            query
        };

        // Execute search
        let top_docs = searcher.search(
            &*final_query,
            &TopDocs::with_limit(options.limit + options.offset)
        ).map_err(|e| DBError(format!("Search failed: {}", e)))?;

        let total_hits = top_docs.len();
        let mut documents = Vec::new();

        for (score, doc_address) in top_docs.iter().skip(options.offset).take(options.limit) {
            let retrieved_doc: TantivyDocument = searcher.doc(*doc_address)
                .map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;

            let mut doc_fields = HashMap::new();

            // Extract all stored fields
            for (field_name, (field, field_def)) in &self.index_schema.fields {
                match field_def {
                    FieldDef::Text { stored, .. } |
                    FieldDef::Tag { stored, .. } => {
                        if *stored {
                            if let Some(value) = retrieved_doc.get_first(*field) {
                                if let Some(text) = value.as_str() {
                                    doc_fields.insert(field_name.clone(), text.to_string());
                                }
                            }
                        }
                    }
                    FieldDef::Numeric { stored, precision, .. } => {
                        if *stored {
                            let value_str = match precision {
                                NumericType::I64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_i64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::U64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_u64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::F64 => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_f64())
                                        .map(|v| v.to_string())
                                }
                                NumericType::Date => {
                                    retrieved_doc.get_first(*field)
                                        .and_then(|v| v.as_datetime())
                                        .map(|v| v.into_timestamp_millis().to_string())
                                }
                            };

                            if let Some(v) = value_str {
                                doc_fields.insert(field_name.clone(), v);
                            }
                        }
                    }
                    FieldDef::Geo { stored } => {
                        if *stored {
                            let lat_field = self.index_schema.fields.get(&format!("{}_lat", field_name)).unwrap().0;
                            let lon_field = self.index_schema.fields.get(&format!("{}_lon", field_name)).unwrap().0;

                            let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
                            let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());

                            if let (Some(lat), Some(lon)) = (lat, lon) {
                                doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
                            }
                        }
                    }
                }
            }

            documents.push(SearchDocument {
                fields: doc_fields,
                score: *score,
            });
        }

        Ok(SearchResults {
            total: total_hits,
            documents,
        })
    }
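Reviewer note: a hedged sketch of a basic query against the index built in the earlier sketches; the query string is illustrative:

// Defaults: first 10 hits, no filters, no highlighting (see SearchOptions below).
let results = search.search_with_options("keyboard", SearchOptions::default())?;
println!("{} hits", results.total);
for doc in &results.documents {
    println!("score {:.2}: {:?}", doc.score, doc.fields.get("title"));
}

One caveat visible in the code above: `total` is the length of the TopDocs result, which is capped at `limit + offset`, so it is not the full match count across the index.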
    pub fn get_info(&self) -> Result<IndexInfo, DBError> {
        let searcher = self.reader.searcher();
        let num_docs = searcher.num_docs();

        let fields_info: Vec<FieldInfo> = self.index_schema.fields.iter().map(|(name, (_, def))| {
            FieldInfo {
                name: name.clone(),
                field_type: format!("{:?}", def),
            }
        }).collect();

        Ok(IndexInfo {
            name: self.name.clone(),
            num_docs,
            fields: fields_info,
            config: self.config.clone(),
        })
    }
}
#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub limit: usize,
    pub offset: usize,
    pub filters: Vec<Filter>,
    pub sort_by: Option<String>,
    pub return_fields: Option<Vec<String>>,
    pub highlight: bool,
}

impl Default for SearchOptions {
    fn default() -> Self {
        SearchOptions {
            limit: 10,
            offset: 0,
            filters: vec![],
            sort_by: None,
            return_fields: None,
            highlight: false,
        }
    }
}
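Reviewer note: filters compose with the parsed text query as mandatory clauses (`Occur::Must` in `search_with_options`). A hedged sketch narrowing the earlier query to a tag; field and values are illustrative, and lowercase to match a case-insensitive Tag field:

let opts = SearchOptions {
    filters: vec![Filter {
        field: "tags".to_string(),
        // InSet ORs its values together, then the whole set is ANDed with the query.
        filter_type: FilterType::InSet(vec!["usb".to_string(), "wireless".to_string()]),
    }],
    ..SearchOptions::default()
};
let filtered = search.search_with_options("keyboard", opts)?;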
#[derive(Debug, Clone)]
pub struct Filter {
    pub field: String,
    pub filter_type: FilterType,
}

#[derive(Debug, Clone)]
pub enum FilterType {
    Equals(String),
    Range { min: String, max: String },
    InSet(Vec<String>),
}

#[derive(Debug)]
pub struct SearchResults {
    pub total: usize,
    pub documents: Vec<SearchDocument>,
}

#[derive(Debug)]
pub struct SearchDocument {
    pub fields: HashMap<String, String>,
    pub score: f32,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
    pub name: String,
    pub num_docs: u64,
    pub fields: Vec<FieldInfo>,
    pub config: IndexConfig,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
    pub name: String,
    pub field_type: String,
}
@@ -27,6 +27,7 @@ async fn debug_hset_simple() {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let mut server = Server::new(option).await;
@@ -18,6 +18,7 @@ async fn debug_hset_return_value() {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let mut server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: true,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let server = Server::new(option).await;
@@ -24,6 +24,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: true,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let server = Server::new(option).await;
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         debug: false,
         encrypt: false,
         encryption_key: None,
+        backend: herodb::options::BackendType::Redb,
     };
 
     let server = Server::new(option).await;
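Reviewer note: all six hunks add the same line, so every test now selects its storage backend explicitly. Only `BackendType::Redb` appears in the diff; a sketch of the choice, where the `Sled` variant is an assumption based on the dual-backend support:

// The new required field in the test options.
// BackendType::Sled as the alternative variant is an assumption, not confirmed here.
let backend = herodb::options::BackendType::Redb;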