19 Commits

Author SHA1 Message Date
Maxime Van Hees
bd34fd092a Persist backend per database id in admin metadata so restarts and lazy opens always use the correct engine (Sled/Redb) 2025-09-22 15:29:58 +02:00
Maxime Van Hees
8e044a64b7 fix incorrect keycount displayed in database info over RPC calls 2025-09-19 14:04:03 +02:00
Maxime Van Hees
87177f4a07 update documentation about 0.db admin db + symmetric encryption + include RPC examples + asymmetric transparent named key instances for encryption and signatures 2025-09-19 11:55:28 +02:00
Maxime Van Hees
151a6ffbfa fixed test 2025-09-19 10:35:08 +02:00
Maxime Van Hees
8ab841f68c Key generation now automatically derives X25519 keys from Ed25519 keys, which allows users to transparently use their key name for encrypting/decrypting and signing/verifying 2025-09-18 22:37:19 +02:00
Maxime Van Hees
8808c0e9d9 Implemented symmetric encryption; new commands are SYM KEYGEN; SYM ENCRYPT; SYM DECRYPT 2025-09-18 11:59:44 +02:00
Maxime Van Hees
c6b277cc9c fixed DEL showing wrong deletion amount + AGE LIST now returns a list of managed key names without nested arrays or labels 2025-09-18 00:19:40 +02:00
Maxime Van Hees
b8ca73397d implemented 0.db as admin database architecture + updated test file 2025-09-16 16:06:47 +02:00
Maxime Van Hees
1b15806a85 fix invalid values in RPC response about database instance details 2025-09-15 13:45:37 +02:00
Maxime Van Hees
da325a9659 fix bug where meta files were not auto-created upon starting + fix bug where meta json files were actually binary + improved access control to database instances 2025-09-15 10:34:03 +02:00
Maxime Van Hees
bdf363016a WIP: adding access management control to db instances 2025-09-12 17:11:50 +02:00
Maxime Van Hees
8798bc202e Restore working code 2025-09-11 18:33:09 +02:00
Maxime Van Hees
9fa9832605 combined current main (with sled) and RPC server 2025-09-11 17:23:46 +02:00
Maxime Van Hees
4bb24b38dd fix typo in README 2025-09-11 15:34:03 +02:00
Maxime Van Hees
f3da14b957 Merge branch 'append' 2025-09-11 15:31:47 +02:00
Maxime Van Hees
5ea34b4445 update variable name as 'gen' is a reserved keyword since Rust 2024 edition 2025-09-11 15:25:26 +02:00
Maxime Van Hees
d9a3b711d1 Update to Rust 2024 edition + update Cargo.toml file 2025-09-11 15:24:28 +02:00
Maxime Van Hees
d931770e90 Fix test suite + update Cargo.toml file 2025-09-09 16:04:31 +02:00
Timur Gordon
a87ec4dbb5 add readme 2025-08-27 15:39:59 +02:00
34 changed files with 2629 additions and 7747 deletions

Cargo.lock generated

File diff suppressed because it is too large

@@ -1,8 +1,8 @@
[package]
name = "herodb"
version = "0.0.1"
authors = ["Pin Fang <fpfangpin@hotmail.com>"]
edition = "2021"
authors = ["ThreeFold Tech NV"]
edition = "2024"
[dependencies]
anyhow = "1.0.59"
@@ -23,19 +23,9 @@ sha2 = "0.10"
age = "0.10"
secrecy = "0.8"
ed25519-dalek = "2"
x25519-dalek = "2"
base64 = "0.22"
# Lance vector database dependencies
lance = "0.33"
lance-index = "0.33"
lance-linalg = "0.33"
# Use Arrow version compatible with Lance 0.33
arrow = "55.2"
arrow-array = "55.2"
arrow-schema = "55.2"
parquet = "55.2"
uuid = { version = "1.10", features = ["v4"] }
reqwest = { version = "0.11", features = ["json"] }
image = "0.25"
jsonrpsee = { version = "0.26.0", features = ["http-client", "ws-client", "server", "macros"] }
[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }


@@ -17,6 +17,8 @@ The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-comp
- **Expiration**: Time-to-live (TTL) functionality for keys.
- **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`).
- **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations.
- **Symmetric Encryption**: Stateless symmetric encryption using XChaCha20-Poly1305.
- **Admin Database 0**: Centralized control for database management, access control, and per-database encryption.
## Quick Start
@@ -30,31 +32,14 @@ cargo build --release
### Running HeroDB
You can start HeroDB with different backends and encryption options:
#### Default `redb` Backend
Launch HeroDB with the required `--admin-secret` flag, which encrypts the admin database (DB 0) and authorizes admin access. Optional flags include `--dir` for the database directory, `--port` for the TCP port (default 6379), `--sled` for the sled backend, and `--enable-rpc` to start the JSON-RPC management server on port 8080.
Example:
```bash
./target/release/herodb --dir /tmp/herodb_redb --port 6379
./target/release/herodb --dir /tmp/herodb --admin-secret myadminsecret --port 6379 --enable-rpc
```
#### `sled` Backend
```bash
./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled
```
#### `redb` with Encryption
```bash
./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey
```
#### `sled` with Encryption
```bash
./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
```
For detailed launch options, see [Basics](docs/basics.md).
## Usage with Redis Clients
@@ -76,10 +61,24 @@ redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
# 2) 1) "user:1"
```
## Cryptography
HeroDB supports asymmetric encryption/signatures via AGE commands (X25519 for encryption, Ed25519 for signatures) in stateless or key-managed modes, and symmetric encryption via SYM commands. Keys are persisted in the admin database (DB 0) for managed modes.
For details, see [AGE Cryptography](docs/age.md) and [Basics](docs/basics.md).
## Database Management
Databases are managed via JSON-RPC API, with metadata stored in the encrypted admin database (DB 0). Databases are public by default upon creation; use RPC to set them private, requiring access keys for SELECT operations (read or readwrite based on permissions). Management also covers per-database encryption keys, access control, and lifecycle operations.
For examples, see [JSON-RPC Examples](docs/rpc_examples.md) and [Admin DB 0 Model](docs/admin.md).
## Documentation
For more detailed information on commands, features, and advanced usage, please refer to the documentation:
- [Basics](docs/basics.md)
- [Supported Commands](docs/cmds.md)
- [AGE Cryptography](docs/age.md)
- [Admin DB 0 Model (access control, per-db encryption)](docs/admin.md)
- [JSON-RPC Examples (management API)](docs/rpc_examples.md)

docs/admin.md Normal file

@@ -0,0 +1,181 @@
# Admin Database 0 (`0.db`)
This page explains what the Admin Database `DB 0` is, why HeroDB uses it, and how to work with it as a developer and end-user. It's a practical guide covering how databases are created, listed, secured with access keys, and encrypted using per-database secrets.
## What is `DB 0`?
`DB 0` is the control-plane for a HeroDB instance. It stores metadata for all user databases (`db_id >= 1`) so the server can:
- Know which databases exist (without scanning the filesystem)
- Enforce access control (public/private with access keys)
- Enforce per-database encryption (whether a given database must be opened encrypted and with which write-only key)
`DB 0` itself is always encrypted with the admin secret (the process-level secret provided at startup).
## How `DB 0` is created and secured
- `DB 0` lives at `<base_dir>/0.db`
- It is always encrypted using the `admin secret` provided at process startup (using the `--admin-secret <secret>` CLI flag)
- Only clients that provide the correct admin secret can `SELECT 0` (see “`SELECT` + `KEY`” below)
At startup, the server bootstraps `DB 0` (initializes counters and structures) if it's missing.
## Metadata stored in `DB 0`
Keys in `DB 0` (internal layout, but useful to understand how things work):
- `admin:next_id`
- String counter holding the next id to allocate (initialized to `"1"`)
- `admin:dbs`
- A hash acting as a set of existing database ids
- field = id (as string), value = `"1"`
- `meta:db:<id>`
- A hash holding db-level metadata
- field `public` = `"true"` or `"false"` (defaults to `true` if missing)
- `meta:db:<id>:keys`
- A hash mapping access-key hashes to the string `Permission:created_at_seconds`
- Examples: `Read:1713456789` or `ReadWrite:1713456789`
- The plaintext access keys are never stored; only their `SHA-256` hashes are kept
- `meta:db:<id>:enc`
- A string holding the per-database encryption key used to open `<id>.db` encrypted
- This value is write-only from the perspective of the management APIs (it's set at creation and never returned)
- `age:key:<name>`
- Base64-encoded X25519 recipient (public encryption key) for named AGE keys
- `age:privkey:<name>`
- Base64-encoded X25519 identity (secret encryption key) for named AGE keys
- `age:signpub:<name>`
- Base64-encoded Ed25519 verify public key for named AGE keys
- `age:signpriv:<name>`
- Base64-encoded Ed25519 signing secret key for named AGE keys
> You don't need to manipulate these keys directly; they're listed to clarify the model. AGE keys are managed via AGE commands.
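For orientation, here is what a read-only inspection of `DB 0` can look like. This is a minimal sketch: the ids, timestamps, and hashes are illustrative, it assumes the server was started with `--admin-secret my-admin-secret` and that the standard `GET`/`HGETALL` commands are available, and since `SELECT` state is per connection the commands run inside one interactive `redis-cli` session:
```bash
$ redis-cli -p 6379
127.0.0.1:6379> SELECT 0 KEY my-admin-secret
OK
127.0.0.1:6379> GET admin:next_id
"3"
127.0.0.1:6379> HGETALL admin:dbs
1) "1"
2) "1"
3) "2"
4) "1"
127.0.0.1:6379> HGETALL meta:db:2
1) "public"
2) "false"
127.0.0.1:6379> HGETALL meta:db:2:keys
1) "<sha256_of_access_key_hex>"
2) "ReadWrite:1713456789"
```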
## Database lifecycle
1) Create a database (via JSON-RPC)
- The server allocates an id from `admin:next_id`, registers it in `admin:dbs`, and defaults the database to `public=true`
- If you pass an optional `encryption_key` during creation, the server persists it in `meta:db:<id>:enc`. That database will be opened in encrypted mode from then on
2) Open and use a database
- Clients select a database over RESP using `SELECT`
- Authorization and encryption state are enforced using `DB 0` metadata
3) Delete database files
- Removing `<id>.db` removes the physical storage
- `DB 0` remains the source of truth for existence and may be updated by future management methods as the system evolves
## Access control model
- Public database (default)
- Anyone can `SELECT <id>` with no key, and will get `ReadWrite` permission
- Private database
- You must provide an access key when selecting the database
- The server hashes the provided key with `SHA-256` and checks membership in `meta:db:<id>:keys`
- Permissions are `Read` or `ReadWrite` depending on how the key was added
- Admin `DB 0`
- Requires the exact admin secret as the `KEY` argument to `SELECT 0`
- Permission is `ReadWrite` when the secret matches
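Because only digests are stored, you can compute the hash of a plaintext key yourself, for example to find which entry in `meta:db:<id>:keys` (or in the `listAccessKeys` output) belongs to it. A small sketch, assuming the stored value is the hex-encoded SHA-256 of the raw key bytes; `my-db2-access-key` is a hypothetical key:
```bash
# Linux (coreutils)
echo -n "my-db2-access-key" | sha256sum
# macOS
echo -n "my-db2-access-key" | shasum -a 256
# Compare the hex digest against the hashes stored in meta:db:<id>:keys
```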
### How to select databases with optional `KEY`
- Public DB (no key required)
- `SELECT <id>`
- Private DB (access key required)
- `SELECT <id> KEY <plaintext_key>`
- Admin `DB 0` (admin secret required)
- `SELECT 0 KEY <admin_secret>`
Examples (using `redis-cli`):
```bash
# Public database
redis-cli -p $PORT SELECT 1
# → OK
# Private database
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
# Admin DB 0
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```
## Per-database encryption
- At database creation, you can provide an optional per-db encryption key
- If provided, the server persists that key in `DB 0` as `meta:db:<id>:enc`
- When you later open the database, the engine checks whether `meta:db:<id>:enc` exists to decide if it must open `<id>.db` in encrypted mode
- The per-db key is not returned by RPC—it is considered write-only configuration data
Operationally:
- Create with encryption: pass a non-null `encryption_key` to the `createDatabase` RPC
- Open later: simply `SELECT` the database; encryption is transparent to clients
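As a sketch of the encrypted path, assuming the RPC server was started with `--enable-rpc` on its default port 8080 and accepts JSON-RPC 2.0 POSTs at the root path (payload shape as in the RPC examples doc):
```bash
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"hero_createDatabase","params":["Redb",{"name":null,"storage_path":null,"max_size":null,"redis_version":null},"my-per-db-encryption-key"]}'
# → a JSON-RPC result containing the new db id (shape illustrative)

redis-cli -p 6379 SELECT <new_db_id>
# → OK (the file is opened encrypted; clients notice no difference)
```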
## Management via JSON-RPC
You can manage databases using the management RPC (methods named `hero_<function>`). Typical operations:
- `createDatabase(backend, config, encryption_key?)`
- Allocates a new id, sets optional encryption key
- `listDatabases()`
- Lists database ids and info (including whether storage is currently encrypted)
- `getDatabaseInfo(db_id)`
- Returns details: backend, encrypted flag, size on disk, `key_count`, timestamps, etc.
- `addAccessKey(db_id, key, permissions)`
- Adds a `Read` or `ReadWrite` access key (permissions = `"read"` | `"readwrite"`)
- `listAccessKeys(db_id)`
- Returns hashes and permissions; you can use these hashes to delete keys
- `deleteAccessKey(db_id, key_hash)`
- Removes a key by its hash
- `setDatabasePublic(db_id, public)`
- Toggles public/private
Copyable JSON examples are provided in the [RPC examples documentation](./rpc_examples.md).
## Typical flows
1) Public, unencrypted database
- Create a new database without an encryption key
- Clients can immediately `SELECT <id>` without a key
- You can later make it private and add keys if needed
2) Private, encrypted database
- Create passing an `encryption_key`
- Mark it private (`setDatabasePublic false`) and add access keys
- Clients must use `SELECT <id> KEY <plaintext_access_key>`
- Storage opens in encrypted mode automatically
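A hedged end-to-end sketch of flow 2, assuming a database with id `2` already exists (e.g. created as shown earlier), the RPC server listens on `http://127.0.0.1:8080`, and payload shapes follow the RPC examples doc:
```bash
RPC=http://127.0.0.1:8080

# Make db 2 private
curl -s $RPC -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"hero_setDatabasePublic","params":[2,false]}'

# Add a readwrite access key (plaintext is sent; only its SHA-256 is stored)
curl -s $RPC -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":2,"method":"hero_addAccessKey","params":[2,"my-db2-access-key","readwrite"]}'

# Clients must now present the key
redis-cli -p 6379 SELECT 2 KEY my-db2-access-key
# → OK
```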
## Security notes
- Only `SHA-256` hashes of access keys are stored in `DB 0`; keep plaintext keys safe on the client side
- The per-db encryption key is never exposed via the API after it is set
- The admin secret must be kept secure; anyone with it can `SELECT 0` and perform administrative actions
## Troubleshooting
- `ERR invalid access key` when selecting a private db
- Ensure you passed the `KEY` argument: `SELECT <id> KEY <plaintext_key>`
- If you recently added the key, confirm the permissions and that you used the exact plaintext (hash must match)
- `Database X not found`
- The id isn't registered in `DB 0` (`admin:dbs`). Use the management APIs to create or list databases
- Cannot `SELECT 0`
- The `KEY` must be the exact admin secret passed at server startup
## Reference
- Admin metadata lives in `DB 0` (`0.db`) and controls:
- Existence: `admin:dbs`
- Access: `meta:db:<id>` (field `public`) and `meta:db:<id>:keys`
- Encryption: `meta:db:<id>:enc`
For command examples and management payloads:
- RESP command basics: `docs/basics.md`
- Supported commands: `docs/cmds.md`
- JSON-RPC examples: `docs/rpc_examples.md`


@@ -1,188 +1,96 @@
# HeroDB AGE usage: Stateless vs KeyManaged
# HeroDB AGE Cryptography
This document explains how to use the AGE cryptography commands exposed by HeroDB over the Redis protocol in two modes:
- Stateless (ephemeral keys; nothing stored on the server)
- Key-managed (server-persisted, named keys)
HeroDB provides AGE-based asymmetric encryption and digital signatures over the Redis protocol using X25519 for encryption and Ed25519 for signatures. Keys can be used in stateless (ephemeral) or key-managed (persistent, named) modes.
If you are new to the codebase, the exact tests that exercise these behaviors are:
- [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
- [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
In key-managed mode, HeroDB uses a unified keypair concept: a single Ed25519 signing key is deterministically derived into X25519 keys for encryption, allowing one keypair to handle both encryption and signatures transparently.
Implementation entry points:
- [herodb/src/age.rs](herodb/src/age.rs)
- Dispatch from [herodb/src/cmd.rs](herodb/src/cmd.rs)
## Cryptographic Algorithms
Note: Database-at-rest encryption flags in the test harness are unrelated to AGE commands; those flags control storage-level encryption of DB files. See the harness near [rust.start_test_server()](herodb/tests/usage_suite.rs:10).
### X25519 (Encryption)
- Elliptic-curve Diffie-Hellman key exchange for symmetric key derivation.
- Used for encrypting/decrypting messages.
## Quick start
### Ed25519 (Signatures)
- EdDSA digital signatures for message authentication.
- Used for signing/verifying messages.
Assuming the server is running on localhost on some $PORT:
### Key Derivation
Ed25519 signing keys are deterministically converted to X25519 keys for encryption. This enables a single keypair to support both operations without additional keys. Derivation uses the Ed25519 secret scalar clamped for X25519.
In named keypairs, Ed25519 keys are stored, and X25519 keys are derived on-demand and cached.
## Stateless Mode (Ephemeral Keys)
No server-side storage; keys are provided with each command.
Available commands:
- `AGE GENENC`: Generate ephemeral X25519 keypair. Returns `[recipient, identity]`.
- `AGE GENSIGN`: Generate ephemeral Ed25519 keypair. Returns `[verify_pub, sign_secret]`.
- `AGE ENCRYPT <recipient> <message>`: Encrypt message. Returns base64 ciphertext.
- `AGE DECRYPT <identity> <ciphertext_b64>`: Decrypt ciphertext. Returns plaintext.
- `AGE SIGN <sign_secret> <message>`: Sign message. Returns base64 signature.
- `AGE VERIFY <verify_pub> <message> <signature_b64>`: Verify signature. Returns 1 (valid) or 0 (invalid).
Example:
```bash
~/code/git.ourworld.tf/herocode/herodb/herodb/build.sh
~/code/git.ourworld.tf/herocode/herodb/target/release/herodb --dir /tmp/data --debug --port 6381 --encryption-key 1234 --encrypt
```
```bash
redis-cli AGE GENENC
# → 1) "age1qz..."             # recipient (X25519 public)
#    2) "AGE-SECRET-KEY-1..."  # identity (X25519 secret)
redis-cli AGE ENCRYPT "age1qz..." "hello"
# → base64_ciphertext
```
```bash
export PORT=6381
# Generate an ephemeral keypair and encrypt/decrypt a message (stateless mode)
redis-cli -p $PORT AGE GENENC
# → returns an array: [recipient, identity]
redis-cli -p $PORT AGE ENCRYPT <recipient> "hello world"
# → returns ciphertext (base64 in a bulk string)
redis-cli -p $PORT AGE DECRYPT <identity> <ciphertext_b64>
# → returns "hello world"
```
For key-managed mode, generate a named key once and reference it by name afterwards:
```bash
redis-cli -p $PORT AGE KEYGEN app1
# → persists encryption keypair under name "app1"
redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello"
redis-cli -p $PORT AGE DECRYPTNAME app1 <ciphertext_b64>
```
## Stateless AGE (ephemeral)
Characteristics
- No server-side storage of keys.
- You pass the actual key material with every call.
- Not listable via AGE LIST.
Commands and examples
1) Ephemeral encryption keys
```bash
# Generate an ephemeral encryption keypair
redis-cli -p $PORT AGE GENENC
# Example output (abridged):
# 1) "age1qz..." # recipient (public key) = can be used by others e.g. to verify what I sign
# 2) "AGE-SECRET-KEY-1..." # identity (secret) = is like my private, cannot lose this one
# Encrypt with the recipient public key
redis-cli -p $PORT AGE ENCRYPT "age1qz..." "hello world"
# → returns bulk string payload: base64 ciphertext (encrypted content)
# Decrypt with the identity (secret), i.e. your private key
redis-cli -p $PORT AGE DECRYPT "AGE-SECRET-KEY-1..." "<ciphertext_b64>"
# → "hello world"
```
2) Ephemeral signing keys
> ? is this same as my private key
```bash
# Generate an ephemeral signing keypair
redis-cli -p $PORT AGE GENSIGN
# Example output:
# 1) "<verify_pub_b64>"
# 2) "<sign_secret_b64>"
# Sign a message with the secret
redis-cli -p $PORT AGE SIGN "<sign_secret_b64>" "msg"
# → returns "<signature_b64>"
# Verify with the public key
redis-cli -p $PORT AGE VERIFY "<verify_pub_b64>" "msg" "<signature_b64>"
# → 1 (valid) or 0 (invalid)
```
When to use
- You do not want the server to store private keys.
- You already manage key material on the client side.
- You need ad-hoc operations without persistence.
Reference test: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
## Key-managed AGE (persistent, named)
Characteristics
- Server generates and persists keypairs under a chosen name.
- Clients refer to keys by name; raw secrets are not supplied on each call.
- Keys are discoverable via AGE LIST.
Commands and examples
1) Named encryption keys
```bash
# Create/persist a named encryption keypair
redis-cli -p $PORT AGE KEYGEN app1
# → returns [recipient, identity] but also stores them under name "app1"
> TODO: should not return identity (security, but there can be separate function to export it e.g. AGE EXPORTKEY app1)
# Encrypt using the stored public key
redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello"
# → returns bulk string payload: base64 ciphertext
# Decrypt using the stored secret
redis-cli -p $PORT AGE DECRYPTNAME app1 "<ciphertext_b64>"
redis-cli AGE DECRYPT "AGE-SECRET-KEY-1..." base64_ciphertext
# → "hello"
```
2) Named signing keys
## Key-Managed Mode (Persistent Named Keys)
Keys are stored server-side under names. Supports unified keypairs for both encryption and signatures.
Available commands:
- `AGE KEYGEN <name>`: Generate and store unified keypair. Returns `[recipient, identity]` in age format.
- `AGE SIGNKEYGEN <name>`: Generate and store Ed25519 signing keypair. Returns `[verify_pub, sign_secret]`.
- `AGE ENCRYPTNAME <name> <message>`: Encrypt with named key. Returns base64 ciphertext.
- `AGE DECRYPTNAME <name> <ciphertext_b64>`: Decrypt with named key. Returns plaintext.
- `AGE SIGNNAME <name> <message>`: Sign with named key. Returns base64 signature.
- `AGE VERIFYNAME <name> <message> <signature_b64>`: Verify with named key. Returns 1 or 0.
- `AGE LIST`: List all stored key names. Returns sorted array of names.
### AGE LIST Output
Returns a flat, deduplicated, sorted array of key names (strings). Each name corresponds to a stored keypair, which may include encryption keys (X25519), signing keys (Ed25519), or both.
Output format: `["name1", "name2", ...]`
Example:
```bash
# Create/persist a named signing keypair
redis-cli -p $PORT AGE SIGNKEYGEN app1
# → returns [verify_pub_b64, sign_secret_b64] and stores under name "app1"
> TODO: should not return sign_secret_b64 (for security, but there can be separate function to export it e.g. AGE EXPORTSIGNKEY app1)
# Sign using the stored secret
redis-cli -p $PORT AGE SIGNNAME app1 "msg"
# → returns "<signature_b64>"
# Verify using the stored public key
redis-cli -p $PORT AGE VERIFYNAME app1 "msg" "<signature_b64>"
# → 1 (valid) or 0 (invalid)
redis-cli AGE LIST
# → 1) "<named_keypair_1>"
# 2) "<named_keypair_2>"
```
3) List stored AGE keys
For unified keypairs (from `AGE KEYGEN`), the name handles both encryption (derived X25519) and signatures (stored Ed25519) transparently.
Example with named keys:
```bash
redis-cli -p $PORT AGE LIST
# Example output includes labels such as "encpub" and your key names (e.g., "app1")
redis-cli AGE KEYGEN app1
# → 1) "age1..." # recipient
# 2) "AGE-SECRET-KEY-1..." # identity
redis-cli AGE ENCRYPTNAME app1 "secret message"
# → base64_ciphertext
redis-cli AGE DECRYPTNAME app1 base64_ciphertext
# → "secret message"
redis-cli AGE SIGNNAME app1 "message"
# → base64_signature
redis-cli AGE VERIFYNAME app1 "message" base64_signature
# → 1
```
When to use
- You want centralized key storage/rotation and fewer secrets on the client.
- You need names/labels for workflows and can trust the server with secrets.
- You want discoverability (AGE LIST) and simpler client commands.
## Choosing a Mode
- **Stateless**: For ad-hoc operations without persistence; client manages keys.
- **Key-managed**: For centralized key lifecycle; server stores keys for convenience and discoverability.
Reference test: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
## Choosing a mode
- Prefer Stateless when:
- Minimizing server trust for secret material is the priority.
- Clients already have a secure mechanism to store/distribute keys.
- Prefer Key-managed when:
- Centralized lifecycle, naming, and discoverability are beneficial.
- You plan to integrate rotation, ACLs, or auditability on the server side.
## Security notes
- Treat identities and signing secrets as sensitive; avoid logging them.
- For key-managed mode, ensure server storage (and backups) are protected.
- AGE operations here are application-level crypto and are distinct from database-at-rest encryption configured in the test harness.
## Repository pointers
- Stateless examples in tests: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
- Keymanaged examples in tests: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
- AGE implementation: [herodb/src/age.rs](herodb/src/age.rs)
- Command dispatch: [herodb/src/cmd.rs](herodb/src/cmd.rs)
- Bash demo: [herodb/examples/age_bash_demo.sh](herodb/examples/age_bash_demo.sh)
- Rust persistent demo: [herodb/examples/age_persist_demo.rs](herodb/examples/age_persist_demo.rs)
- Additional notes: [herodb/instructions/encrypt.md](herodb/instructions/encrypt.md)
Implementation: [herodb/src/age.rs](herodb/src/age.rs) <br>
Tests: [herodb/tests/usage_suite.rs](herodb/tests/usage_suite.rs)


@@ -1,4 +1,58 @@
Here's an expanded version of the cmds.md documentation to include the list commands:
# HeroDB Basics
## Launching HeroDB
To launch HeroDB, use the binary with required and optional flags. The `--admin-secret` flag is mandatory, encrypting the admin database (DB 0) and authorizing admin access.
### Launch Flags
- `--dir <path>`: Directory for database files (default: current directory).
- `--port <port>`: TCP port for Redis protocol (default: 6379).
- `--debug`: Enable debug logging.
- `--sled`: Use Sled backend (default: Redb).
- `--enable-rpc`: Start JSON-RPC management server on port 8080.
- `--rpc-port <port>`: Custom RPC port (default: 8080).
- `--admin-secret <secret>`: Required secret for DB 0 encryption and admin access.
Example:
```bash
./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc
```
Deprecated flags (`--encrypt`, `--encryption-key`) are ignored for data DBs; per-database encryption is managed via RPC.
## Admin Database (DB 0)
DB 0 acts as the administrative database instance, storing metadata for all user databases (IDs >= 1). It controls existence, access control, and per-database encryption. DB 0 is always encrypted with the `--admin-secret`.
When creating a new database, DB 0 allocates an ID, registers it, and optionally stores a per-database encryption key (write-only). Databases are public by default; use RPC to set them private, requiring access keys for SELECT (read or readwrite based on permissions). Keys are persisted in DB 0 for managed AGE operations.
Access DB 0 with `SELECT 0 KEY <admin-secret>`.
## Symmetric Encryption
HeroDB supports stateless symmetric encryption via SYM commands, using XChaCha20-Poly1305 AEAD.
Commands:
- `SYM KEYGEN`: Generate 32-byte key. Returns base64-encoded key.
- `SYM ENCRYPT <key_b64> <message>`: Encrypt message. Returns base64 ciphertext.
- `SYM DECRYPT <key_b64> <ciphertext_b64>`: Decrypt. Returns plaintext.
Example:
```bash
redis-cli SYM KEYGEN
# → base64_key
redis-cli SYM ENCRYPT base64_key "secret"
# → base64_ciphertext
redis-cli SYM DECRYPT base64_key base64_ciphertext
# → "secret"
```
## RPC Options
Enable the JSON-RPC server with `--enable-rpc` for database management. Methods include creating databases, managing access keys, and setting encryption. See [JSON-RPC Examples](./rpc_examples.md) for payloads.
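A minimal transport sketch, assuming the RPC server accepts JSON-RPC 2.0 POSTs at the root path on its default port 8080:
```bash
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"hero_listDatabases","params":[]}'
```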
# HeroDB Commands
HeroDB implements a subset of Redis commands over the Redis protocol. This document describes the available commands and their usage.
@@ -575,6 +629,29 @@ redis-cli -p $PORT AGE LIST
# 2) "keyname2"
```
## SYM Commands
### SYM KEYGEN
Generate a symmetric encryption key.
```bash
redis-cli -p $PORT SYM KEYGEN
# → base64_encoded_32byte_key
```
### SYM ENCRYPT
Encrypt a message with a symmetric key.
```bash
redis-cli -p $PORT SYM ENCRYPT <key_b64> "message"
# → base64_encoded_ciphertext
```
### SYM DECRYPT
Decrypt a ciphertext with a symmetric key.
```bash
redis-cli -p $PORT SYM DECRYPT <key_b64> <ciphertext_b64>
# → decrypted_message
```
## Server Information Commands
### INFO
@@ -621,3 +698,27 @@ This expanded documentation includes all the list commands that were implemented
10. LINDEX - get element by index
11. LRANGE - get range of elements
## Updated Database Selection and Access Keys
HeroDB uses an `Admin DB 0` to control database existence, access, and encryption. Access to data DBs can be public (no key) or private (requires a key). See detailed model in `docs/admin.md`.
Examples:
```bash
# Public database (no key required)
redis-cli -p $PORT SELECT 1
# → OK
```
```bash
# Private database (requires access key)
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
```
```bash
# Admin DB 0 (requires admin secret)
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```


@@ -122,4 +122,27 @@ redis-cli -p 6379 --rdb dump.rdb
# Import to sled
redis-cli -p 6381 --pipe < dump.rdb
```
## Authentication and Database Selection
HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in `docs/admin.md`.
Examples:
```bash
# Public database (no key required)
redis-cli -p $PORT SELECT 1
# → OK
```
```bash
# Private database (requires access key)
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
```
```bash
# Admin DB 0 (requires admin secret)
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```


@@ -1,454 +0,0 @@
# Lance Vector Database Operations
HeroDB includes a powerful vector database integration using Lance, enabling high-performance vector storage, search, and multimodal data management. By default, it uses Ollama for local text embeddings, with support for custom external embedding services.
## Overview
The Lance vector database integration provides:
- **High-performance vector storage** using Lance's columnar format
- **Local Ollama integration** for text embeddings (default, no external dependencies)
- **Custom embedding service support** for advanced use cases
- **Text embedding support** (images via custom services)
- **Vector similarity search** with configurable parameters
- **Scalable indexing** with IVF_PQ (Inverted File with Product Quantization)
- **Redis-compatible command interface**
## Architecture
```
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
│ HeroDB │ │ External │ │ Lance │
│ Redis Server │◄──►│ Embedding │ │ Vector Store │
│ │ │ Service │ │ │
└─────────────────┘ └──────────────────┘ └─────────────────┘
│ │ │
│ │ │
Redis Protocol HTTP API Arrow/Parquet
Commands JSON Requests Columnar Storage
```
### Key Components
1. **Lance Store**: High-performance columnar vector storage
2. **Ollama Integration**: Local embedding service (default)
3. **Custom Embedding Service**: Optional HTTP API for advanced use cases
4. **Redis Command Interface**: Familiar Redis-style commands
5. **Arrow Schema**: Flexible schema definition for metadata
## Configuration
### Default Setup (Ollama)
HeroDB uses Ollama by default for text embeddings. No configuration is required if Ollama is running locally:
```bash
# Install Ollama (if not already installed)
# Visit: https://ollama.ai
# Pull the embedding model
ollama pull nomic-embed-text
# Ollama automatically runs on localhost:11434
# HeroDB will use this by default
```
**Default Configuration:**
- **URL**: `http://localhost:11434`
- **Model**: `nomic-embed-text`
- **Dimensions**: 768 (for nomic-embed-text)
### Custom Embedding Service (Optional)
To use a custom embedding service instead of Ollama:
```bash
# Set custom embedding service URL
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"
# Optional: Set authentication if required
redis-cli HSET config:core:aiembed token "your-api-token"
```
### Embedding Service API Contracts
#### Ollama API (Default)
HeroDB calls Ollama using this format:
```bash
POST http://localhost:11434/api/embeddings
Content-Type: application/json
{
"model": "nomic-embed-text",
"prompt": "Your text to embed"
}
```
Response:
```json
{
"embedding": [0.1, 0.2, 0.3, ...]
}
```
#### Custom Service API
Your custom embedding service should accept POST requests with this JSON format:
```json
{
"texts": ["text1", "text2"], // Optional: array of texts
"images": ["base64_image1", "base64_image2"], // Optional: base64 encoded images
"model": "your-model-name" // Optional: model specification
}
```
And return responses in this format:
```json
{
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]], // Array of embedding vectors
"model": "model-name", // Model used
"usage": { // Optional usage stats
"tokens": 100,
"requests": 2
}
}
```
## Commands Reference
### Dataset Management
#### LANCE CREATE
Create a new vector dataset with specified dimensions and optional schema.
```bash
LANCE CREATE <dataset> DIM <dimension> [SCHEMA field:type ...]
```
**Parameters:**
- `dataset`: Name of the dataset
- `dimension`: Vector dimension (e.g., 384, 768, 1536)
- `field:type`: Optional metadata fields (string, int, float, bool)
**Examples:**
```bash
# Create a simple dataset for 384-dimensional vectors
LANCE CREATE documents DIM 384
# Create dataset with metadata schema
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool
```
#### LANCE LIST
List all available datasets.
```bash
LANCE LIST
```
**Returns:** Array of dataset names
#### LANCE INFO
Get information about a specific dataset.
```bash
LANCE INFO <dataset>
```
**Returns:** Dataset metadata including name, version, row count, and schema
#### LANCE DROP
Delete a dataset and all its data.
```bash
LANCE DROP <dataset>
```
### Data Operations
#### LANCE STORE
Store multimodal data (text/images) with automatic embedding generation.
```bash
LANCE STORE <dataset> [TEXT <text>] [IMAGE <base64>] [key value ...]
```
**Parameters:**
- `dataset`: Target dataset name
- `TEXT`: Text content to embed
- `IMAGE`: Base64-encoded image to embed
- `key value`: Metadata key-value pairs
**Examples:**
```bash
# Store text with metadata
LANCE STORE documents TEXT "Machine learning is transforming industries" category "AI" author "John Doe"
# Store image with metadata
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." category "nature" tags "landscape,mountains"
# Store both text and image
LANCE STORE multimodal TEXT "Beautiful sunset" IMAGE "base64data..." location "California"
```
**Returns:** Unique ID of the stored item
### Search Operations
#### LANCE SEARCH
Search using a raw vector.
```bash
LANCE SEARCH <dataset> VECTOR <vector> K <k> [NPROBES <n>] [REFINE <r>]
```
**Parameters:**
- `dataset`: Dataset to search
- `vector`: Comma-separated vector values (e.g., "0.1,0.2,0.3")
- `k`: Number of results to return
- `NPROBES`: Number of partitions to search (optional)
- `REFINE`: Refine factor for better accuracy (optional)
**Example:**
```bash
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5 NPROBES 10
```
#### LANCE SEARCH.TEXT
Search using text query (automatically embedded).
```bash
LANCE SEARCH.TEXT <dataset> <query_text> K <k> [NPROBES <n>] [REFINE <r>]
```
**Parameters:**
- `dataset`: Dataset to search
- `query_text`: Text query to search for
- `k`: Number of results to return
- `NPROBES`: Number of partitions to search (optional)
- `REFINE`: Refine factor for better accuracy (optional)
**Example:**
```bash
LANCE SEARCH.TEXT documents "artificial intelligence applications" K 10 NPROBES 20
```
**Returns:** Array of results with distance scores and metadata
### Embedding Operations
#### LANCE EMBED.TEXT
Generate embeddings for text without storing.
```bash
LANCE EMBED.TEXT <text1> [text2] [text3] ...
```
**Example:**
```bash
LANCE EMBED.TEXT "Hello world" "Machine learning" "Vector database"
```
**Returns:** Array of embedding vectors
### Index Management
#### LANCE CREATE.INDEX
Create a vector index for faster search performance.
```bash
LANCE CREATE.INDEX <dataset> <index_type> [PARTITIONS <n>] [SUBVECTORS <n>]
```
**Parameters:**
- `dataset`: Dataset to index
- `index_type`: Index type (currently supports "IVF_PQ")
- `PARTITIONS`: Number of partitions (default: 256)
- `SUBVECTORS`: Number of sub-vectors for PQ (default: 16)
**Example:**
```bash
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 512 SUBVECTORS 32
```
## Usage Patterns
### 1. Document Search System
```bash
# Setup
LANCE CREATE documents DIM 384 SCHEMA title:string content:string category:string
# Store documents
LANCE STORE documents TEXT "Introduction to machine learning algorithms" title "ML Basics" category "education"
LANCE STORE documents TEXT "Deep learning neural networks explained" title "Deep Learning" category "education"
LANCE STORE documents TEXT "Building scalable web applications" title "Web Dev" category "programming"
# Create index for better performance
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256
# Search
LANCE SEARCH.TEXT documents "neural networks" K 5
```
### 2. Image Similarity Search
```bash
# Setup
LANCE CREATE images DIM 512 SCHEMA filename:string tags:string
# Store images (base64 encoded)
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." filename "sunset.jpg" tags "nature,landscape"
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgBB..." filename "city.jpg" tags "urban,architecture"
# Search by image
LANCE STORE temp_search IMAGE "query_image_base64..."
# Then use the returned ID to get embedding and search
```
### 3. Multimodal Content Management
```bash
# Setup
LANCE CREATE content DIM 768 SCHEMA type:string source:string
# Store mixed content
LANCE STORE content TEXT "Product description for smartphone" type "product" source "catalog"
LANCE STORE content IMAGE "product_image_base64..." type "product_image" source "catalog"
# Search across all content types
LANCE SEARCH.TEXT content "smartphone features" K 10
```
## Performance Considerations
### Vector Dimensions
- **384**: Good for general text (e.g., sentence-transformers)
- **768**: Standard for BERT-like models
- **1536**: OpenAI text-embedding-ada-002
- **Higher dimensions**: Better accuracy but slower search
### Index Configuration
- **More partitions**: Better for larger datasets (>100K vectors)
- **More sub-vectors**: Better compression but slower search
- **NPROBES**: Higher values = better accuracy, slower search
### Best Practices
1. **Create indexes** for datasets with >1000 vectors
2. **Use appropriate dimensions** based on your embedding model
3. **Configure NPROBES** based on accuracy vs speed requirements
4. **Batch operations** when possible for better performance
5. **Monitor embedding service** response times and rate limits
## Error Handling
Common error scenarios and solutions:
### Embedding Service Errors
```bash
# Error: Embedding service not configured
ERR Embedding service URL not configured. Set it with: HSET config:core:aiembed url <YOUR_EMBEDDING_SERVICE_URL>
# Error: Service unavailable
ERR Embedding service returned error 404 Not Found
```
**Solution:** Ensure embedding service is running and URL is correct.
### Dataset Errors
```bash
# Error: Dataset doesn't exist
ERR Dataset 'mydata' does not exist
# Error: Dimension mismatch
ERR Vector dimension mismatch: expected 384, got 768
```
**Solution:** Create dataset first or check vector dimensions.
### Search Errors
```bash
# Error: Invalid vector format
ERR Invalid vector format
# Error: No index available
ERR No index available for fast search
```
**Solution:** Check vector format or create an index.
## Integration Examples
### With Python
```python
import redis
import json
r = redis.Redis(host='localhost', port=6379)
# Create dataset
r.execute_command('LANCE', 'CREATE', 'docs', 'DIM', '384')
# Store document
result = r.execute_command('LANCE', 'STORE', 'docs',
'TEXT', 'Machine learning tutorial',
'category', 'education')
print(f"Stored with ID: {result}")
# Search
results = r.execute_command('LANCE', 'SEARCH.TEXT', 'docs',
'machine learning', 'K', '5')
print(f"Search results: {results}")
```
### With Node.js
```javascript
const redis = require('redis');
const client = redis.createClient();
// Create dataset
await client.sendCommand(['LANCE', 'CREATE', 'docs', 'DIM', '384']);
// Store document
const id = await client.sendCommand(['LANCE', 'STORE', 'docs',
'TEXT', 'Deep learning guide',
'category', 'AI']);
// Search
const results = await client.sendCommand(['LANCE', 'SEARCH.TEXT', 'docs',
'deep learning', 'K', '10']);
```
## Monitoring and Maintenance
### Health Checks
```bash
# Check if Lance store is available
LANCE LIST
# Check dataset health
LANCE INFO mydataset
# Test embedding service
LANCE EMBED.TEXT "test"
```
### Maintenance Operations
```bash
# Backup: Use standard Redis backup procedures
# The Lance data is stored separately in the data directory
# Cleanup: Remove unused datasets
LANCE DROP old_dataset
# Reindex: Drop and recreate indexes if needed
LANCE DROP dataset_name
LANCE CREATE dataset_name DIM 384
# Re-import data
LANCE CREATE.INDEX dataset_name IVF_PQ
```
This integration provides a powerful foundation for building AI-powered applications with vector search capabilities while maintaining the familiar Redis interface.

docs/rpc_examples.md Normal file

@@ -0,0 +1,141 @@
# HeroDB JSON-RPC Examples
These examples show full JSON-RPC 2.0 payloads for managing HeroDB via the RPC API (enable with `--enable-rpc`). Methods are named as `hero_<function>`. Params are positional arrays; enum values are strings (e.g., `"Redb"`). Copy-paste into Postman or similar clients.
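They can also be sent from the command line. A sketch, assuming the server accepts POSTs at the root path on the RPC port (default 8080):
```bash
# Save any payload below to payload.json, then:
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d @payload.json
```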
## Database Management
### Create Database
Creates a new database with optional per-database encryption key (stored write-only in Admin DB 0).
```json
{
"jsonrpc": "2.0",
"id": 1,
"method": "hero_createDatabase",
"params": [
"Redb",
{ "name": null, "storage_path": null, "max_size": null, "redis_version": null },
null
]
}
```
With encryption:
```json
{
"jsonrpc": "2.0",
"id": 2,
"method": "hero_createDatabase",
"params": [
"Sled",
{ "name": "secure-db", "storage_path": null, "max_size": null, "redis_version": null },
"my-per-db-encryption-key"
]
}
```
### List Databases
Returns array of database infos (id, backend, encrypted status, size, etc.).
```json
{
"jsonrpc": "2.0",
"id": 3,
"method": "hero_listDatabases",
"params": []
}
```
### Get Database Info
Retrieves detailed info for a specific database.
```json
{
"jsonrpc": "2.0",
"id": 4,
"method": "hero_getDatabaseInfo",
"params": [1]
}
```
### Delete Database
Removes physical database file; metadata remains in Admin DB 0.
```json
{
"jsonrpc": "2.0",
"id": 5,
"method": "hero_deleteDatabase",
"params": [1]
}
```
## Access Control
### Add Access Key
Adds a hashed access key for private databases. Permissions: `"read"` or `"readwrite"`.
```json
{
"jsonrpc": "2.0",
"id": 6,
"method": "hero_addAccessKey",
"params": [2, "my-access-key", "readwrite"]
}
```
### List Access Keys
Returns array of key hashes, permissions, and creation timestamps.
```json
{
"jsonrpc": "2.0",
"id": 7,
"method": "hero_listAccessKeys",
"params": [2]
}
```
### Delete Access Key
Removes key by its SHA-256 hash.
```json
{
"jsonrpc": "2.0",
"id": 8,
"method": "hero_deleteAccessKey",
"params": [2, "0123abcd...keyhash..."]
}
```
### Set Database Public/Private
Toggles public access (default true). Private databases require access keys.
```json
{
"jsonrpc": "2.0",
"id": 9,
"method": "hero_setDatabasePublic",
"params": [2, false]
}
```
## Server Info
### Get Server Stats
Returns stats like total databases and uptime.
```json
{
"jsonrpc": "2.0",
"id": 10,
"method": "hero_getServerStats",
"params": []
}
```
## Notes
- Per-database encryption keys are write-only; set at creation and used transparently.
- Access keys are hashed (SHA-256) for storage; provide plaintext in requests.
- Backend options: `"Redb"` (default) or `"Sled"`.
- Config object fields (name, storage_path, etc.) are optional and currently ignored, but the config object itself must be supplied positionally.


@@ -1,191 +1,6 @@
# HeroDB Examples
# HeroDB Tantivy Search Examples
This directory contains examples demonstrating HeroDB's capabilities including full-text search powered by Tantivy and vector database operations using Lance.
## Available Examples
1. **[Tantivy Search Demo](#tantivy-search-demo-bash-script)** - Full-text search capabilities
2. **[Lance Vector Database Demo](#lance-vector-database-demo-bash-script)** - Vector database and AI operations
3. **[AGE Encryption Demo](age_bash_demo.sh)** - Cryptographic operations
4. **[Simple Demo](simple_demo.sh)** - Basic Redis operations
---
## Lance Vector Database Demo (Bash Script)
### Overview
The `lance_vector_demo.sh` script provides a comprehensive demonstration of HeroDB's vector database capabilities using Lance. It showcases vector storage, similarity search, multimodal data handling, and AI-powered operations with external embedding services.
### Prerequisites
1. **HeroDB Server**: The server must be running (default port 6379)
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH
3. **Embedding Service** (optional): For full functionality, set up an external embedding service
### Running the Demo
#### Step 1: Start HeroDB Server
```bash
# From the project root directory
cargo run -- --dir ./test_data --port 6379
```
#### Step 2: Run the Demo (in a new terminal)
```bash
# From the project root directory
./examples/lance_vector_demo.sh
```
### What the Demo Covers
The script demonstrates comprehensive vector database operations:
1. **Dataset Management**
- Creating vector datasets with custom dimensions
- Defining schemas with metadata fields
- Listing and inspecting datasets
- Dataset information and statistics
2. **Embedding Operations**
- Text embedding generation via external services
- Multimodal embedding support (text + images)
- Batch embedding operations
3. **Data Storage**
- Storing text documents with automatic embedding
- Storing images with metadata
- Multimodal content storage
- Rich metadata support
4. **Vector Search**
- Similarity search with raw vectors
- Text-based semantic search
- Configurable search parameters (K, NPROBES, REFINE)
- Cross-modal search capabilities
5. **Index Management**
- Creating IVF_PQ indexes for performance
- Custom index parameters
- Performance optimization
6. **Advanced Features**
- Error handling and recovery
- Performance testing concepts
- Monitoring and maintenance
- Cleanup operations
### Key Lance Commands Demonstrated
#### Dataset Management
```bash
# Create vector dataset
LANCE CREATE documents DIM 384
# Create dataset with schema
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool
# List datasets
LANCE LIST
# Get dataset information
LANCE INFO documents
```
#### Data Operations
```bash
# Store text with metadata
LANCE STORE documents TEXT "Machine learning tutorial" category "education" author "John Doe"
# Store image with metadata
LANCE STORE images IMAGE "base64_encoded_image..." filename "photo.jpg" tags "nature,landscape"
# Store multimodal content
LANCE STORE content TEXT "Product description" IMAGE "base64_image..." type "product"
```
#### Search Operations
```bash
# Search with raw vector
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5
# Semantic text search
LANCE SEARCH.TEXT documents "artificial intelligence" K 10 NPROBES 20
# Generate embeddings
LANCE EMBED.TEXT "Hello world" "Machine learning"
```
#### Index Management
```bash
# Create performance index
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256 SUBVECTORS 16
# Drop dataset
LANCE DROP old_dataset
```
### Configuration
#### Setting Up Embedding Service
```bash
# Configure embedding service URL
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"
# Optional: Set authentication token
redis-cli HSET config:core:aiembed token "your-api-token"
```
#### Embedding Service API
Your embedding service should accept POST requests:
```json
{
"texts": ["text1", "text2"],
"images": ["base64_image1", "base64_image2"],
"model": "your-model-name"
}
```
And return responses:
```json
{
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
"model": "model-name",
"usage": {"tokens": 100, "requests": 2}
}
```
### Interactive Features
The demo script includes:
- **Colored output** for better readability
- **Step-by-step execution** with explanations
- **Error handling** demonstrations
- **Automatic cleanup** options
- **Performance testing** concepts
- **Real-world usage** examples
### Use Cases Demonstrated
1. **Document Search System**
- Semantic document retrieval
- Metadata filtering
- Relevance ranking
2. **Image Similarity Search**
- Visual content matching
- Tag-based filtering
- Multimodal queries
3. **Product Recommendations**
- Feature-based similarity
- Category filtering
- Price range queries
4. **Content Management**
- Mixed media storage
- Cross-modal search
- Rich metadata support
---
This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.
## Tantivy Search Demo (Bash Script)


@@ -1,426 +0,0 @@
#!/bin/bash
# Lance Vector Database Demo Script
# This script demonstrates all Lance vector database operations in HeroDB
set -e # Exit on any error
# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6379"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
execute_command() {
local cmd="$1"
local description="$2"
echo
log_info "Executing: $description"
echo "Command: $cmd"
if result=$($cmd 2>&1); then
log_success "Result: $result"
else
log_error "Failed: $result"
return 1
fi
}
# Check if HeroDB is running
check_herodb() {
log_info "Checking if HeroDB is running..."
if ! $REDIS_CLI ping > /dev/null 2>&1; then
log_error "HeroDB is not running. Please start it first:"
echo " cargo run -- --dir ./test_data --port $REDIS_PORT"
exit 1
fi
log_success "HeroDB is running"
}
# Setup embedding service configuration
setup_embedding_service() {
log_info "Setting up embedding service configuration..."
# Note: This is a mock URL for demonstration
# In production, replace with your actual embedding service
execute_command \
"$REDIS_CLI HSET config:core:aiembed url 'http://localhost:8080/embed'" \
"Configure embedding service URL"
# Optional: Set authentication token
# execute_command \
# "$REDIS_CLI HSET config:core:aiembed token 'your-api-token'" \
# "Configure embedding service token"
log_warning "Note: Embedding service at http://localhost:8080/embed is not running."
log_warning "Some operations will fail, but this demonstrates the command structure."
}
# Dataset Management Operations
demo_dataset_management() {
echo
echo "=========================================="
echo " DATASET MANAGEMENT DEMO"
echo "=========================================="
# List datasets (should be empty initially)
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets (initially empty)"
# Create a simple dataset
execute_command \
"$REDIS_CLI LANCE CREATE documents DIM 384" \
"Create a simple document dataset with 384 dimensions"
# Create a dataset with schema
execute_command \
"$REDIS_CLI LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool description:string" \
"Create products dataset with custom schema"
# Create an image dataset
execute_command \
"$REDIS_CLI LANCE CREATE images DIM 512 SCHEMA filename:string tags:string width:int height:int" \
"Create images dataset for multimodal content"
# List datasets again
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets (should show 3 datasets)"
# Get info about datasets
execute_command \
"$REDIS_CLI LANCE INFO documents" \
"Get information about documents dataset"
execute_command \
"$REDIS_CLI LANCE INFO products" \
"Get information about products dataset"
}
# Embedding Operations
demo_embedding_operations() {
echo
echo "=========================================="
echo " EMBEDDING OPERATIONS DEMO"
echo "=========================================="
log_warning "The following operations will fail because no embedding service is running."
log_warning "This demonstrates the command structure and error handling."
# Try to embed text (will fail without embedding service)
execute_command \
"$REDIS_CLI LANCE EMBED.TEXT 'Hello world'" \
"Generate embedding for single text" || true
# Try to embed multiple texts
execute_command \
"$REDIS_CLI LANCE EMBED.TEXT 'Machine learning' 'Artificial intelligence' 'Deep learning'" \
"Generate embeddings for multiple texts" || true
}
# Data Storage Operations
demo_data_storage() {
echo
echo "=========================================="
echo " DATA STORAGE DEMO"
echo "=========================================="
log_warning "Storage operations will fail without embedding service, but show command structure."
# Store text documents
execute_command \
"$REDIS_CLI LANCE STORE documents TEXT 'Introduction to machine learning algorithms and their applications in modern AI systems' category 'education' author 'John Doe' difficulty 'beginner'" \
"Store a document with text and metadata" || true
execute_command \
"$REDIS_CLI LANCE STORE documents TEXT 'Deep learning neural networks for computer vision tasks' category 'research' author 'Jane Smith' difficulty 'advanced'" \
"Store another document" || true
# Store product information
execute_command \
"$REDIS_CLI LANCE STORE products TEXT 'High-performance laptop with 16GB RAM and SSD storage' category 'electronics' price '1299.99' available 'true'" \
"Store product with text description" || true
# Store image with metadata (using placeholder base64)
execute_command \
"$REDIS_CLI LANCE STORE images IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sample.png' tags 'test,demo' width '1' height '1'" \
"Store image with metadata (1x1 pixel PNG)" || true
# Store multimodal content
execute_command \
"$REDIS_CLI LANCE STORE images TEXT 'Beautiful sunset over mountains' IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sunset.png' tags 'nature,landscape' location 'California'" \
"Store multimodal content (text + image)" || true
}
# Search Operations
demo_search_operations() {
echo
echo "=========================================="
echo " SEARCH OPERATIONS DEMO"
echo "=========================================="
log_warning "Search operations will fail without data, but show command structure."
# Search with raw vector
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 5" \
"Search with raw vector (5 results)" || true
# Search with vector and parameters
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 10 NPROBES 20 REFINE 2" \
"Search with vector and advanced parameters" || true
# Text-based search
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT documents 'machine learning algorithms' K 5" \
"Search using text query" || true
# Text search with parameters
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT products 'laptop computer' K 3 NPROBES 10" \
"Search products using text with parameters" || true
# Search in image dataset
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT images 'sunset landscape' K 5" \
"Search images using text description" || true
}
# Index Management Operations
demo_index_management() {
echo
echo "=========================================="
echo " INDEX MANAGEMENT DEMO"
echo "=========================================="
# Create indexes for better search performance
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX documents IVF_PQ" \
"Create default IVF_PQ index for documents"
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX products IVF_PQ PARTITIONS 512 SUBVECTORS 32" \
"Create IVF_PQ index with custom parameters for products"
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX images IVF_PQ PARTITIONS 256 SUBVECTORS 16" \
"Create IVF_PQ index for images dataset"
log_success "Indexes created successfully"
}
# Advanced Usage Examples
demo_advanced_usage() {
echo
echo "=========================================="
echo " ADVANCED USAGE EXAMPLES"
echo "=========================================="
# Create a specialized dataset for semantic search
execute_command \
"$REDIS_CLI LANCE CREATE semantic_search DIM 1536 SCHEMA title:string content:string url:string timestamp:string source:string" \
"Create dataset for semantic search with rich metadata"
# Demonstrate batch operations concept
log_info "Batch operations example (would store multiple items):"
echo " for doc in documents:"
echo " LANCE STORE semantic_search TEXT \"\$doc_content\" title \"\$title\" url \"\$url\""
# Show monitoring commands
log_info "Monitoring and maintenance commands:"
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets for monitoring"
# Show dataset statistics
for dataset in documents products images semantic_search; do
execute_command \
"$REDIS_CLI LANCE INFO $dataset" \
"Get statistics for $dataset" || true
done
}
# Cleanup Operations
demo_cleanup() {
echo
echo "=========================================="
echo " CLEANUP OPERATIONS DEMO"
echo "=========================================="
log_info "Demonstrating cleanup operations..."
# Drop individual datasets
execute_command \
"$REDIS_CLI LANCE DROP semantic_search" \
"Drop semantic_search dataset"
# List remaining datasets
execute_command \
"$REDIS_CLI LANCE LIST" \
"List remaining datasets"
# Ask user if they want to clean up all test data
echo
read -p "Do you want to clean up all test datasets? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
execute_command \
"$REDIS_CLI LANCE DROP documents" \
"Drop documents dataset"
execute_command \
"$REDIS_CLI LANCE DROP products" \
"Drop products dataset"
execute_command \
"$REDIS_CLI LANCE DROP images" \
"Drop images dataset"
execute_command \
"$REDIS_CLI LANCE LIST" \
"Verify all datasets are cleaned up"
log_success "All test datasets cleaned up"
else
log_info "Keeping test datasets for further experimentation"
fi
}
# Error Handling Demo
demo_error_handling() {
echo
echo "=========================================="
echo " ERROR HANDLING DEMO"
echo "=========================================="
log_info "Demonstrating various error conditions..."
# Try to access non-existent dataset
execute_command \
"$REDIS_CLI LANCE INFO nonexistent_dataset" \
"Try to get info for non-existent dataset" || true
# Try to search non-existent dataset
execute_command \
"$REDIS_CLI LANCE SEARCH nonexistent_dataset VECTOR '0.1,0.2' K 5" \
"Try to search non-existent dataset" || true
# Try to drop non-existent dataset
execute_command \
"$REDIS_CLI LANCE DROP nonexistent_dataset" \
"Try to drop non-existent dataset" || true
# Try invalid vector format
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR 'invalid,vector,format' K 5" \
"Try search with invalid vector format" || true
log_info "Error handling demonstration complete"
}
# Performance Testing Demo
demo_performance_testing() {
echo
echo "=========================================="
echo " PERFORMANCE TESTING DEMO"
echo "=========================================="
log_info "Creating performance test dataset..."
execute_command \
"$REDIS_CLI LANCE CREATE perf_test DIM 128 SCHEMA batch_id:string item_id:string" \
"Create performance test dataset"
log_info "Performance testing would involve:"
echo " 1. Bulk loading thousands of vectors"
echo " 2. Creating indexes with different parameters"
echo " 3. Measuring search latency with various K values"
echo " 4. Testing different NPROBES settings"
echo " 5. Monitoring memory usage"
log_info "Example performance test commands:"
echo " # Test search speed with different parameters"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10 NPROBES 50"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 100 NPROBES 100"
# Clean up performance test dataset
execute_command \
"$REDIS_CLI LANCE DROP perf_test" \
"Clean up performance test dataset"
}
# Main execution
main() {
echo "=========================================="
echo " LANCE VECTOR DATABASE DEMO SCRIPT"
echo "=========================================="
echo
echo "This script demonstrates all Lance vector database operations."
echo "Note: Some operations will fail without a running embedding service."
echo "This is expected and demonstrates error handling."
echo
# Check prerequisites
check_herodb
# Setup
setup_embedding_service
# Run demos
demo_dataset_management
demo_embedding_operations
demo_data_storage
demo_search_operations
demo_index_management
demo_advanced_usage
demo_error_handling
demo_performance_testing
# Cleanup
demo_cleanup
echo
echo "=========================================="
echo " DEMO COMPLETE"
echo "=========================================="
echo
log_success "Lance vector database demo completed successfully!"
echo
echo "Next steps:"
echo "1. Set up a real embedding service (OpenAI, Hugging Face, etc.)"
echo "2. Update the embedding service URL configuration"
echo "3. Try storing and searching real data"
echo "4. Experiment with different vector dimensions and index parameters"
echo "5. Build your AI-powered application!"
echo
echo "For more information, see docs/lance_vector_db.md"
}
# Run the demo
main "$@"

143
run.sh

@@ -1,143 +0,0 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
DB_DIR="/tmp/test_db"
PORT=6381
SERVER_PID=""
# Function to print colored output
print_status() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
# Function to cleanup on exit
cleanup() {
if [ ! -z "$SERVER_PID" ]; then
print_status "Stopping HeroDB server (PID: $SERVER_PID)..."
kill $SERVER_PID 2>/dev/null || true
wait $SERVER_PID 2>/dev/null || true
fi
# Clean up test database
if [ -d "$DB_DIR" ]; then
print_status "Cleaning up test database directory..."
rm -rf "$DB_DIR"
fi
}
# Set trap to cleanup on script exit
trap cleanup EXIT
# Function to wait for server to start
wait_for_server() {
local max_attempts=30
local attempt=1
print_status "Waiting for server to start on port $PORT..."
while [ $attempt -le $max_attempts ]; do
if nc -z localhost $PORT 2>/dev/null; then
print_success "Server is ready!"
return 0
fi
echo -n "."
sleep 1
attempt=$((attempt + 1))
done
print_error "Server failed to start within $max_attempts seconds"
return 1
}
# Function to send Redis command and get response
redis_cmd() {
local cmd="$1"
local expected="$2"
print_status "Testing: $cmd"
local result=$(echo "$cmd" | redis-cli -p $PORT --raw 2>/dev/null || echo "ERROR")
if [ "$expected" != "" ] && [ "$result" != "$expected" ]; then
print_error "Expected: '$expected', Got: '$result'"
return 1
else
print_success "$cmd -> $result"
return 0
fi
}
# Main execution
main() {
print_status "Starting HeroDB"
# Build the project
print_status "Building HeroDB..."
if ! cargo build -p herodb --release; then
print_error "Failed to build HeroDB"
exit 1
fi
# Create test database directory
mkdir -p "$DB_DIR"
# Start the server
print_status "Starting HeroDB server..."
${SCRIPT_DIR}/target/release/herodb --dir "$DB_DIR" --port $PORT &
SERVER_PID=$!
# Wait for server to start
if ! wait_for_server; then
print_error "Failed to start server"
exit 1
fi
}
# Check dependencies
check_dependencies() {
if ! command -v cargo &> /dev/null; then
print_error "cargo is required but not installed"
exit 1
fi
if ! command -v nc &> /dev/null; then
print_warning "netcat (nc) not found - some tests may not work properly"
fi
if ! command -v redis-cli &> /dev/null; then
print_warning "redis-cli not found - using netcat fallback"
fi
}
# Run dependency check and main function
check_dependencies
main "$@"
tail -f /dev/null


@@ -1,7 +1,4 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
echo "🧪 Running HeroDB Redis Compatibility Tests"
echo "=========================================="

481
src/admin_meta.rs Normal file

@@ -0,0 +1,481 @@
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, Mutex, RwLock};
use std::collections::HashMap;
use crate::error::DBError;
use crate::options;
use crate::rpc::Permissions;
use crate::storage::Storage;
use crate::storage_sled::SledStorage;
use crate::storage_trait::StorageBackend;
// Key builders
fn k_admin_next_id() -> &'static str {
"admin:next_id"
}
fn k_admin_dbs() -> &'static str {
"admin:dbs"
}
fn k_meta_db(id: u64) -> String {
format!("meta:db:{}", id)
}
fn k_meta_db_keys(id: u64) -> String {
format!("meta:db:{}:keys", id)
}
fn k_meta_db_enc(id: u64) -> String {
format!("meta:db:{}:enc", id)
}
// Global cache of admin DB 0 handles per base_dir to avoid sled/redb file-lock contention
// and to correctly isolate different test instances with distinct directories.
static ADMIN_STORAGES: OnceLock<RwLock<HashMap<String, Arc<dyn StorageBackend>>>> = OnceLock::new();
// Global registry for data DB storages to avoid double-open across the process.
static DATA_STORAGES: OnceLock<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>> = OnceLock::new();
static DATA_INIT_LOCK: Mutex<()> = Mutex::new(());
fn init_admin_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
let db_file = PathBuf::from(base_dir).join("0.db");
if let Some(parent_dir) = db_file.parent() {
std::fs::create_dir_all(parent_dir).map_err(|e| {
DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
})?;
}
let storage: Arc<dyn StorageBackend> = match backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
};
Ok(storage)
}
// Get or initialize a cached handle to admin DB 0 per base_dir (thread-safe, no double-open race)
pub fn open_admin_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
let map = ADMIN_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
// Fast path
if let Some(st) = map.read().unwrap().get(base_dir) {
return Ok(st.clone());
}
// Slow path with write lock
{
let mut w = map.write().unwrap();
if let Some(st) = w.get(base_dir) {
return Ok(st.clone());
}
// Detect existing 0.db backend by filesystem, if present.
let admin_path = PathBuf::from(base_dir).join("0.db");
let detected = if admin_path.exists() {
if admin_path.is_file() {
Some(options::BackendType::Redb)
} else if admin_path.is_dir() {
Some(options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = match detected {
Some(d) if d != backend => {
eprintln!(
"warning: Admin DB 0 at {} appears to be {:?}, but process default is {:?}. Using detected backend.",
admin_path.display(),
d,
backend
);
d
}
Some(d) => d,
None => backend, // First boot: use requested backend to initialize 0.db
};
let st = init_admin_storage(base_dir, effective_backend, admin_secret)?;
w.insert(base_dir.to_string(), st.clone());
Ok(st)
}
}
// Ensure admin structures exist in encrypted DB 0
pub fn ensure_bootstrap(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
// Initialize next id if missing
if !admin.exists(k_admin_next_id())? {
admin.set(k_admin_next_id().to_string(), "1".to_string())?;
}
// admin:dbs is a hash; it's fine if it doesn't exist (hlen -> 0)
Ok(())
}
// Get or initialize a shared handle to a data DB (> 0), avoiding double-open across subsystems
pub fn open_data_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Arc<dyn StorageBackend>, DBError> {
if id == 0 {
return open_admin_storage(base_dir, backend, admin_secret);
}
// Validate existence in admin metadata
if !db_exists(base_dir, backend.clone(), admin_secret, id)? {
return Err(DBError(format!(
"Cannot open database instance {}, as that database instance does not exist.",
id
)));
}
let map = DATA_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
// Fast path
if let Some(st) = map.read().unwrap().get(&id) {
return Ok(st.clone());
}
// Slow path with init lock
let _guard = DATA_INIT_LOCK.lock().unwrap();
if let Some(st) = map.read().unwrap().get(&id) {
return Ok(st.clone());
}
// Resolve effective backend for this db id:
// 1) Try admin meta "backend" field
// 2) If missing, sniff filesystem (file => Redb, dir => Sled), then persist into admin meta
// 3) Fallback to requested 'backend' (startup default) if nothing else is known
let meta_backend = get_database_backend(base_dir, backend.clone(), admin_secret, id).ok().flatten();
let db_path = PathBuf::from(base_dir).join(format!("{}.db", id));
let sniffed_backend = if db_path.exists() {
if db_path.is_file() {
Some(options::BackendType::Redb)
} else if db_path.is_dir() {
Some(options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(backend.clone());
// If we had to sniff (i.e., meta missing), persist it for future robustness
if meta_backend.is_none() {
let _ = set_database_backend(base_dir, backend.clone(), admin_secret, id, effective_backend.clone());
}
// Warn if caller-provided backend differs from effective
if effective_backend != backend {
eprintln!(
"notice: Database {} backend resolved to {:?} (caller requested {:?}). Using resolved backend.",
id, effective_backend, backend
);
}
// Determine per-db encryption (from admin meta)
let enc = get_enc_key(base_dir, backend.clone(), admin_secret, id)?;
let should_encrypt = enc.is_some();
// Build database file path and ensure parent dir exists
let db_file = PathBuf::from(base_dir).join(format!("{}.db", id));
if let Some(parent_dir) = db_file.parent() {
std::fs::create_dir_all(parent_dir).map_err(|e| {
DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
})?;
}
// Open storage using the effective backend
let storage: Arc<dyn StorageBackend> = match effective_backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
};
// Publish to registry
map.write().unwrap().insert(id, storage.clone());
Ok(storage)
}
// Allocate the next DB id and persist new pointer
pub fn allocate_next_id(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<u64, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let cur = admin
.get(k_admin_next_id())?
.unwrap_or_else(|| "1".to_string());
let id: u64 = cur.parse().unwrap_or(1);
let next = id.checked_add(1).ok_or_else(|| DBError("next_id overflow".into()))?;
admin.set(k_admin_next_id().to_string(), next.to_string())?;
// Register into admin:dbs set/hash
let _ = admin.hset(k_admin_dbs(), vec![(id.to_string(), "1".to_string())])?;
// Default meta for the new db: public true
let meta_key = k_meta_db(id);
let _ = admin.hset(&meta_key, vec![("public".to_string(), "true".to_string())])?;
Ok(id)
}
// Check existence of a db id in admin:dbs
pub fn db_exists(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<bool, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
Ok(admin.hexists(k_admin_dbs(), &id.to_string())?)
}
// Get per-db encryption key, if any
pub fn get_enc_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<String>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
admin.get(&k_meta_db_enc(id))
}
// Set per-db encryption key (called during create)
pub fn set_enc_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
admin.set(k_meta_db_enc(id), key.to_string())
}
// Set database public flag
pub fn set_database_public(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
public: bool,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let _ = admin.hset(&mk, vec![("public".to_string(), public.to_string())])?;
Ok(())
}
// Persist per-db backend type in admin metadata (module-scope)
pub fn set_database_backend(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
db_backend: options::BackendType,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let val = match db_backend {
options::BackendType::Redb => "Redb",
options::BackendType::Sled => "Sled",
};
let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
Ok(())
}
pub fn get_database_backend(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<options::BackendType>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
match admin.hget(&mk, "backend")? {
Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
_ => Ok(None),
}
}
// Set database name
pub fn set_database_name(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
name: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let _ = admin.hset(&mk, vec![("name".to_string(), name.to_string())])?;
Ok(())
}
// Get database name
pub fn get_database_name(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<String>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
admin.hget(&mk, "name")
}
// Internal: load public flag; default to true when meta missing
fn load_public(
admin: &Arc<dyn StorageBackend>,
id: u64,
) -> Result<bool, DBError> {
let mk = k_meta_db(id);
match admin.hget(&mk, "public")? {
Some(v) => Ok(v == "true"),
None => Ok(true),
}
}
// Add access key for db (value format: "Read:ts" or "ReadWrite:ts")
pub fn add_access_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_plain: &str,
perms: Permissions,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let hash = crate::rpc::hash_key(key_plain);
let v = match perms {
Permissions::Read => format!("Read:{}", now_secs()),
Permissions::ReadWrite => format!("ReadWrite:{}", now_secs()),
};
let _ = admin.hset(&k_meta_db_keys(id), vec![(hash, v)])?;
Ok(())
}
// Delete access key by hash
pub fn delete_access_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_hash: &str,
) -> Result<bool, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let n = admin.hdel(&k_meta_db_keys(id), vec![key_hash.to_string()])?;
Ok(n > 0)
}
// List access keys, returning (hash, perms, created_at_secs)
pub fn list_access_keys(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Vec<(String, Permissions, u64)>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let pairs = admin.hgetall(&k_meta_db_keys(id))?;
let mut out = Vec::new();
for (hash, val) in pairs {
let (perm, ts) = parse_perm_value(&val);
out.push((hash, perm, ts));
}
Ok(out)
}
// Verify access permission for db id with optional key
// Returns:
// - Ok(Some(Permissions)) when access is allowed
// - Ok(None) when not allowed or db missing (caller can distinguish by calling db_exists)
pub fn verify_access(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_opt: Option<&str>,
) -> Result<Option<Permissions>, DBError> {
// Admin DB 0: require exact admin_secret
if id == 0 {
if let Some(k) = key_opt {
if k == admin_secret {
return Ok(Some(Permissions::ReadWrite));
}
}
return Ok(None);
}
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
if !admin.hexists(k_admin_dbs(), &id.to_string())? {
return Ok(None);
}
// Public?
if load_public(&admin, id)? {
return Ok(Some(Permissions::ReadWrite));
}
// Private: require key and verify
if let Some(k) = key_opt {
let hash = crate::rpc::hash_key(k);
if let Some(v) = admin.hget(&k_meta_db_keys(id), &hash)? {
let (perm, _ts) = parse_perm_value(&v);
return Ok(Some(perm));
}
}
Ok(None)
}
// Enumerate all db ids
pub fn list_dbs(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Vec<u64>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let ids = admin.hkeys(k_admin_dbs())?;
let mut out = Vec::new();
for s in ids {
if let Ok(v) = s.parse() {
out.push(v);
}
}
Ok(out)
}
// Helper: parse permission value "Read:ts" or "ReadWrite:ts"
fn parse_perm_value(v: &str) -> (Permissions, u64) {
let mut parts = v.split(':');
let p = parts.next().unwrap_or("Read");
let ts = parts
.next()
.and_then(|s| s.parse().ok())
.unwrap_or(0u64);
let perm = match p {
"ReadWrite" => Permissions::ReadWrite,
_ => Permissions::Read,
};
(perm, ts)
}
fn now_secs() -> u64 {
use std::time::{SystemTime, UNIX_EPOCH};
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}
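// --- Editor's sketch (not part of the commit): how the admin-meta helpers compose. ---
// A minimal, hedged walkthrough; the base_dir, secret, and access key below are
// illustrative, while every function called is defined in this file.
fn admin_meta_walkthrough() -> Result<(), DBError> {
    let base_dir = "/tmp/herodb";              // hypothetical data directory
    let backend = options::BackendType::Redb;  // process-wide default backend
    let secret = "admin-secret";               // hypothetical admin secret
    // Create encrypted 0.db if missing and seed admin:next_id.
    ensure_bootstrap(base_dir, backend.clone(), secret)?;
    // Allocate a fresh id; this registers it in admin:dbs and defaults it to public.
    let id = allocate_next_id(base_dir, backend.clone(), secret)?;
    // Make it private and grant a read-only access key.
    set_database_public(base_dir, backend.clone(), secret, id, false)?;
    add_access_key(base_dir, backend.clone(), secret, id, "reader-key", Permissions::Read)?;
    // A valid key yields Some(perm); a missing or wrong key yields None.
    let perm = verify_access(base_dir, backend.clone(), secret, id, Some("reader-key"))?;
    assert!(matches!(perm, Some(Permissions::Read)));
    // Lazily open the data DB; its backend is resolved from admin meta or sniffed from disk.
    let _storage = open_data_storage(base_dir, backend, secret, id)?;
    Ok(())
}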


@@ -19,6 +19,8 @@ use age::x25519;
use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey};
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use std::collections::HashSet;
use std::convert::TryInto;
use crate::protocol::Protocol;
use crate::server::Server;
@@ -74,6 +76,125 @@ fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> {
VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey)
}
// ---------- Derivation + Raw X25519 (Ed25519 -> X25519) ----------
//
// We deterministically derive an X25519 keypair from an Ed25519 SigningKey.
// We persist the X25519 public/secret as base64-encoded 32-byte raw values
// (no "age1..."/"AGE-SECRET-KEY-1..." formatting). Name-based encrypt/decrypt
// uses these raw values directly via x25519-dalek + ChaCha20Poly1305.
use chacha20poly1305::{aead::{Aead, KeyInit}, ChaCha20Poly1305, Key, Nonce};
use sha2::{Digest, Sha256};
use x25519_dalek::{PublicKey as XPublicKey, StaticSecret as XStaticSecret};
fn derive_x25519_raw_from_ed25519(sk: &SigningKey) -> ([u8; 32], [u8; 32]) {
// X25519 secret scalar (clamped) from Ed25519 secret
let scalar: [u8; 32] = sk.to_scalar_bytes();
// Build X25519 secret/public using dalek
let xsec = XStaticSecret::from(scalar);
let xpub = XPublicKey::from(&xsec);
(xpub.to_bytes(), xsec.to_bytes())
}
fn derive_x25519_raw_b64_from_ed25519(sk: &SigningKey) -> (String, String) {
let (xpub, xsec) = derive_x25519_raw_from_ed25519(sk);
(B64.encode(xpub), B64.encode(xsec))
}
// Helper: detect whether a stored key looks like an age-formatted string
fn looks_like_age_format(s: &str) -> bool {
s.starts_with("age1") || s.starts_with("AGE-SECRET-KEY-1")
}
// Our container format for name-based raw X25519 encryption:
// bytes = "HDBX1" (5) || eph_pub(32) || nonce(12) || ciphertext(..)
// Entire blob is base64-encoded for transport.
const HDBX1_MAGIC: &[u8; 5] = b"HDBX1";
fn encrypt_b64_with_x25519_raw(recip_pub_b64: &str, msg: &str) -> Result<String, AgeWireError> {
use rand::RngCore;
use rand::rngs::OsRng;
// Parse recipient public key (raw 32 bytes, base64)
let recip_pub_bytes = B64.decode(recip_pub_b64).map_err(|_| AgeWireError::ParseKey)?;
if recip_pub_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
let recip_pub_arr: [u8; 32] = recip_pub_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
let recip_pub: XPublicKey = XPublicKey::from(recip_pub_arr);
// Generate ephemeral X25519 keypair
let mut eph_sec_bytes = [0u8; 32];
OsRng.fill_bytes(&mut eph_sec_bytes);
let eph_sec = XStaticSecret::from(eph_sec_bytes);
let eph_pub = XPublicKey::from(&eph_sec);
// ECDH
let shared = eph_sec.diffie_hellman(&recip_pub);
// Derive symmetric key via SHA-256 over context + shared + parties
let mut hasher = Sha256::default();
hasher.update(b"herodb-x25519-v1");
hasher.update(shared.as_bytes());
hasher.update(eph_pub.as_bytes());
hasher.update(recip_pub.as_bytes());
let key_bytes = hasher.finalize();
let key = Key::from_slice(&key_bytes[..32]);
// Nonce (12 bytes)
let mut nonce_bytes = [0u8; 12];
OsRng.fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
// Encrypt
let cipher = ChaCha20Poly1305::new(key);
let ct = cipher.encrypt(nonce, msg.as_bytes())
.map_err(|e| AgeWireError::Crypto(format!("encrypt: {e}")))?;
// Assemble container
let mut out = Vec::with_capacity(5 + 32 + 12 + ct.len());
out.extend_from_slice(HDBX1_MAGIC);
out.extend_from_slice(eph_pub.as_bytes());
out.extend_from_slice(&nonce_bytes);
out.extend_from_slice(&ct);
Ok(B64.encode(out))
}
fn decrypt_b64_with_x25519_raw(identity_sec_b64: &str, ct_b64: &str) -> Result<String, AgeWireError> {
// Parse X25519 secret (raw 32 bytes, base64)
let sec_bytes = B64.decode(identity_sec_b64).map_err(|_| AgeWireError::ParseKey)?;
if sec_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
let sec_arr: [u8; 32] = sec_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
let xsec = XStaticSecret::from(sec_arr);
let xpub = XPublicKey::from(&xsec); // self public
// Decode container
let blob = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
if blob.len() < 5 + 32 + 12 { return Err(AgeWireError::Crypto("ciphertext too short".to_string())); }
if &blob[..5] != HDBX1_MAGIC { return Err(AgeWireError::Crypto("bad header".to_string())); }
let eph_pub_arr: [u8; 32] = blob[5..5+32].try_into().map_err(|_| AgeWireError::Crypto("bad eph pub".to_string()))?;
let eph_pub = XPublicKey::from(eph_pub_arr);
let nonce_bytes: [u8; 12] = blob[5+32..5+32+12].try_into().unwrap();
let ct = &blob[5+32+12..];
// Recompute shared + key
let shared = xsec.diffie_hellman(&eph_pub);
let mut hasher = Sha256::default();
hasher.update(b"herodb-x25519-v1");
hasher.update(shared.as_bytes());
hasher.update(eph_pub.as_bytes());
hasher.update(xpub.as_bytes());
let key_bytes = hasher.finalize();
let key = Key::from_slice(&key_bytes[..32]);
// Decrypt
let cipher = ChaCha20Poly1305::new(key);
let nonce = Nonce::from_slice(&nonce_bytes);
let pt = cipher.decrypt(nonce, ct)
.map_err(|e| AgeWireError::Crypto(format!("decrypt: {e}")))?;
String::from_utf8(pt).map_err(|_| AgeWireError::Utf8)
}
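// --- Editor's sketch (not part of the commit): HDBX1 round-trip. ---
// A hedged in-module test exercising the container layout described above:
// "HDBX1" || eph_pub(32) || nonce(12) || ciphertext, base64-encoded end to end.
#[cfg(test)]
mod hdbx1_roundtrip_sketch {
    use super::*;
    use rand::RngCore;
    use rand::rngs::OsRng;
    #[test]
    fn encrypt_then_decrypt() {
        // Fresh Ed25519 key; X25519 material is derived deterministically from it.
        let mut secret = [0u8; 32];
        OsRng.fill_bytes(&mut secret);
        let sk = SigningKey::from_bytes(&secret);
        let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
        let ct = encrypt_b64_with_x25519_raw(&xpub_b64, "hello").unwrap();
        let pt = decrypt_b64_with_x25519_raw(&xsec_b64, &ct).unwrap();
        assert_eq!(pt, "hello");
    }
}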
// ---------- Stateless crypto helpers (string in/out) ----------
pub fn gen_enc_keypair() -> (String, String) {
@@ -210,13 +331,72 @@ pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> P
}
}
// ---------- NEW: unified stateless generator (Ed25519 + derived X25519 raw) ----------
//
// Returns 4-tuple:
// [ verify_pub_b64 (32B), signpriv_b64 (32B), x25519_pub_b64 (32B), x25519_sec_b64 (32B) ]
// No persistence (stateless).
pub async fn cmd_age_genkey() -> Protocol {
use rand::RngCore;
use rand::rngs::OsRng;
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
let verify_b64 = B64.encode(verifying_key.to_bytes());
let sign_b64 = B64.encode(signing_key.to_bytes());
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
Protocol::Array(vec![
Protocol::BulkString(verify_b64),
Protocol::BulkString(sign_b64),
Protocol::BulkString(xpub_b64),
Protocol::BulkString(xsec_b64),
])
}
// ---------- NEW: Persistent, named-key commands ----------
pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol {
let (recip, ident) = gen_enc_keypair();
if let Err(e) = sset(server, &enc_pub_key_key(name), &recip) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &ident) { return e.to_protocol(); }
Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)])
use rand::RngCore;
use rand::rngs::OsRng;
// Generate Ed25519 keypair
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
// Encode Ed25519 as base64 (32 bytes)
let verify_b64 = B64.encode(verifying_key.to_bytes());
let sign_b64 = B64.encode(signing_key.to_bytes());
// Derive X25519 raw (32-byte) keys and encode as base64
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
// Decode to create age-formatted strings
let xpub_bytes = B64.decode(&xpub_b64).unwrap();
let xsec_bytes = B64.decode(&xsec_b64).unwrap();
let xpub_arr: [u8; 32] = xpub_bytes.as_slice().try_into().unwrap();
let xsec_arr: [u8; 32] = xsec_bytes.as_slice().try_into().unwrap();
let recip_str = format!("age1{}", B64.encode(xpub_arr));
let ident_str = format!("AGE-SECRET-KEY-1{}", B64.encode(xsec_arr));
// Persist Ed25519 and derived X25519 (key-managed mode)
if let Err(e) = sset(server, &sign_pub_key_key(name), &verify_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &sign_priv_key_key(name), &sign_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
// Return [recipient, identity] in age format
Protocol::Array(vec![
Protocol::BulkString(recip_str),
Protocol::BulkString(ident_str),
])
}
pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
@@ -227,26 +407,76 @@ pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
}
pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol {
let recip = match sget(server, &enc_pub_key_key(name)) {
// Load the stored recipient (a raw base64 32-byte value, or a legacy "age1..." string)
let recip_or_b64 = match sget(server, &enc_pub_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
Ok(None) => {
// Derive from stored Ed25519 if present, then persist
match sget(server, &sign_priv_key_key(name)) {
Ok(Some(sign_b64)) => {
let sk = match parse_ed25519_signing_key(&sign_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
xpub_b64
}
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
}
}
Err(e) => return e.to_protocol(),
};
match encrypt_b64(&recip, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
if looks_like_age_format(&recip_or_b64) {
match encrypt_b64(&recip_or_b64, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
}
} else {
match encrypt_b64_with_x25519_raw(&recip_or_b64, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
}
}
}
pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol {
let ident = match sget(server, &enc_priv_key_key(name)) {
// Load the stored identity (a raw base64 32-byte value, or a legacy "AGE-SECRET-KEY-1..." string)
let ident_or_b64 = match sget(server, &enc_priv_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
Ok(None) => {
// Derive from stored Ed25519 if present, then persist
match sget(server, &sign_priv_key_key(name)) {
Ok(Some(sign_b64)) => {
let sk = match parse_ed25519_signing_key(&sign_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
xsec_b64
}
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
}
}
Err(e) => return e.to_protocol(),
};
match decrypt_b64(&ident, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
if looks_like_age_format(&ident_or_b64) {
match decrypt_b64(&ident_or_b64, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
} else {
match decrypt_b64_with_x25519_raw(&ident_or_b64, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
}
}
@@ -276,33 +506,31 @@ pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig
}
pub async fn cmd_age_list(server: &Server) -> Protocol {
// Returns 4 arrays: ["encpub", <names...>], ["encpriv", ...], ["signpub", ...], ["signpriv", ...]
// Return a flat, deduplicated, sorted list of managed key names (no labels)
let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) };
let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> {
let keys = st.keys(pat)?;
let mut names: Vec<String> = keys.into_iter()
let mut names: Vec<String> = keys
.into_iter()
.filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string()))
.collect();
names.sort();
Ok(names)
};
let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpriv= match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpriv = match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let to_arr = |label: &str, v: Vec<String>| {
let mut out = vec![Protocol::BulkString(label.to_string())];
out.push(Protocol::Array(v.into_iter().map(Protocol::BulkString).collect()));
Protocol::Array(out)
};
let mut set: HashSet<String> = HashSet::new();
for n in encpub.into_iter().chain(encpriv).chain(signpub).chain(signpriv) {
set.insert(n);
}
Protocol::Array(vec![
to_arr("encpub", encpub),
to_arr("encpriv", encpriv),
to_arr("signpub", signpub),
to_arr("signpriv", signpriv),
])
let mut names: Vec<String> = set.into_iter().collect();
names.sort();
Protocol::Array(names.into_iter().map(Protocol::BulkString).collect())
}


@@ -1,14 +1,12 @@
use crate::{error::DBError, protocol::Protocol, server::Server};
use tokio::time::{timeout, Duration};
use futures::future::select_all;
use std::sync::Arc;
use base64::Engine;
#[derive(Debug, Clone)]
pub enum Cmd {
Ping,
Echo(String),
Select(u64), // Changed from u16 to u64
Select(u64, Option<String>), // db_index, optional_key
Get(String),
Set(String, String),
SetPx(String, String, u128),
@@ -73,12 +71,13 @@ pub enum Cmd {
// AGE (rage) commands — stateless
AgeGenEnc,
AgeGenSign,
AgeGenKey, // unified stateless: returns [verify_b64, signpriv_b64, x25519_pub_b64, x25519_sec_b64]
AgeEncrypt(String, String), // recipient, message
AgeDecrypt(String, String), // identity, ciphertext_b64
AgeSign(String, String), // signing_secret, message
AgeVerify(String, String, String), // verify_pub, message, signature_b64
// NEW: persistent named-key commands
// Persistent named-key commands
AgeKeygen(String), // name
AgeSignKeygen(String), // name
AgeEncryptName(String, String), // name, message
@@ -87,48 +86,11 @@ pub enum Cmd {
AgeVerifyName(String, String, String), // name, message, signature_b64
AgeList,
// Lance vector database commands
LanceCreate {
dataset: String,
dim: usize,
schema: Vec<(String, String)>, // field_name, field_type pairs
},
LanceStore {
dataset: String,
text: Option<String>,
image_base64: Option<String>,
metadata: std::collections::HashMap<String, String>,
},
LanceSearch {
dataset: String,
vector: Vec<f32>,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
},
LanceSearchText {
dataset: String,
query_text: String,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
},
LanceEmbedText {
texts: Vec<String>,
},
LanceCreateIndex {
dataset: String,
index_type: String,
num_partitions: Option<usize>,
num_sub_vectors: Option<usize>,
},
LanceList,
LanceDrop {
dataset: String,
},
LanceInfo {
dataset: String,
},
// SYM (symmetric) commands — stateless
// Raw 32-byte key provided as base64; ciphertext returned as base64
SymKeygen,
SymEncrypt(String, String), // key_b64, message
SymDecrypt(String, String), // key_b64, ciphertext_b64
}
impl Cmd {
@@ -143,11 +105,18 @@ impl Cmd {
Ok((
match cmd[0].to_lowercase().as_str() {
"select" => {
if cmd.len() != 2 {
if cmd.len() < 2 || cmd.len() > 4 {
return Err(DBError("wrong number of arguments for SELECT".to_string()));
}
let idx = cmd[1].parse::<u64>().map_err(|_| DBError("ERR DB index is not an integer".to_string()))?;
Cmd::Select(idx)
let key = if cmd.len() == 4 && cmd[2].to_lowercase() == "key" {
Some(cmd[3].clone())
} else if cmd.len() == 2 {
None
} else {
return Err(DBError("ERR syntax error".to_string()));
};
Cmd::Select(idx, key)
}
"echo" => Cmd::Echo(cmd[1].clone()),
"ping" => Cmd::Ping,
@@ -634,6 +603,8 @@ impl Cmd {
Cmd::AgeGenEnc }
"gensign" => { if cmd.len() != 2 { return Err(DBError("AGE GENSIGN takes no args".to_string())); }
Cmd::AgeGenSign }
"genkey" => { if cmd.len() != 2 { return Err(DBError("AGE GENKEY takes no args".to_string())); }
Cmd::AgeGenKey }
"encrypt" => { if cmd.len() != 4 { return Err(DBError("AGE ENCRYPT <recipient> <message>".to_string())); }
Cmd::AgeEncrypt(cmd[2].clone(), cmd[3].clone()) }
"decrypt" => { if cmd.len() != 4 { return Err(DBError("AGE DECRYPT <identity> <ciphertext_b64>".to_string())); }
@@ -661,235 +632,18 @@ impl Cmd {
_ => return Err(DBError(format!("unsupported AGE subcommand {:?}", cmd))),
}
}
"lance" => {
"sym" => {
if cmd.len() < 2 {
return Err(DBError("wrong number of arguments for LANCE".to_string()));
return Err(DBError("wrong number of arguments for SYM".to_string()));
}
match cmd[1].to_lowercase().as_str() {
"create" => {
if cmd.len() < 4 {
return Err(DBError("LANCE CREATE <dataset> DIM <dimension> [SCHEMA field:type ...]".to_string()));
}
let dataset = cmd[2].clone();
// Parse DIM parameter
if cmd[3].to_lowercase() != "dim" {
return Err(DBError("Expected DIM after dataset name".to_string()));
}
if cmd.len() < 5 {
return Err(DBError("Missing dimension value".to_string()));
}
let dim = cmd[4].parse::<usize>().map_err(|_| DBError("Invalid dimension value".to_string()))?;
// Parse optional SCHEMA
let mut schema = Vec::new();
let mut i = 5;
if i < cmd.len() && cmd[i].to_lowercase() == "schema" {
i += 1;
while i < cmd.len() {
let field_spec = &cmd[i];
let parts: Vec<&str> = field_spec.split(':').collect();
if parts.len() != 2 {
return Err(DBError("Schema fields must be in format field:type".to_string()));
}
schema.push((parts[0].to_string(), parts[1].to_string()));
i += 1;
}
}
Cmd::LanceCreate { dataset, dim, schema }
}
"store" => {
if cmd.len() < 3 {
return Err(DBError("LANCE STORE <dataset> [TEXT <text>] [IMAGE <base64>] [metadata...]".to_string()));
}
let dataset = cmd[2].clone();
let mut text = None;
let mut image_base64 = None;
let mut metadata = std::collections::HashMap::new();
let mut i = 3;
while i < cmd.len() {
match cmd[i].to_lowercase().as_str() {
"text" => {
if i + 1 >= cmd.len() {
return Err(DBError("TEXT requires a value".to_string()));
}
text = Some(cmd[i + 1].clone());
i += 2;
}
"image" => {
if i + 1 >= cmd.len() {
return Err(DBError("IMAGE requires a base64 value".to_string()));
}
image_base64 = Some(cmd[i + 1].clone());
i += 2;
}
_ => {
// Parse as metadata key:value
if i + 1 >= cmd.len() {
return Err(DBError("Metadata requires key value pairs".to_string()));
}
metadata.insert(cmd[i].clone(), cmd[i + 1].clone());
i += 2;
}
}
}
Cmd::LanceStore { dataset, text, image_base64, metadata }
}
"search" => {
if cmd.len() < 5 {
return Err(DBError("LANCE SEARCH <dataset> VECTOR <vector> K <k> [NPROBES <n>] [REFINE <r>]".to_string()));
}
let dataset = cmd[2].clone();
if cmd[3].to_lowercase() != "vector" {
return Err(DBError("Expected VECTOR after dataset name".to_string()));
}
// Parse vector - expect comma-separated floats in brackets or just comma-separated
let vector_str = &cmd[4];
let vector_str = vector_str.trim_start_matches('[').trim_end_matches(']');
let vector: Result<Vec<f32>, _> = vector_str
.split(',')
.map(|s| s.trim().parse::<f32>())
.collect();
let vector = vector.map_err(|_| DBError("Invalid vector format".to_string()))?;
if cmd.len() < 7 || cmd[5].to_lowercase() != "k" {
return Err(DBError("Expected K after vector".to_string()));
}
let k = cmd[6].parse::<usize>().map_err(|_| DBError("Invalid K value".to_string()))?;
let mut nprobes = None;
let mut refine_factor = None;
let mut i = 7;
while i < cmd.len() {
match cmd[i].to_lowercase().as_str() {
"nprobes" => {
if i + 1 >= cmd.len() {
return Err(DBError("NPROBES requires a value".to_string()));
}
nprobes = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid NPROBES value".to_string()))?);
i += 2;
}
"refine" => {
if i + 1 >= cmd.len() {
return Err(DBError("REFINE requires a value".to_string()));
}
refine_factor = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid REFINE value".to_string()))?);
i += 2;
}
_ => {
return Err(DBError(format!("Unknown parameter: {}", cmd[i])));
}
}
}
Cmd::LanceSearch { dataset, vector, k, nprobes, refine_factor }
}
"search.text" => {
if cmd.len() < 6 {
return Err(DBError("LANCE SEARCH.TEXT <dataset> <query_text> K <k> [NPROBES <n>] [REFINE <r>]".to_string()));
}
let dataset = cmd[2].clone();
let query_text = cmd[3].clone();
if cmd[4].to_lowercase() != "k" {
return Err(DBError("Expected K after query text".to_string()));
}
let k = cmd[5].parse::<usize>().map_err(|_| DBError("Invalid K value".to_string()))?;
let mut nprobes = None;
let mut refine_factor = None;
let mut i = 6;
while i < cmd.len() {
match cmd[i].to_lowercase().as_str() {
"nprobes" => {
if i + 1 >= cmd.len() {
return Err(DBError("NPROBES requires a value".to_string()));
}
nprobes = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid NPROBES value".to_string()))?);
i += 2;
}
"refine" => {
if i + 1 >= cmd.len() {
return Err(DBError("REFINE requires a value".to_string()));
}
refine_factor = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid REFINE value".to_string()))?);
i += 2;
}
_ => {
return Err(DBError(format!("Unknown parameter: {}", cmd[i])));
}
}
}
Cmd::LanceSearchText { dataset, query_text, k, nprobes, refine_factor }
}
"embed.text" => {
if cmd.len() < 3 {
return Err(DBError("LANCE EMBED.TEXT <text1> [text2] ...".to_string()));
}
let texts = cmd[2..].to_vec();
Cmd::LanceEmbedText { texts }
}
"create.index" => {
if cmd.len() < 5 {
return Err(DBError("LANCE CREATE.INDEX <dataset> <index_type> [PARTITIONS <n>] [SUBVECTORS <n>]".to_string()));
}
let dataset = cmd[2].clone();
let index_type = cmd[3].clone();
let mut num_partitions = None;
let mut num_sub_vectors = None;
let mut i = 4;
while i < cmd.len() {
match cmd[i].to_lowercase().as_str() {
"partitions" => {
if i + 1 >= cmd.len() {
return Err(DBError("PARTITIONS requires a value".to_string()));
}
num_partitions = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid PARTITIONS value".to_string()))?);
i += 2;
}
"subvectors" => {
if i + 1 >= cmd.len() {
return Err(DBError("SUBVECTORS requires a value".to_string()));
}
num_sub_vectors = Some(cmd[i + 1].parse::<usize>().map_err(|_| DBError("Invalid SUBVECTORS value".to_string()))?);
i += 2;
}
_ => {
return Err(DBError(format!("Unknown parameter: {}", cmd[i])));
}
}
}
Cmd::LanceCreateIndex { dataset, index_type, num_partitions, num_sub_vectors }
}
"list" => {
if cmd.len() != 2 {
return Err(DBError("LANCE LIST takes no arguments".to_string()));
}
Cmd::LanceList
}
"drop" => {
if cmd.len() != 3 {
return Err(DBError("LANCE DROP <dataset>".to_string()));
}
let dataset = cmd[2].clone();
Cmd::LanceDrop { dataset }
}
"info" => {
if cmd.len() != 3 {
return Err(DBError("LANCE INFO <dataset>".to_string()));
}
let dataset = cmd[2].clone();
Cmd::LanceInfo { dataset }
}
_ => return Err(DBError(format!("unsupported LANCE subcommand {:?}", cmd))),
"keygen" => { if cmd.len() != 2 { return Err(DBError("SYM KEYGEN takes no args".to_string())); }
Cmd::SymKeygen }
"encrypt" => { if cmd.len() != 4 { return Err(DBError("SYM ENCRYPT <key_b64> <message>".to_string())); }
Cmd::SymEncrypt(cmd[2].clone(), cmd[3].clone()) }
"decrypt" => { if cmd.len() != 4 { return Err(DBError("SYM DECRYPT <key_b64> <ciphertext_b64>".to_string())); }
Cmd::SymDecrypt(cmd[2].clone(), cmd[3].clone()) }
_ => return Err(DBError(format!("unsupported SYM subcommand {:?}", cmd))),
}
}
_ => Cmd::Unknow(cmd[0].clone()),
@@ -918,7 +672,7 @@ impl Cmd {
}
match self {
Cmd::Select(db) => select_cmd(server, db).await,
Cmd::Select(db, key) => select_cmd(server, db, key).await,
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
Cmd::Echo(s) => Ok(Protocol::BulkString(s)),
Cmd::Get(k) => get_cmd(server, &k).await,
@@ -993,6 +747,7 @@ impl Cmd {
// AGE (rage): stateless
Cmd::AgeGenEnc => Ok(crate::age::cmd_age_genenc().await),
Cmd::AgeGenSign => Ok(crate::age::cmd_age_gensign().await),
Cmd::AgeGenKey => Ok(crate::age::cmd_age_genkey().await),
Cmd::AgeEncrypt(recipient, message) => Ok(crate::age::cmd_age_encrypt(&recipient, &message).await),
Cmd::AgeDecrypt(identity, ct_b64) => Ok(crate::age::cmd_age_decrypt(&identity, &ct_b64).await),
Cmd::AgeSign(secret, message) => Ok(crate::age::cmd_age_sign(&secret, &message).await),
@@ -1006,25 +761,26 @@ impl Cmd {
Cmd::AgeSignName(name, message) => Ok(crate::age::cmd_age_sign_name(server, &name, &message).await),
Cmd::AgeVerifyName(name, message, sig_b64) => Ok(crate::age::cmd_age_verify_name(server, &name, &message, &sig_b64).await),
Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
// Lance vector database commands
Cmd::LanceCreate { dataset, dim, schema } => lance_create_cmd(server, &dataset, dim, &schema).await,
Cmd::LanceStore { dataset, text, image_base64, metadata } => lance_store_cmd(server, &dataset, text.as_deref(), image_base64.as_deref(), &metadata).await,
Cmd::LanceSearch { dataset, vector, k, nprobes, refine_factor } => lance_search_cmd(server, &dataset, &vector, k, nprobes, refine_factor).await,
Cmd::LanceSearchText { dataset, query_text, k, nprobes, refine_factor } => lance_search_text_cmd(server, &dataset, &query_text, k, nprobes, refine_factor).await,
Cmd::LanceEmbedText { texts } => lance_embed_text_cmd(server, &texts).await,
Cmd::LanceCreateIndex { dataset, index_type, num_partitions, num_sub_vectors } => lance_create_index_cmd(server, &dataset, &index_type, num_partitions, num_sub_vectors).await,
Cmd::LanceList => lance_list_cmd(server).await,
Cmd::LanceDrop { dataset } => lance_drop_cmd(server, &dataset).await,
Cmd::LanceInfo { dataset } => lance_info_cmd(server, &dataset).await,
// SYM (symmetric): stateless (Phase 1)
Cmd::SymKeygen => Ok(crate::sym::cmd_sym_keygen().await),
Cmd::SymEncrypt(key_b64, message) => Ok(crate::sym::cmd_sym_encrypt(&key_b64, &message).await),
Cmd::SymDecrypt(key_b64, ct_b64) => Ok(crate::sym::cmd_sym_decrypt(&key_b64, &ct_b64).await),
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
}
}
pub fn to_protocol(self) -> Protocol {
match self {
Cmd::Select(db) => Protocol::Array(vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())]),
Cmd::Select(db, key) => {
let mut arr = vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())];
if let Some(k) = key {
arr.push(Protocol::BulkString("key".to_string()));
arr.push(Protocol::BulkString(k));
}
Protocol::Array(arr)
}
Cmd::Ping => Protocol::Array(vec![Protocol::BulkString("ping".to_string())]),
Cmd::Echo(s) => Protocol::Array(vec![Protocol::BulkString("echo".to_string()), Protocol::BulkString(s)]),
Cmd::Get(k) => Protocol::Array(vec![Protocol::BulkString("get".to_string()), Protocol::BulkString(k)]),
@@ -1041,9 +797,65 @@ async fn flushdb_cmd(server: &mut Server) -> Result<Protocol, DBError> {
}
}
async fn select_cmd(server: &mut Server, db: u64) -> Result<Protocol, DBError> {
// Test if we can access the database (this will create it if needed)
async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result<Protocol, DBError> {
// Authorization and existence checks via admin DB 0
// DB 0: require KEY admin-secret
if db == 0 {
match key {
Some(k) if k == server.option.admin_secret => {
server.selected_db = 0;
server.current_permissions = Some(crate::rpc::Permissions::ReadWrite);
// Will create encrypted 0.db if missing
match server.current_storage() {
Ok(_) => return Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => return Ok(Protocol::err(&e.0)),
}
}
_ => {
return Ok(Protocol::err("ERR invalid access key"));
}
}
}
// DB > 0: must exist in admin:dbs
let exists = match crate::admin_meta::db_exists(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
) {
Ok(b) => b,
Err(e) => return Ok(Protocol::err(&e.0)),
};
if !exists {
return Ok(Protocol::err(&format!(
"Cannot open database instance {}, as that database instance does not exist.",
db
)));
}
// Verify permissions (public => RW; private => use key)
let perms_opt = match crate::admin_meta::verify_access(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
key.as_deref(),
) {
Ok(p) => p,
Err(e) => return Ok(Protocol::err(&e.0)),
};
let perms = match perms_opt {
Some(p) => p,
None => return Ok(Protocol::err("ERR invalid access key")),
};
// Set selected database and permissions, then open storage
server.selected_db = db;
server.current_permissions = Some(perms);
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
@@ -1291,6 +1103,9 @@ async fn brpop_cmd(server: &Server, keys: &[String], timeout_secs: f64) -> Resul
}
async fn lpush_cmd(server: &Server, key: &str, elements: &[String]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
match server.current_storage()?.lpush(key, elements.to_vec()) {
Ok(len) => {
// Attempt to deliver to any blocked BLPOP waiters
@@ -1422,8 +1237,16 @@ async fn type_cmd(server: &Server, k: &String) -> Result<Protocol, DBError> {
}
async fn del_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
server.current_storage()?.del(k.to_string())?;
Ok(Protocol::SimpleString("1".to_string()))
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let storage = server.current_storage()?;
if storage.exists(k)? {
storage.del(k.to_string())?;
Ok(Protocol::SimpleString("1".to_string()))
} else {
Ok(Protocol::SimpleString("0".to_string()))
}
}
async fn set_ex_cmd(
@@ -1447,6 +1270,9 @@ async fn set_px_cmd(
}
async fn set_cmd(server: &Server, k: &str, v: &str) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
server.current_storage()?.set(k.to_string(), v.to_string())?;
Ok(Protocol::SimpleString("OK".to_string()))
}
@@ -1531,6 +1357,9 @@ async fn mset_cmd(server: &Server, pairs: &[(String, String)]) -> Result<Protoco
// DEL with multiple keys: return count of keys actually deleted
async fn del_multi_cmd(server: &Server, keys: &[String]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let storage = server.current_storage()?;
let mut deleted = 0i64;
for k in keys {
@@ -1561,6 +1390,9 @@ async fn get_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
// Hash command implementations
async fn hset_cmd(server: &Server, key: &str, pairs: &[(String, String)]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let new_fields = server.current_storage()?.hset(key, pairs.to_vec())?;
Ok(Protocol::SimpleString(new_fields.to_string()))
}
@@ -1801,243 +1633,3 @@ fn command_cmd(args: &[String]) -> Result<Protocol, DBError> {
_ => Ok(Protocol::Array(vec![])),
}
}
// Helper function to create Arrow schema from field specifications
fn create_schema_from_fields(dim: usize, fields: &[(String, String)]) -> arrow::datatypes::Schema {
let mut schema_fields = Vec::new();
// Always add the vector field first
let vector_field = arrow::datatypes::Field::new(
"vector",
arrow::datatypes::DataType::FixedSizeList(
Arc::new(arrow::datatypes::Field::new("item", arrow::datatypes::DataType::Float32, true)),
dim as i32
),
false
);
schema_fields.push(vector_field);
// Add custom fields
for (name, field_type) in fields {
let data_type = match field_type.to_lowercase().as_str() {
"string" | "text" => arrow::datatypes::DataType::Utf8,
"int" | "integer" => arrow::datatypes::DataType::Int64,
"float" => arrow::datatypes::DataType::Float64,
"bool" | "boolean" => arrow::datatypes::DataType::Boolean,
_ => arrow::datatypes::DataType::Utf8, // Default to string
};
schema_fields.push(arrow::datatypes::Field::new(name, data_type, true));
}
arrow::datatypes::Schema::new(schema_fields)
}
// Lance vector database command implementations
async fn lance_create_cmd(
server: &Server,
dataset: &str,
dim: usize,
schema: &[(String, String)],
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.create_dataset(dataset, create_schema_from_fields(dim, schema)).await {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_store_cmd(
server: &Server,
dataset: &str,
text: Option<&str>,
image_base64: Option<&str>,
metadata: &std::collections::HashMap<String, String>,
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.store_multimodal(server, dataset, text.map(|s| s.to_string()),
image_base64.and_then(|s| base64::engine::general_purpose::STANDARD.decode(s).ok()),
metadata.clone()).await {
Ok(id) => Ok(Protocol::BulkString(id)),
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_search_cmd(
server: &Server,
dataset: &str,
vector: &[f32],
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.search_vectors(dataset, vector.to_vec(), k, nprobes, refine_factor).await {
Ok(results) => {
let mut response = Vec::new();
for (distance, metadata) in results {
let mut item = Vec::new();
item.push(Protocol::BulkString("distance".to_string()));
item.push(Protocol::BulkString(distance.to_string()));
for (key, value) in metadata {
item.push(Protocol::BulkString(key));
item.push(Protocol::BulkString(value));
}
response.push(Protocol::Array(item));
}
Ok(Protocol::Array(response))
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_search_text_cmd(
server: &Server,
dataset: &str,
query_text: &str,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.search_with_text(server, dataset, query_text.to_string(), k, nprobes, refine_factor).await {
Ok(results) => {
let mut response = Vec::new();
for (distance, metadata) in results {
let mut item = Vec::new();
item.push(Protocol::BulkString("distance".to_string()));
item.push(Protocol::BulkString(distance.to_string()));
for (key, value) in metadata {
item.push(Protocol::BulkString(key));
item.push(Protocol::BulkString(value));
}
response.push(Protocol::Array(item));
}
Ok(Protocol::Array(response))
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
// Helper function to sanitize error messages for Redis protocol
fn sanitize_error_message(msg: &str) -> String {
// Remove newlines, carriage returns, and limit length
let sanitized = msg
.replace('\n', " ")
.replace('\r', " ")
.replace('\t', " ");
// Limit to 200 characters to avoid overly long error messages
if sanitized.len() > 200 {
format!("{}...", &sanitized[..197])
} else {
sanitized
}
}
async fn lance_embed_text_cmd(
server: &Server,
texts: &[String],
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.embed_text(server, texts.to_vec()).await {
Ok(embeddings) => {
let mut response = Vec::new();
for embedding in embeddings {
let vector_strings: Vec<Protocol> = embedding
.iter()
.map(|f| Protocol::BulkString(f.to_string()))
.collect();
response.push(Protocol::Array(vector_strings));
}
Ok(Protocol::Array(response))
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_create_index_cmd(
server: &Server,
dataset: &str,
index_type: &str,
num_partitions: Option<usize>,
num_sub_vectors: Option<usize>,
) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.create_index(dataset, index_type, num_partitions, num_sub_vectors).await {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_list_cmd(server: &Server) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.list_datasets().await {
Ok(datasets) => {
let response: Vec<Protocol> = datasets
.into_iter()
.map(Protocol::BulkString)
.collect();
Ok(Protocol::Array(response))
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_drop_cmd(server: &Server, dataset: &str) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.drop_dataset(dataset).await {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}
async fn lance_info_cmd(server: &Server, dataset: &str) -> Result<Protocol, DBError> {
match server.lance_store() {
Ok(lance_store) => {
match lance_store.get_dataset_info(dataset).await {
Ok(info) => {
let mut response = Vec::new();
for (key, value) in info {
response.push(Protocol::BulkString(key));
response.push(Protocol::BulkString(value));
}
Ok(Protocol::Array(response))
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
}
}
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
}
}

View File

@@ -9,12 +9,6 @@ use bincode;
#[derive(Debug)]
pub struct DBError(pub String);
impl std::fmt::Display for DBError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<std::io::Error> for DBError {
fn from(item: std::io::Error) -> Self {
DBError(item.to_string().clone())
@@ -98,40 +92,3 @@ impl From<chacha20poly1305::Error> for DBError {
DBError(item.to_string())
}
}
// Lance and related dependencies error handling
impl From<lance::Error> for DBError {
fn from(item: lance::Error) -> Self {
DBError(item.to_string())
}
}
impl From<arrow::error::ArrowError> for DBError {
fn from(item: arrow::error::ArrowError) -> Self {
DBError(item.to_string())
}
}
impl From<reqwest::Error> for DBError {
fn from(item: reqwest::Error) -> Self {
DBError(item.to_string())
}
}
impl From<image::ImageError> for DBError {
fn from(item: image::ImageError) -> Self {
DBError(item.to_string())
}
}
impl From<uuid::Error> for DBError {
fn from(item: uuid::Error) -> Self {
DBError(item.to_string())
}
}
impl From<base64::DecodeError> for DBError {
fn from(item: base64::DecodeError) -> Self {
DBError(item.to_string())
}
}

View File

@@ -1,609 +0,0 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use arrow::array::{Float32Array, StringArray, ArrayRef, FixedSizeListArray, Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use arrow::record_batch::{RecordBatch, RecordBatchReader};
use arrow::error::ArrowError;
use lance::dataset::{Dataset, WriteParams, WriteMode};
use lance::index::vector::VectorIndexParams;
use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
use lance_index::DatasetIndexExt;
use lance_linalg::distance::MetricType;
use futures::TryStreamExt;
use base64::Engine;
use serde::{Deserialize, Serialize};
use crate::error::DBError;
// Simple RecordBatchReader implementation for Vec<RecordBatch>
struct VecRecordBatchReader {
batches: std::vec::IntoIter<Result<RecordBatch, ArrowError>>,
}
impl VecRecordBatchReader {
fn new(batches: Vec<RecordBatch>) -> Self {
let result_batches = batches.into_iter().map(Ok).collect::<Vec<_>>();
Self {
batches: result_batches.into_iter(),
}
}
}
impl Iterator for VecRecordBatchReader {
type Item = Result<RecordBatch, ArrowError>;
fn next(&mut self) -> Option<Self::Item> {
self.batches.next()
}
}
impl RecordBatchReader for VecRecordBatchReader {
fn schema(&self) -> SchemaRef {
// This is a simplified implementation - in practice you'd want to store the schema
Arc::new(Schema::empty())
}
}
#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingRequest {
texts: Option<Vec<String>>,
images: Option<Vec<String>>, // base64 encoded
model: Option<String>,
}
#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingResponse {
embeddings: Vec<Vec<f32>>,
model: String,
usage: Option<HashMap<String, u32>>,
}
// Ollama-specific request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct OllamaEmbeddingRequest {
model: String,
prompt: String,
}
#[derive(Debug, Serialize, Deserialize)]
struct OllamaEmbeddingResponse {
embedding: Vec<f32>,
}
pub struct LanceStore {
datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
data_dir: PathBuf,
http_client: reqwest::Client,
}
impl LanceStore {
pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> {
// Create data directory if it doesn't exist
std::fs::create_dir_all(&data_dir)
.map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?;
let http_client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?;
Ok(Self {
datasets: Arc::new(RwLock::new(HashMap::new())),
data_dir,
http_client,
})
}
/// Get embedding service URL from Redis config, default to local Ollama
async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
// Get the embedding URL from Redis config directly from storage
let storage = server.current_storage()?;
match storage.hget("config:core:aiembed", "url")? {
Some(url) => Ok(url),
None => Ok("http://localhost:11434".to_string()), // Default to local Ollama
}
}
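// Illustrative configuration (assumption: set via redis-cli against this server;
// the hash key name matches the lookup above):
//   HSET config:core:aiembed url http://embeddings.internal:8081
// When unset, the default above routes embedding calls to local Ollama.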
/// Check if we're using Ollama (default) or custom embedding service
async fn is_ollama_service(&self, server: &crate::server::Server) -> Result<bool, DBError> {
let url = self.get_embedding_url(server).await?;
Ok(url.contains("localhost:11434") || url.contains("127.0.0.1:11434"))
}
/// Call external embedding service (Ollama or custom)
async fn call_embedding_service(
&self,
server: &crate::server::Server,
texts: Option<Vec<String>>,
images: Option<Vec<String>>,
) -> Result<Vec<Vec<f32>>, DBError> {
let base_url = self.get_embedding_url(server).await?;
let is_ollama = self.is_ollama_service(server).await?;
if is_ollama {
// Use Ollama API format
if let Some(texts) = texts {
let mut embeddings = Vec::new();
for text in texts {
let url = format!("{}/api/embeddings", base_url);
let request = OllamaEmbeddingRequest {
model: "nomic-embed-text".to_string(),
prompt: text,
};
let response = self.http_client
.post(&url)
.json(&request)
.send()
.await
.map_err(|e| DBError(format!("Failed to call Ollama embedding service: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
let error_text = response.text().await.unwrap_or_default();
return Err(DBError(format!(
"Ollama embedding service returned error {}: {}",
status, error_text
)));
}
let ollama_response: OllamaEmbeddingResponse = response
.json()
.await
.map_err(|e| DBError(format!("Failed to parse Ollama embedding response: {}", e)))?;
embeddings.push(ollama_response.embedding);
}
Ok(embeddings)
} else if let Some(_images) = images {
// Ollama doesn't support image embeddings with this API yet
Err(DBError("Image embeddings not supported with Ollama. Please configure a custom embedding service.".to_string()))
} else {
Err(DBError("No text or images provided for embedding".to_string()))
}
} else {
// Use custom embedding service API format
let request = EmbeddingRequest {
texts,
images,
model: None, // Let the service use its default
};
let response = self.http_client
.post(&base_url)
.json(&request)
.send()
.await
.map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
let error_text = response.text().await.unwrap_or_default();
return Err(DBError(format!(
"Embedding service returned error {}: {}",
status, error_text
)));
}
let embedding_response: EmbeddingResponse = response
.json()
.await
.map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;
Ok(embedding_response.embeddings)
}
}
pub async fn embed_text(
&self,
server: &crate::server::Server,
texts: Vec<String>
) -> Result<Vec<Vec<f32>>, DBError> {
if texts.is_empty() {
return Ok(Vec::new());
}
self.call_embedding_service(server, Some(texts), None).await
}
pub async fn embed_image(
&self,
server: &crate::server::Server,
image_bytes: Vec<u8>
) -> Result<Vec<f32>, DBError> {
// Convert image bytes to base64
let base64_image = base64::engine::general_purpose::STANDARD.encode(&image_bytes);
let embeddings = self.call_embedding_service(
server,
None,
Some(vec![base64_image])
).await?;
embeddings.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned for image".to_string()))
}
pub async fn create_dataset(
&self,
name: &str,
schema: Schema,
) -> Result<(), DBError> {
let dataset_path = self.data_dir.join(format!("{}.lance", name));
// Create empty dataset with schema
let write_params = WriteParams {
mode: WriteMode::Create,
..Default::default()
};
// Create an empty RecordBatch with the schema
let empty_batch = RecordBatch::new_empty(Arc::new(schema));
// Use RecordBatchReader for Lance 0.33
let reader = VecRecordBatchReader::new(vec![empty_batch]);
let dataset = Dataset::write(
reader,
dataset_path.to_str().unwrap(),
Some(write_params)
).await
.map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?;
let mut datasets = self.datasets.write().await;
datasets.insert(name.to_string(), Arc::new(dataset));
Ok(())
}
pub async fn write_vectors(
&self,
dataset_name: &str,
vectors: Vec<Vec<f32>>,
metadata: Option<HashMap<String, Vec<String>>>,
) -> Result<usize, DBError> {
let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));
// Open or get cached dataset
let _dataset = self.get_or_open_dataset(dataset_name).await?;
// Build RecordBatch
let num_vectors = vectors.len();
if num_vectors == 0 {
return Ok(0);
}
let dim = vectors.first()
.ok_or_else(|| DBError("Empty vectors".to_string()))?
.len();
// Flatten vectors
let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
let values_array = Float32Array::from(flat_vectors);
let field = Arc::new(Field::new("item", DataType::Float32, true));
let vector_array = FixedSizeListArray::try_new(
field,
dim as i32,
Arc::new(values_array),
None
).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;
let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
let mut fields = vec![Field::new(
"vector",
DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Float32, true)),
dim as i32
),
false
)];
// Add metadata columns if provided
if let Some(metadata) = metadata {
for (key, values) in metadata {
if values.len() != num_vectors {
return Err(DBError(format!(
"Metadata field '{}' has {} values but expected {}",
key, values.len(), num_vectors
)));
}
let array = StringArray::from(values);
arrays.push(Arc::new(array));
fields.push(Field::new(&key, DataType::Utf8, true));
}
}
let schema = Arc::new(Schema::new(fields));
let batch = RecordBatch::try_new(schema, arrays)
.map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?;
// Append to dataset
let write_params = WriteParams {
mode: WriteMode::Append,
..Default::default()
};
let reader = VecRecordBatchReader::new(vec![batch]);
Dataset::write(
reader,
dataset_path.to_str().unwrap(),
Some(write_params)
).await
.map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?;
// Refresh cached dataset
let mut datasets = self.datasets.write().await;
datasets.remove(dataset_name);
Ok(num_vectors)
}
pub async fn search_vectors(
&self,
dataset_name: &str,
query_vector: Vec<f32>,
k: usize,
nprobes: Option<usize>,
_refine_factor: Option<usize>,
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
let dataset = self.get_or_open_dataset(dataset_name).await?;
// Build query
let query_array = Float32Array::from(query_vector.clone());
let mut query = dataset.scan();
query.nearest(
"vector",
&query_array,
k,
).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;
if let Some(nprobes) = nprobes {
query.nprobs(nprobes);
}
// Note: refine_factor might not be available in this Lance version
// if let Some(refine) = refine_factor {
// query.refine_factor(refine);
// }
// Execute search
let results = query
.try_into_stream()
.await
.map_err(|e| DBError(format!("Failed to execute search: {}", e)))?
.try_collect::<Vec<_>>()
.await
.map_err(|e| DBError(format!("Failed to collect results: {}", e)))?;
// Process results
let mut output = Vec::new();
for batch in results {
// Get distances
let distances = batch
.column_by_name("_distance")
.ok_or_else(|| DBError("No distance column".to_string()))?
.as_any()
.downcast_ref::<Float32Array>()
.ok_or_else(|| DBError("Invalid distance type".to_string()))?;
// Get metadata
for i in 0..batch.num_rows() {
let distance = distances.value(i);
let mut metadata = HashMap::new();
for field in batch.schema().fields() {
if field.name() != "vector" && field.name() != "_distance" {
if let Some(col) = batch.column_by_name(field.name()) {
if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() {
if !str_array.is_null(i) {
metadata.insert(
field.name().to_string(),
str_array.value(i).to_string()
);
}
}
}
}
}
output.push((distance, metadata));
}
}
Ok(output)
}
pub async fn store_multimodal(
&self,
server: &crate::server::Server,
dataset_name: &str,
text: Option<String>,
image_bytes: Option<Vec<u8>>,
metadata: HashMap<String, String>,
) -> Result<String, DBError> {
// Generate ID
let id = uuid::Uuid::new_v4().to_string();
// Generate embeddings using external service
let embedding = if let Some(text) = text.as_ref() {
self.embed_text(server, vec![text.clone()]).await?
.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned".to_string()))?
} else if let Some(img) = image_bytes.as_ref() {
self.embed_image(server, img.clone()).await?
} else {
return Err(DBError("No text or image provided".to_string()));
};
// Prepare metadata
let mut full_metadata = metadata;
full_metadata.insert("id".to_string(), id.clone());
if let Some(text) = text {
full_metadata.insert("text".to_string(), text);
}
if let Some(img) = image_bytes {
full_metadata.insert("image_base64".to_string(), base64::engine::general_purpose::STANDARD.encode(img));
}
// Convert metadata to column vectors
let mut metadata_cols = HashMap::new();
for (key, value) in full_metadata {
metadata_cols.insert(key, vec![value]);
}
// Write to dataset
self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?;
Ok(id)
}
pub async fn search_with_text(
&self,
server: &crate::server::Server,
dataset_name: &str,
query_text: String,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
// Embed the query text using external service
let embeddings = self.embed_text(server, vec![query_text]).await?;
let query_vector = embeddings.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned for query".to_string()))?;
// Search with the embedding
self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await
}
pub async fn create_index(
&self,
dataset_name: &str,
index_type: &str,
num_partitions: Option<usize>,
num_sub_vectors: Option<usize>,
) -> Result<(), DBError> {
let _dataset = self.get_or_open_dataset(dataset_name).await?;
match index_type.to_uppercase().as_str() {
"IVF_PQ" => {
let ivf_params = IvfBuildParams {
num_partitions: num_partitions.unwrap_or(256),
..Default::default()
};
let pq_params = PQBuildParams {
num_sub_vectors: num_sub_vectors.unwrap_or(16),
..Default::default()
};
let params = VectorIndexParams::with_ivf_pq_params(
MetricType::L2,
ivf_params,
pq_params,
);
// Get a mutable reference to the dataset
let mut dataset_mut = Dataset::open(self.data_dir.join(format!("{}.lance", dataset_name)).to_str().unwrap())
.await
.map_err(|e| DBError(format!("Failed to open dataset for indexing: {}", e)))?;
dataset_mut.create_index(
&["vector"],
lance_index::IndexType::Vector,
None,
&params,
true
).await
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
}
_ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
}
Ok(())
}
async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> {
let mut datasets = self.datasets.write().await;
if let Some(dataset) = datasets.get(name) {
return Ok(dataset.clone());
}
let dataset_path = self.data_dir.join(format!("{}.lance", name));
if !dataset_path.exists() {
return Err(DBError(format!("Dataset '{}' does not exist", name)));
}
let dataset = Dataset::open(dataset_path.to_str().unwrap())
.await
.map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?;
let dataset = Arc::new(dataset);
datasets.insert(name.to_string(), dataset.clone());
Ok(dataset)
}
pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> {
let mut datasets = Vec::new();
let entries = std::fs::read_dir(&self.data_dir)
.map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?;
for entry in entries {
let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?;
let path = entry.path();
if path.is_dir() {
if let Some(name) = path.file_name() {
if let Some(name_str) = name.to_str() {
if name_str.ends_with(".lance") {
let dataset_name = name_str.trim_end_matches(".lance");
datasets.push(dataset_name.to_string());
}
}
}
}
}
Ok(datasets)
}
pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> {
// Remove from cache
let mut datasets = self.datasets.write().await;
datasets.remove(name);
// Delete from disk
let dataset_path = self.data_dir.join(format!("{}.lance", name));
if dataset_path.exists() {
std::fs::remove_dir_all(dataset_path)
.map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?;
}
Ok(())
}
pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> {
let dataset = self.get_or_open_dataset(name).await?;
let mut info = HashMap::new();
info.insert("name".to_string(), name.to_string());
info.insert("version".to_string(), dataset.version().version.to_string());
info.insert("num_rows".to_string(), dataset.count_rows(None).await?.to_string());
// Get schema info
let schema = dataset.schema();
let fields: Vec<String> = schema.fields
.iter()
.map(|f| format!("{}:{}", f.name, f.data_type()))
.collect();
info.insert("schema".to_string(), fields.join(", "));
Ok(info)
}
}

View File

@@ -1,11 +1,14 @@
pub mod age;
pub mod sym;
pub mod cmd;
pub mod crypto;
pub mod error;
pub mod options;
pub mod protocol;
pub mod rpc;
pub mod rpc_server;
pub mod server;
pub mod storage;
pub mod storage_trait;
pub mod storage_sled;
pub mod admin_meta;

View File

@@ -3,6 +3,7 @@
use tokio::net::TcpListener;
use herodb::server;
use herodb::rpc_server;
use clap::Parser;
@@ -22,18 +23,29 @@ struct Args {
#[arg(long)]
debug: bool,
/// Master encryption key for encrypted databases (deprecated; ignored for data DBs)
#[arg(long)]
encryption_key: Option<String>,
/// Encrypt the database (deprecated; ignored for data DBs)
#[arg(long)]
encrypt: bool,
/// Enable RPC management server
#[arg(long)]
enable_rpc: bool,
/// RPC server port (default: 8080)
#[arg(long, default_value = "8080")]
rpc_port: u16,
/// Use the sled backend
#[arg(long)]
sled: bool,
/// Admin secret used to encrypt DB 0 and authorize admin access (required)
#[arg(long)]
admin_secret: String,
}
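// Example invocation (hypothetical values; --dir and --port are defined earlier
// in this struct, outside the hunk shown):
//   herodb --dir ./data --port 6379 --admin-secret change-me --enable-rpc --rpc-port 8080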
#[tokio::main]
@@ -48,9 +60,19 @@ async fn main() {
.await
.unwrap();
// deprecation warnings for legacy flags
if args.encrypt || args.encryption_key.is_some() {
eprintln!("warning: --encrypt and --encryption-key are deprecated and ignored for data DBs. Admin DB 0 is always encrypted with --admin-secret.");
}
// basic validation for admin secret
if args.admin_secret.trim().is_empty() {
eprintln!("error: --admin-secret must not be empty");
std::process::exit(2);
}
// new DB option
let option = herodb::options::DBOption {
dir: args.dir.clone(),
port,
debug: args.debug,
encryption_key: args.encryption_key,
@@ -60,14 +82,42 @@ async fn main() {
} else {
herodb::options::BackendType::Redb
},
admin_secret: args.admin_secret.clone(),
};
let backend = option.backend.clone();
// Bootstrap admin DB 0 before opening any server storage
if let Err(e) = herodb::admin_meta::ensure_bootstrap(&args.dir, backend.clone(), &args.admin_secret) {
eprintln!("Failed to bootstrap admin DB 0: {}", e.0);
std::process::exit(2);
}
// new server
let server = server::Server::new(option).await;
// Add a small delay to ensure the port is ready
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
// Start RPC server if enabled
let _rpc_handle = if args.enable_rpc {
let rpc_addr = format!("127.0.0.1:{}", args.rpc_port).parse().unwrap();
let base_dir = args.dir.clone();
match rpc_server::start_rpc_server(rpc_addr, base_dir, backend, args.admin_secret.clone()).await {
Ok(handle) => {
println!("RPC management server started on port {}", args.rpc_port);
Some(handle)
}
Err(e) => {
eprintln!("Failed to start RPC server: {}", e);
None
}
}
} else {
None
};
// accept new connections
loop {
let stream = listener.accept().await;

View File

@@ -1,4 +1,4 @@
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackendType {
Redb,
Sled,
@@ -9,7 +9,11 @@ pub struct DBOption {
pub dir: String,
pub port: u16,
pub debug: bool,
// Deprecated for data DBs; retained for backward-compat on CLI parsing
pub encrypt: bool,
// Deprecated for data DBs; retained for backward-compat on CLI parsing
pub encryption_key: Option<String>,
pub backend: BackendType,
// New: required admin secret, used to encrypt DB 0 and authorize admin operations
pub admin_secret: String,
}

src/rpc.rs (new file, 472 lines)

@@ -0,0 +1,472 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use jsonrpsee::{core::RpcResult, proc_macros::rpc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::server::Server;
use crate::options::DBOption;
use crate::admin_meta;
/// Database backend types
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BackendType {
Redb,
Sled,
// Future: InMemory, Custom(String)
}
/// Database configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
pub name: Option<String>,
pub storage_path: Option<String>,
pub max_size: Option<u64>,
pub redis_version: Option<String>,
}
/// Database information returned by metadata queries
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseInfo {
pub id: u64,
pub name: Option<String>,
pub backend: BackendType,
pub encrypted: bool,
pub redis_version: Option<String>,
pub storage_path: Option<String>,
pub size_on_disk: Option<u64>,
pub key_count: Option<u64>,
pub created_at: u64,
pub last_access: Option<u64>,
}
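// Example serialized form (illustrative values only):
//   {"id":1,"name":"demo","backend":"Redb","encrypted":false,"redis_version":"7.0",
//    "storage_path":"/data","size_on_disk":4096,"key_count":0,"created_at":1758000000,"last_access":null}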
/// Access permissions for database keys
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Permissions {
Read,
ReadWrite,
}
/// Access key information returned by RPC
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccessKeyInfo {
pub hash: String,
pub permissions: Permissions,
pub created_at: u64,
}
/// Hash a plaintext key using SHA-256
pub fn hash_key(key: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(key.as_bytes());
format!("{:x}", hasher.finalize())
}
/// RPC trait for HeroDB management
#[rpc(server, client, namespace = "herodb")]
pub trait Rpc {
/// Create a new database with specified configuration
#[method(name = "createDatabase")]
async fn create_database(
&self,
backend: BackendType,
config: DatabaseConfig,
encryption_key: Option<String>,
) -> RpcResult<u64>;
/// Set encryption for an existing database (write-only key)
#[method(name = "setEncryption")]
async fn set_encryption(&self, db_id: u64, encryption_key: String) -> RpcResult<bool>;
/// List all managed databases
#[method(name = "listDatabases")]
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>>;
/// Get detailed information about a specific database
#[method(name = "getDatabaseInfo")]
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo>;
/// Delete a database
#[method(name = "deleteDatabase")]
async fn delete_database(&self, db_id: u64) -> RpcResult<bool>;
/// Get server statistics
#[method(name = "getServerStats")]
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>>;
/// Add an access key to a database
#[method(name = "addAccessKey")]
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool>;
/// Delete an access key from a database
#[method(name = "deleteAccessKey")]
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool>;
/// List all access keys for a database
#[method(name = "listAccessKeys")]
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>>;
/// Set database public/private status
#[method(name = "setDatabasePublic")]
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
}
/// RPC Server implementation
pub struct RpcServerImpl {
/// Base directory for database files
base_dir: String,
/// Managed database servers
servers: Arc<RwLock<HashMap<u64, Arc<Server>>>>,
/// Default backend type
backend: crate::options::BackendType,
/// Admin secret used to encrypt DB 0 and authorize admin access
admin_secret: String,
}
impl RpcServerImpl {
/// Create a new RPC server instance
pub fn new(base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Self {
Self {
base_dir,
servers: Arc::new(RwLock::new(HashMap::new())),
backend,
admin_secret,
}
}
/// Get or create a server instance for the given database ID
async fn get_or_create_server(&self, db_id: u64) -> Result<Arc<Server>, jsonrpsee::types::ErrorObjectOwned> {
// Check if server already exists
{
let servers = self.servers.read().await;
if let Some(server) = servers.get(&db_id) {
return Ok(server.clone());
}
}
// Validate existence via admin DB 0 (metadata), not filesystem presence
let exists = admin_meta::db_exists(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
if !exists {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(
-32000,
format!("Database {} not found", db_id),
None::<()>
));
}
// Resolve effective backend for this db from admin meta or filesystem; fallback to default
let meta_backend = admin_meta::get_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.ok()
.flatten();
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
let sniffed_backend = if db_path.exists() {
if db_path.is_file() {
Some(crate::options::BackendType::Redb)
} else if db_path.is_dir() {
Some(crate::options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(self.backend.clone());
if effective_backend != self.backend {
eprintln!(
"notice: get_or_create_server: db {} backend resolved to {:?} (server default {:?})",
db_id, effective_backend, self.backend
);
}
// If we had to sniff (no meta), persist the resolved backend
if meta_backend.is_none() {
let _ = admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, effective_backend.clone());
}
// Create server instance with resolved backend
let db_option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None,
encrypt: false,
backend: effective_backend,
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(db_option).await;
// Set the selected database to the db_id
server.selected_db = db_id;
// Lazily open/create physical storage according to admin meta (per-db encryption)
let _ = server.current_storage();
// Store the server
let mut servers = self.servers.write().await;
servers.insert(db_id, Arc::new(server.clone()));
Ok(Arc::new(server))
}
/// Discover existing database IDs from admin DB 0
async fn discover_databases(&self) -> Vec<u64> {
admin_meta::list_dbs(&self.base_dir, self.backend.clone(), &self.admin_secret)
.unwrap_or_default()
}
/// Build database file path for given server/db_id
fn db_file_path(&self, server: &Server, db_id: u64) -> std::path::PathBuf {
std::path::PathBuf::from(&server.option.dir).join(format!("{}.db", db_id))
}
/// Recursively compute size on disk for the database path
fn compute_size_on_disk(&self, path: &std::path::Path) -> Option<u64> {
fn dir_size(p: &std::path::Path) -> u64 {
if p.is_file() {
std::fs::metadata(p).map(|m| m.len()).unwrap_or(0)
} else if p.is_dir() {
let mut total = 0u64;
if let Ok(read) = std::fs::read_dir(p) {
for entry in read.flatten() {
total += dir_size(&entry.path());
}
}
total
} else {
0
}
}
Some(dir_size(path))
}
/// Extract created and last access times (secs) from a path, with fallbacks
fn get_file_times_secs(path: &std::path::Path) -> (u64, Option<u64>) {
let now = std::time::SystemTime::now();
let created = std::fs::metadata(path)
.and_then(|m| m.created().or_else(|_| m.modified()))
.unwrap_or(now)
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let last_access = std::fs::metadata(path)
.and_then(|m| m.accessed())
.ok()
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok().map(|d| d.as_secs()));
(created, last_access)
}
/// Compose a DatabaseInfo by probing storage and filesystem, with admin meta for access key count
async fn build_database_info(&self, db_id: u64, server: &Server) -> DatabaseInfo {
// Probe storage to determine encryption state
let storage = server.current_storage().ok();
let encrypted = storage.as_ref().map(|s| s.is_encrypted()).unwrap_or(server.option.encrypt);
// Get actual key count from storage
let key_count = storage.as_ref()
.and_then(|s| s.dbsize().ok())
.map(|count| count as u64);
// Get database name from admin meta
let name = admin_meta::get_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.ok()
.flatten();
// Compute size on disk and timestamps from the DB file path
let db_path = self.db_file_path(server, db_id);
let size_on_disk = self.compute_size_on_disk(&db_path);
let (created_at, last_access) = Self::get_file_times_secs(&db_path);
let backend = match server.option.backend {
crate::options::BackendType::Redb => BackendType::Redb,
crate::options::BackendType::Sled => BackendType::Sled,
};
DatabaseInfo {
id: db_id,
name,
backend,
encrypted,
redis_version: Some("7.0".to_string()),
storage_path: Some(server.option.dir.clone()),
size_on_disk,
key_count,
created_at,
last_access,
}
}
}
#[jsonrpsee::core::async_trait]
impl RpcServer for RpcServerImpl {
async fn create_database(
&self,
backend: BackendType,
config: DatabaseConfig,
encryption_key: Option<String>,
) -> RpcResult<u64> {
// Allocate new ID via admin DB 0
let db_id = admin_meta::allocate_next_id(&self.base_dir, self.backend.clone(), &self.admin_secret)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Persist per-db encryption key in admin DB 0 if provided
if let Some(ref key) = encryption_key {
admin_meta::set_enc_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, key)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
}
// Persist database name if provided
if let Some(ref name) = config.name {
admin_meta::set_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, name)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
}
// Ensure base dir exists
if let Err(e) = std::fs::create_dir_all(&self.base_dir) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, format!("Failed to ensure base dir: {}", e), None::<()>));
}
// Map RPC backend to options backend and persist it in admin meta for this db id
let opt_backend = match backend {
BackendType::Redb => crate::options::BackendType::Redb,
BackendType::Sled => crate::options::BackendType::Sled,
};
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Create server instance using base_dir, chosen backend and admin secret
let option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None, // per-db key is stored in admin DB 0
encrypt: false, // encryption decided per-db at open time
backend: opt_backend,
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(option).await;
server.selected_db = db_id;
// Initialize storage to create physical <id>.db with proper encryption from admin meta
let _ = server.current_storage();
// Store the server in cache
let mut servers = self.servers.write().await;
servers.insert(db_id, Arc::new(server));
Ok(db_id)
}
async fn set_encryption(&self, _db_id: u64, _encryption_key: String) -> RpcResult<bool> {
// For now, return false as encryption can only be set during creation
let _servers = self.servers.read().await;
// TODO: Implement encryption setting for existing databases
Ok(false)
}
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>> {
let db_ids = self.discover_databases().await;
let mut result = Vec::new();
for db_id in db_ids {
if let Ok(server) = self.get_or_create_server(db_id).await {
// Build accurate info from storage/meta/fs
let info = self.build_database_info(db_id, &server).await;
result.push(info);
}
}
Ok(result)
}
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo> {
let server = self.get_or_create_server(db_id).await?;
// Build accurate info from storage/meta/fs
let info = self.build_database_info(db_id, &server).await;
Ok(info)
}
async fn delete_database(&self, db_id: u64) -> RpcResult<bool> {
let mut servers = self.servers.write().await;
if let Some(_server) = servers.remove(&db_id) {
// Clean up database files
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
if db_path.exists() {
if db_path.is_dir() {
std::fs::remove_dir_all(&db_path).ok();
} else {
std::fs::remove_file(&db_path).ok();
}
}
Ok(true)
} else {
Ok(false)
}
}
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>> {
let db_ids = self.discover_databases().await;
let mut stats = HashMap::new();
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
stats.insert("uptime".to_string(), serde_json::json!(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs()
));
Ok(stats)
}
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
let perms = match permissions.to_lowercase().as_str() {
"read" => Permissions::Read,
"readwrite" => Permissions::ReadWrite,
_ => return Err(jsonrpsee::types::ErrorObjectOwned::owned(
-32000,
"Invalid permissions: use 'read' or 'readwrite'",
None::<()>
)),
};
admin_meta::add_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key, perms)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool> {
let ok = admin_meta::delete_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key_hash)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(ok)
}
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>> {
let pairs = admin_meta::list_access_keys(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
let keys: Vec<AccessKeyInfo> = pairs.into_iter().map(|(hash, perm, ts)| AccessKeyInfo {
hash,
permissions: perm,
created_at: ts,
}).collect();
Ok(keys)
}
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool> {
admin_meta::set_database_public(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, public)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
}
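A minimal client sketch against this trait (assumptions: the #[rpc(server, client, ...)] macro above generates an `RpcClient` with these method names, and a server is listening on 127.0.0.1:8080 via --enable-rpc):

use herodb::rpc::{BackendType, DatabaseConfig, RpcClient};
use jsonrpsee::http_client::HttpClientBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = HttpClientBuilder::default().build("http://127.0.0.1:8080")?;
    // herodb_createDatabase: allocate a new database id backed by redb
    let db_id = client
        .create_database(
            BackendType::Redb,
            DatabaseConfig {
                name: Some("demo".to_string()),
                storage_path: None,
                max_size: None,
                redis_version: None,
            },
            None, // no per-db encryption key
        )
        .await?;
    // herodb_listDatabases: the new id should be visible via admin DB 0
    let dbs = client.list_databases().await?;
    println!("created db {} ({} total)", db_id, dbs.len());
    Ok(())
}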

src/rpc_server.rs (new file, 49 lines)

@@ -0,0 +1,49 @@
use std::net::SocketAddr;
use jsonrpsee::server::{ServerBuilder, ServerHandle};
use jsonrpsee::RpcModule;
use crate::rpc::{RpcServer, RpcServerImpl};
/// Start the RPC server on the specified address
pub async fn start_rpc_server(addr: SocketAddr, base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
// Create the RPC server implementation
let rpc_impl = RpcServerImpl::new(base_dir, backend, admin_secret);
// Create the RPC module
let mut module = RpcModule::new(());
module.merge(RpcServer::into_rpc(rpc_impl))?;
// Build the server with both HTTP and WebSocket support
let server = ServerBuilder::default()
.build(addr)
.await?;
// Start the server
let handle = server.start(module);
println!("RPC server started on {}", addr);
Ok(handle)
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[tokio::test]
async fn test_rpc_server_startup() {
let addr = "127.0.0.1:0".parse().unwrap(); // Use port 0 for auto-assignment
let base_dir = "/tmp/test_rpc".to_string();
let backend = crate::options::BackendType::Redb; // Default for test
let handle = start_rpc_server(addr, base_dir, backend, "test-admin".to_string()).await.unwrap();
// Give the server a moment to start
tokio::time::sleep(Duration::from_millis(100)).await;
// Stop the server
handle.stop().unwrap();
handle.stopped().await;
}
}

View File

@@ -9,12 +9,10 @@ use std::sync::atomic::{AtomicU64, Ordering};
use crate::cmd::Cmd;
use crate::error::DBError;
use crate::options;
use crate::protocol::Protocol;
use crate::storage::Storage;
use crate::storage_sled::SledStorage;
use crate::storage_trait::StorageBackend;
use crate::admin_meta;
#[derive(Clone)]
pub struct Server {
@@ -23,13 +21,11 @@ pub struct Server {
pub client_name: Option<String>,
pub selected_db: u64, // Changed from usize to u64
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
pub current_permissions: Option<crate::rpc::Permissions>,
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
pub waiter_seq: Arc<AtomicU64>,
}
pub struct Waiter {
@@ -46,81 +42,57 @@ pub enum PopSide {
impl Server {
pub async fn new(option: options::DBOption) -> Self {
Server {
db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
option,
client_name: None,
selected_db: 0,
queued_cmd: None,
current_permissions: None,
list_waiters: Arc::new(Mutex::new(HashMap::new())),
waiter_seq: Arc::new(AtomicU64::new(1)),
}
}
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
let mut cache = self.db_cache.write().unwrap();
if let Some(storage) = cache.get(&self.selected_db) {
return Ok(storage.clone());
}
// Use process-wide shared handles to avoid sled/redb double-open lock contention.
let storage = if self.selected_db == 0 {
// Admin DB 0: always via singleton
admin_meta::open_admin_storage(
&self.option.dir,
self.option.backend.clone(),
&self.option.admin_secret,
)?
} else {
// Data DBs: via global registry keyed by id
admin_meta::open_data_storage(
&self.option.dir,
self.option.backend.clone(),
&self.option.admin_secret,
self.selected_db,
)?
};
cache.insert(self.selected_db, storage.clone());
Ok(storage)
}
/// Check if current permissions allow read operations
pub fn has_read_permission(&self) -> bool {
matches!(self.current_permissions, Some(crate::rpc::Permissions::Read) | Some(crate::rpc::Permissions::ReadWrite))
}
/// Check if current permissions allow write operations
pub fn has_write_permission(&self) -> bool {
matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite))
}
// ----- BLPOP waiter helpers -----

src/sym.rs (new file, 123 lines)

@@ -0,0 +1,123 @@
//! sym.rs — Stateless symmetric encryption (Phase 1)
//!
//! Commands implemented (RESP):
//! - SYM KEYGEN
//! - SYM ENCRYPT <key_b64> <message>
//! - SYM DECRYPT <key_b64> <ciphertext_b64>
//!
//! Notes:
//! - Raw key: exactly 32 bytes, provided as Base64 in commands.
//! - Cipher: XChaCha20-Poly1305 (AEAD) without AAD in Phase 1
//! - Ciphertext binary layout: [version:1][nonce:24][ciphertext||tag]
//! - Encoding for wire I/O: Base64
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use chacha20poly1305::{
aead::{Aead, KeyInit, OsRng},
XChaCha20Poly1305, XNonce,
};
use rand::RngCore;
use crate::protocol::Protocol;
const VERSION: u8 = 1;
const NONCE_LEN: usize = 24;
const TAG_LEN: usize = 16;
#[derive(Debug)]
pub enum SymWireError {
InvalidKey,
BadEncoding,
BadFormat,
BadVersion(u8),
Crypto,
}
impl SymWireError {
fn to_protocol(self) -> Protocol {
match self {
SymWireError::InvalidKey => Protocol::err("ERR sym: invalid key"),
SymWireError::BadEncoding => Protocol::err("ERR sym: bad encoding"),
SymWireError::BadFormat => Protocol::err("ERR sym: bad format"),
SymWireError::BadVersion(v) => Protocol::err(&format!("ERR sym: unsupported version {}", v)),
SymWireError::Crypto => Protocol::err("ERR sym: auth failed"),
}
}
}
fn decode_key_b64(s: &str) -> Result<chacha20poly1305::Key, SymWireError> {
let bytes = B64.decode(s.as_bytes()).map_err(|_| SymWireError::BadEncoding)?;
if bytes.len() != 32 {
return Err(SymWireError::InvalidKey);
}
Ok(chacha20poly1305::Key::from_slice(&bytes).to_owned())
}
fn encrypt_blob(key: &chacha20poly1305::Key, plaintext: &[u8]) -> Result<Vec<u8>, SymWireError> {
let cipher = XChaCha20Poly1305::new(key);
let mut nonce_bytes = [0u8; NONCE_LEN];
OsRng.fill_bytes(&mut nonce_bytes);
let nonce = XNonce::from_slice(&nonce_bytes);
let mut out = Vec::with_capacity(1 + NONCE_LEN + plaintext.len() + TAG_LEN);
out.push(VERSION);
out.extend_from_slice(&nonce_bytes);
let ct = cipher.encrypt(nonce, plaintext).map_err(|_| SymWireError::Crypto)?;
out.extend_from_slice(&ct);
Ok(out)
}
fn decrypt_blob(key: &chacha20poly1305::Key, blob: &[u8]) -> Result<Vec<u8>, SymWireError> {
if blob.len() < 1 + NONCE_LEN + TAG_LEN {
return Err(SymWireError::BadFormat);
}
let ver = blob[0];
if ver != VERSION {
return Err(SymWireError::BadVersion(ver));
}
let nonce = XNonce::from_slice(&blob[1..1 + NONCE_LEN]);
let ct = &blob[1 + NONCE_LEN..];
let cipher = XChaCha20Poly1305::new(key);
cipher.decrypt(nonce, ct).map_err(|_| SymWireError::Crypto)
}
// ---------- Command handlers (RESP) ----------
pub async fn cmd_sym_keygen() -> Protocol {
let mut key_bytes = [0u8; 32];
OsRng.fill_bytes(&mut key_bytes);
let key_b64 = B64.encode(key_bytes);
Protocol::BulkString(key_b64)
}
pub async fn cmd_sym_encrypt(key_b64: &str, message: &str) -> Protocol {
let key = match decode_key_b64(key_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
match encrypt_blob(&key, message.as_bytes()) {
Ok(blob) => Protocol::BulkString(B64.encode(blob)),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_sym_decrypt(key_b64: &str, ct_b64: &str) -> Protocol {
let key = match decode_key_b64(key_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let blob = match B64.decode(ct_b64.as_bytes()) {
Ok(b) => b,
Err(_) => return SymWireError::BadEncoding.to_protocol(),
};
match decrypt_blob(&key, &blob) {
Ok(pt) => match String::from_utf8(pt) {
Ok(s) => Protocol::BulkString(s),
Err(_) => Protocol::err("ERR sym: invalid UTF-8 plaintext"),
},
Err(e) => e.to_protocol(),
}
}
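A round-trip sketch over RESP using the redis crate, mirroring the test suite's handshake (assumptions: server on 127.0.0.1:6379 started with --admin-secret test-admin):

fn main() -> redis::RedisResult<()> {
    let client = redis::Client::open("redis://127.0.0.1:6379/")?;
    let mut conn = client.get_connection()?;
    // Acquire ReadWrite permissions: SELECT 0 KEY <admin-secret>
    let _: String = redis::cmd("SELECT").arg(0).arg("KEY").arg("test-admin").query(&mut conn)?;
    // SYM KEYGEN returns a 32-byte key as Base64
    let key_b64: String = redis::cmd("SYM").arg("KEYGEN").query(&mut conn)?;
    // Encrypt then decrypt; ciphertext layout is [version:1][nonce:24][ct||tag], Base64-encoded
    let ct_b64: String = redis::cmd("SYM").arg("ENCRYPT").arg(&key_b64).arg("hello").query(&mut conn)?;
    let pt: String = redis::cmd("SYM").arg("DECRYPT").arg(&key_b64).arg(&ct_b64).query(&mut conn)?;
    assert_eq!(pt, "hello");
    Ok(())
}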

View File

@@ -1,11 +1,10 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'

View File

@@ -28,6 +28,7 @@ async fn debug_hset_simple() {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let mut server = Server::new(option).await;
@@ -48,6 +49,12 @@ async fn debug_hset_simple() {
sleep(Duration::from_millis(200)).await;
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire ReadWrite permissions on this connection
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
// Test simple HSET
println!("Testing HSET...");

View File

@@ -19,6 +19,7 @@ async fn debug_hset_return_value() {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let mut server = Server::new(option).await;
@@ -40,12 +41,19 @@ async fn debug_hset_return_value() {
// Connect and test HSET
let mut stream = TcpStream::connect("127.0.0.1:16390").await.unwrap();
// Acquire ReadWrite permissions for this new connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let resp = String::from_utf8_lossy(&buffer[..n]);
assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
// Send HSET command
let cmd = "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n";
stream.write_all(cmd.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let response = String::from_utf8_lossy(&buffer[..n]);

View File

@@ -12,7 +12,15 @@ fn get_redis_connection(port: u16) -> Connection {
match client.get_connection() {
Ok(mut conn) => {
if redis::cmd("PING").query::<String>(&mut conn).is_ok() {
// Acquire ReadWrite permissions on this connection
let sel: RedisResult<String> = redis::cmd("SELECT")
.arg(0)
.arg("KEY")
.arg("test-admin")
.query(&mut conn);
if sel.is_ok() {
return conn;
}
}
}
Err(e) => {
@@ -78,6 +86,8 @@ fn setup_server() -> (ServerProcessGuard, u16) {
"--port",
&port.to_string(),
"--debug",
"--admin-secret",
"test-admin",
])
.spawn()
.expect("Failed to start server process");

View File

@@ -23,18 +23,29 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
(server, port)
}
// Helper function to connect to the test server
async fn connect_to_server(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(mut stream) => {
// Obtain ReadWrite permissions for this connection by selecting DB 0 with admin key
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
if !resp.contains("OK") {
panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
}
return stream;
}
Err(_) if attempts < 10 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;

tests/rpc_tests.rs (new file, 85 lines)

@@ -0,0 +1,85 @@
use herodb::rpc::{BackendType, DatabaseConfig};
use herodb::admin_meta;
use herodb::options::BackendType as OptionsBackendType;
#[tokio::test]
async fn test_rpc_server_basic() {
// This test would require starting the RPC server in a separate thread
// For now, we'll just test that the types compile correctly
// Test serialization of types
let backend = BackendType::Redb;
let config = DatabaseConfig {
name: Some("test_db".to_string()),
storage_path: Some("/tmp/test".to_string()),
max_size: Some(1024 * 1024),
redis_version: Some("7.0".to_string()),
};
let backend_json = serde_json::to_string(&backend).unwrap();
let config_json = serde_json::to_string(&config).unwrap();
assert_eq!(backend_json, "\"Redb\"");
assert!(config_json.contains("test_db"));
}
#[tokio::test]
async fn test_database_config_serialization() {
let config = DatabaseConfig {
name: Some("my_db".to_string()),
storage_path: None,
max_size: Some(1000000),
redis_version: Some("7.0".to_string()),
};
let json = serde_json::to_value(&config).unwrap();
assert_eq!(json["name"], "my_db");
assert_eq!(json["max_size"], 1000000);
assert_eq!(json["redis_version"], "7.0");
}
#[tokio::test]
async fn test_backend_type_serialization() {
// Test that both Redb and Sled backends serialize correctly
let redb_backend = BackendType::Redb;
let sled_backend = BackendType::Sled;
let redb_json = serde_json::to_string(&redb_backend).unwrap();
let sled_json = serde_json::to_string(&sled_backend).unwrap();
assert_eq!(redb_json, "\"Redb\"");
assert_eq!(sled_json, "\"Sled\"");
// Test deserialization
let redb_deserialized: BackendType = serde_json::from_str(&redb_json).unwrap();
let sled_deserialized: BackendType = serde_json::from_str(&sled_json).unwrap();
assert!(matches!(redb_deserialized, BackendType::Redb));
assert!(matches!(sled_deserialized, BackendType::Sled));
}
#[tokio::test]
async fn test_database_name_persistence() {
let base_dir = "/tmp/test_db_name_persistence";
let admin_secret = "test-admin-secret";
let backend = OptionsBackendType::Redb;
let db_id = 1;
let test_name = "test-database-name";
// Clean up any existing test data
let _ = std::fs::remove_dir_all(base_dir);
// Set the database name
admin_meta::set_database_name(base_dir, backend.clone(), admin_secret, db_id, test_name)
.expect("Failed to set database name");
// Retrieve the database name
let retrieved_name = admin_meta::get_database_name(base_dir, backend, admin_secret, db_id)
.expect("Failed to get database name");
// Verify the name matches
assert_eq!(retrieved_name, Some(test_name.to_string()));
// Clean up
let _ = std::fs::remove_dir_all(base_dir);
}

View File

@@ -25,6 +25,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -34,9 +35,16 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
// Helper function to send Redis command and get response
async fn send_redis_command(port: u16, command: &str) -> String {
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire ReadWrite permissions on this new connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let _ = stream.read(&mut buffer).await.unwrap(); // Read and ignore the OK for handshake
// Now send the intended command
stream.write_all(command.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
String::from_utf8_lossy(&buffer[..n]).to_string()
}
@@ -184,12 +192,19 @@ async fn test_transaction_operations() {
sleep(Duration::from_millis(100)).await;
// Use a single connection for the transaction
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire write permissions for this connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let resp = String::from_utf8_lossy(&buffer[..n]);
assert!(resp.contains("OK"));
// Test MULTI
stream.write_all("*1\r\n$5\r\nMULTI\r\n".as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let response = String::from_utf8_lossy(&buffer[..n]);
assert!(response.contains("OK"));

View File

@@ -23,6 +23,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -38,12 +39,22 @@ async fn send_command(stream: &mut TcpStream, command: &str) -> String {
String::from_utf8_lossy(&buffer[..n]).to_string()
}
// Helper function to connect to the test server
async fn connect_to_server(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(mut stream) => {
// Acquire ReadWrite permissions for this connection
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
if !resp.contains("OK") {
panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
}
return stream;
}
Err(_) if attempts < 10 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;
@@ -97,14 +108,21 @@ async fn test_hset_clean_db() {
sleep(Duration::from_millis(200)).await;
let mut stream = connect_to_server(port).await;
// Ensure clean DB state (admin DB 0 may be shared due to global singleton)
let flush = send_command(&mut stream, "*1\r\n$7\r\nFLUSHDB\r\n").await;
assert!(flush.contains("OK"), "Failed to FLUSHDB: {}", flush);
// Test HSET - should return 1 for new field (use a unique key name to avoid collisions)
let key = "hash_clean";
let hset_cmd = format!("*4\r\n$4\r\nHSET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n", key.len(), key);
let response = send_command(&mut stream, &hset_cmd).await;
println!("HSET response: {}", response);
assert!(response.contains("1"), "Expected HSET to return 1, got: {}", response);
// Test HGET
let hget_cmd = format!("*3\r\n$4\r\nHGET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n", key.len(), key);
let response = send_command(&mut stream, &hget_cmd).await;
println!("HGET response: {}", response);
assert!(response.contains("value1"));
}

View File

@@ -23,6 +23,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -61,7 +62,17 @@ async fn connect(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(mut s) => {
// Acquire ReadWrite permissions for this connection using admin DB 0
let resp = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
assert_contains(&resp, "OK", "SELECT 0 KEY test-admin handshake");
// Ensure clean slate per test on DB 0
let fl = send_cmd(&mut s, &["FLUSHDB"]).await;
assert_contains(&fl, "OK", "FLUSHDB after handshake");
return s;
}
Err(_) if attempts < 30 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;
@@ -246,9 +257,9 @@ async fn test_01_connection_and_info() {
let getname = send_cmd(&mut s, &["CLIENT", "GETNAME"]).await;
assert_contains(&getname, "myapp", "CLIENT GETNAME");
// SELECT db (requires key on DB 0)
let sel = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
assert_contains(&sel, "OK", "SELECT 0 with key");
// QUIT should close connection after sending OK
let quit = send_cmd(&mut s, &["QUIT"]).await;
@@ -279,7 +290,11 @@ async fn test_02_strings_and_expiry() {
let ex0 = send_cmd(&mut s, &["EXISTS", "user:1"]).await;
assert_contains(&ex0, "0", "EXISTS after DEL");
// DEL non-existent should return 0
let del0 = send_cmd(&mut s, &["DEL", "user:1"]).await;
assert_contains(&del0, "0", "DEL user:1 when not exists -> 0");
// INCR behavior
let i1 = send_cmd(&mut s, &["INCR", "count"]).await;
assert_contains(&i1, "1", "INCR new key -> 1");
@@ -501,11 +516,11 @@ async fn test_07_age_stateless_suite() {
let mut s = connect(port).await;
// GENENC -> [recipient, identity]
let genenc = send_cmd(&mut s, &["AGE", "GENENC"]).await;
assert!(
genenc.starts_with("*2\r\n$"),
"AGE GENENC should return array [recipient, identity], got:\n{}",
genenc
);
// Parse simple RESP array of two bulk strings to extract keys
@@ -520,7 +535,7 @@ async fn test_07_age_stateless_suite() {
let ident = lines.next().unwrap_or("").to_string();
(recip, ident)
}
let (recipient, identity) = parse_two_bulk_array(&genenc);
assert!(
recipient.starts_with("age1") && identity.starts_with("AGE-SECRET-KEY-1"),
"Unexpected AGE key formats.\nrecipient: {}\nidentity: {}",
@@ -591,7 +606,7 @@ async fn test_08_age_persistent_named_suite() {
// AGE LIST
let lst = send_cmd(&mut s, &["AGE", "LIST"]).await;
assert_contains(&lst, "encpub", "AGE LIST label encpub");
// After flattening, LIST returns a flat array of managed key names
assert_contains(&lst, "app1", "AGE LIST includes app1");
}