Merge branch 'development' of git.ourworld.tf:herocode/herolib_rust into development

This commit is contained in:
2025-08-25 07:06:52 +02:00
177 changed files with 26340 additions and 5 deletions

View File

@@ -19,12 +19,16 @@ members = [
"packages/core/net",
"packages/core/text",
"packages/crypt/vault",
"packages/data/ourdb",
"packages/data/radixtree",
"packages/data/tst",
"packages/system/git",
"packages/system/kubernetes",
"packages/system/os",
"packages/system/process",
"packages/system/virt",
"rhai",
"rhailib",
"herodo",
"packages/clients/hetznerclient",
"packages/ai/codemonkey",

View File

@@ -33,9 +33,11 @@ pub fn run(script_path: &str) -> Result<(), Box<dyn Error>> {
// TODO: if we create a scope here we could clean up all the different functions and types registered with the engine
// We should generalize the way we add things to the scope for each module separately
let mut scope = Scope::new();
// TODO: this should be done for the other clients as well (but not here of course, in each module)
let hetzner_client = sal::hetzner::api::Client::new(sal::hetzner::config::Config::from_env().unwrap());
scope.push("hetzner", hetzner_client);
// Conditionally add Hetzner client only when env config is present
if let Ok(cfg) = sal::hetzner::config::Config::from_env() {
let hetzner_client = sal::hetzner::api::Client::new(cfg);
scope.push("hetzner", hetzner_client);
}
// This makes it easy to call e.g. `hetzner.get_server()` or `mycelium.get_connected_peers()`
// --> without the need to manually create a client for each one first
// --> could be conditionally compiled to only use those that we need (we only push the things to the scope that we actually need to run the script)

277
packages/data/ourdb/API.md Normal file
View File

@@ -0,0 +1,277 @@
# OurDB API Reference
This document provides a comprehensive reference for the OurDB Rust API.
## Table of Contents
1. [Configuration](#configuration)
2. [Database Operations](#database-operations)
- [Creating and Opening](#creating-and-opening)
- [Setting Data](#setting-data)
- [Getting Data](#getting-data)
- [Deleting Data](#deleting-data)
- [History Tracking](#history-tracking)
3. [Error Handling](#error-handling)
4. [Advanced Usage](#advanced-usage)
- [Custom File Size](#custom-file-size)
- [Custom Key Size](#custom-key-size)
5. [Performance Considerations](#performance-considerations)
## Configuration
### OurDBConfig
The `OurDBConfig` struct is used to configure a new OurDB instance.
```rust
pub struct OurDBConfig {
pub path: PathBuf,
pub incremental_mode: bool,
pub file_size: Option<usize>,
pub keysize: Option<u8>,
}
```
| Field | Type | Description |
|-------|------|-------------|
| `path` | `PathBuf` | Path to the database directory |
| `incremental_mode` | `bool` | Whether to use auto-incremented IDs (true) or user-provided IDs (false) |
| `file_size` | `Option<usize>` | Maximum size of each database file in bytes (default: 500MB) |
| `keysize` | `Option<u8>` | Size of keys in bytes (default: 4, valid values: 2, 3, 4, 6) |
Example:
```rust
let config = OurDBConfig {
path: PathBuf::from("/path/to/db"),
incremental_mode: true,
file_size: Some(1024 * 1024 * 100), // 100MB
keysize: Some(4), // 4-byte keys
};
```
## Database Operations
### Creating and Opening
#### `OurDB::new`
Creates a new OurDB instance or opens an existing one.
```rust
pub fn new(config: OurDBConfig) -> Result<OurDB, Error>
```
Example:
```rust
let mut db = OurDB::new(config)?;
```
### Setting Data
#### `OurDB::set`
Sets a value in the database. In incremental mode, if no ID is provided, a new ID is generated.
```rust
pub fn set(&mut self, args: OurDBSetArgs) -> Result<u32, Error>
```
The `OurDBSetArgs` struct has the following fields:
```rust
pub struct OurDBSetArgs<'a> {
pub id: Option<u32>,
pub data: &'a [u8],
}
```
Example with auto-generated ID:
```rust
let id = db.set(OurDBSetArgs {
id: None,
data: b"Hello, World!",
})?;
```
Example with explicit ID:
```rust
db.set(OurDBSetArgs {
id: Some(42),
data: b"Hello, World!",
})?;
```
### Getting Data
#### `OurDB::get`
Retrieves a value from the database by ID.
```rust
pub fn get(&mut self, id: u32) -> Result<Vec<u8>, Error>
```
Example:
```rust
let data = db.get(42)?;
```
### Deleting Data
#### `OurDB::delete`
Deletes a value from the database by ID.
```rust
pub fn delete(&mut self, id: u32) -> Result<(), Error>
```
Example:
```rust
db.delete(42)?;
```
### History Tracking
#### `OurDB::get_history`
Retrieves the history of values for a given ID, up to the specified depth.
```rust
pub fn get_history(&mut self, id: u32, depth: u8) -> Result<Vec<Vec<u8>>, Error>
```
Example:
```rust
// Get the last 5 versions of the record
let history = db.get_history(42, 5)?;
// Process each version (most recent first)
for (i, version) in history.iter().enumerate() {
println!("Version {}: {:?}", i, version);
}
```
### Other Operations
#### `OurDB::get_next_id`
Returns the next ID that will be assigned in incremental mode.
```rust
pub fn get_next_id(&self) -> Result<u32, Error>
```
Example:
```rust
let next_id = db.get_next_id()?;
```
#### `OurDB::close`
Closes the database, ensuring all data is flushed to disk.
```rust
pub fn close(&mut self) -> Result<(), Error>
```
Example:
```rust
db.close()?;
```
#### `OurDB::destroy`
Closes the database and deletes all database files.
```rust
pub fn destroy(&mut self) -> Result<(), Error>
```
Example:
```rust
db.destroy()?;
```
## Error Handling
OurDB uses the `thiserror` crate to define error types. The main error type is `ourdb::Error`.
```rust
pub enum Error {
IoError(std::io::Error),
InvalidKeySize,
InvalidId,
RecordNotFound,
InvalidCrc,
NotIncrementalMode,
DatabaseClosed,
// ...
}
```
All OurDB operations that can fail return a `Result<T, Error>` which can be handled using Rust's standard error handling mechanisms.
Example:
```rust
match db.get(42) {
Ok(data) => println!("Found data: {:?}", data),
Err(ourdb::Error::RecordNotFound) => println!("Record not found"),
Err(e) => eprintln!("Error: {}", e),
}
```
## Advanced Usage
### Custom File Size
You can configure the maximum size of each database file:
```rust
let config = OurDBConfig {
path: PathBuf::from("/path/to/db"),
incremental_mode: true,
file_size: Some(1024 * 1024 * 10), // 10MB per file
keysize: None,
};
```
Smaller file sizes can be useful for:
- Limiting memory usage when reading files
- Improving performance on systems with limited memory
- Easier backup and file management
### Custom Key Size
OurDB supports different key sizes (2, 3, 4, or 6 bytes):
```rust
let config = OurDBConfig {
path: PathBuf::from("/path/to/db"),
incremental_mode: true,
file_size: None,
keysize: Some(6), // 6-byte keys
};
```
Key size considerations:
- 2 bytes: Up to 65,536 records
- 3 bytes: Up to 16,777,216 records
- 4 bytes: Up to 4,294,967,296 records (default)
- 6 bytes: Up to 281,474,976,710,656 records
## Performance Considerations
For optimal performance:
1. **Choose appropriate key size**: Use the smallest key size that can accommodate your expected number of records.
2. **Configure file size**: For large databases, consider using smaller file sizes to improve memory usage.
3. **Batch operations**: When inserting or updating many records, consider batching operations to minimize disk I/O.
4. **Close properly**: Always call `close()` when you're done with the database to ensure data is properly flushed to disk.
5. **Reuse OurDB instance**: Creating a new OurDB instance has overhead, so reuse the same instance for multiple operations when possible.
6. **Consider memory usage**: The lookup table is loaded into memory, so very large databases may require significant RAM.

View File

@@ -0,0 +1,32 @@
[package]
name = "ourdb"
version = "0.1.0"
edition = "2021"
description = "A lightweight, efficient key-value database with history tracking capabilities"
authors = ["OurWorld Team"]
[dependencies]
crc32fast = "1.3.2"
thiserror = "1.0.40"
log = "0.4.17"
rand = "0.8.5"
[dev-dependencies]
criterion = "0.5.1"
tempfile = "3.8.0"
# [[bench]]
# name = "ourdb_benchmarks"
# harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "advanced_usage"
path = "examples/advanced_usage.rs"
[[example]]
name = "benchmark"
path = "examples/benchmark.rs"

View File

@@ -0,0 +1,135 @@
# OurDB
OurDB is a lightweight, efficient key-value database implementation that provides data persistence with history tracking capabilities. This Rust implementation offers a robust and performant solution for applications requiring simple but reliable data storage.
## Features
- Simple key-value storage with history tracking
- Data integrity verification using CRC32
- Support for multiple backend files for large datasets
- Lookup table for fast data retrieval
- Incremental mode for auto-generated IDs
- Memory and disk-based lookup tables
## Limitations
- Maximum data size per entry is 65,535 bytes (~64KB) due to the 2-byte size field in the record header
## Usage
### Basic Example
```rust
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
fn main() -> Result<(), ourdb::Error> {
// Create a new database
let config = OurDBConfig {
path: PathBuf::from("/tmp/ourdb"),
incremental_mode: true,
file_size: None, // Use default (500MB)
keysize: None, // Use default (4 bytes)
};
let mut db = OurDB::new(config)?;
// Store data (with auto-generated ID in incremental mode)
let data = b"Hello, OurDB!";
let id = db.set(OurDBSetArgs { id: None, data })?;
println!("Stored data with ID: {}", id);
// Retrieve data
let retrieved = db.get(id)?;
println!("Retrieved: {}", String::from_utf8_lossy(&retrieved));
// Update data
let updated_data = b"Updated data";
db.set(OurDBSetArgs { id: Some(id), data: updated_data })?;
// Get history (returns most recent first)
let history = db.get_history(id, 2)?;
for (i, entry) in history.iter().enumerate() {
println!("History {}: {}", i, String::from_utf8_lossy(entry));
}
// Delete data
db.delete(id)?;
// Close the database
db.close()?;
Ok(())
}
```
### Key-Value Mode vs Incremental Mode
OurDB supports two operating modes:
1. **Key-Value Mode** (`incremental_mode: false`): You must provide IDs explicitly when storing data.
2. **Incremental Mode** (`incremental_mode: true`): IDs are auto-generated when not provided.
### Configuration Options
- `path`: Directory for database storage
- `incremental_mode`: Whether to use auto-increment mode
- `file_size`: Maximum file size (default: 500MB)
- `keysize`: Size of lookup table entries (2-6 bytes)
- 2: For databases with < 65,536 records
- 3: For databases with < 16,777,216 records
- 4: For databases with < 4,294,967,296 records (default)
- 6: For large databases requiring multiple files
## Architecture
OurDB consists of three main components:
1. **Frontend API**: Provides the public interface for database operations
2. **Lookup Table**: Maps keys to physical locations in the backend storage
3. **Backend Storage**: Manages the actual data persistence in files
### Record Format
Each record in the backend storage includes:
- 2 bytes: Data size
- 4 bytes: CRC32 checksum
- 6 bytes: Previous record location (for history)
- N bytes: Actual data
## Documentation
Additional documentation is available in the repository:
- [API Reference](API.md): Detailed API documentation
- [Migration Guide](MIGRATION.md): Guide for migrating from the V implementation
- [Architecture](architecture.md): Design and implementation details
## Examples
The repository includes several examples to demonstrate OurDB usage:
- `basic_usage.rs`: Simple operations with OurDB
- `advanced_usage.rs`: More complex features including both operation modes
- `benchmark.rs`: Performance benchmarking tool
Run an example with:
```bash
cargo run --example basic_usage
cargo run --example advanced_usage
cargo run --example benchmark
```
## Performance
OurDB is designed for efficiency and minimal overhead. The benchmark example can be used to evaluate performance on your specific hardware and workload.
Typical performance metrics on modern hardware:
- **Write**: 10,000+ operations per second
- **Read**: 50,000+ operations per second
## License
This project is licensed under the MIT License.

View File

@@ -0,0 +1,439 @@
# OurDB: Architecture for V to Rust Port
## 1. Overview
OurDB is a lightweight, efficient key-value database implementation that provides data persistence with history tracking capabilities. This document outlines the architecture for porting OurDB from its original V implementation to Rust, maintaining all existing functionality while leveraging Rust's memory safety, performance, and ecosystem.
## 2. Current Architecture (V Implementation)
The current V implementation of OurDB consists of three main components in a layered architecture:
```mermaid
graph TD
A[Client Code] --> B[Frontend API]
B --> C[Lookup Table]
B --> D[Backend Storage]
C --> D
```
### 2.1 Frontend (db.v)
The frontend provides the public API for database operations and coordinates between the lookup table and backend storage components.
Key responsibilities:
- Exposing high-level operations (set, get, delete, history)
- Managing incremental ID generation in auto-increment mode
- Coordinating data flow between lookup and backend components
- Handling database lifecycle (open, close, destroy)
### 2.2 Lookup Table (lookup.v)
The lookup table maps keys to physical locations in the backend storage.
Key responsibilities:
- Maintaining key-to-location mapping
- Optimizing key sizes based on database configuration
- Supporting both memory and disk-based lookup tables
- Handling sparse data efficiently
- Providing next ID generation for incremental mode
### 2.3 Backend Storage (backend.v)
The backend storage manages the actual data persistence in files.
Key responsibilities:
- Managing physical data storage in files
- Ensuring data integrity with CRC32 checksums
- Supporting multiple file backends for large datasets
- Implementing low-level read/write operations
- Tracking record history through linked locations
### 2.4 Core Data Structures
#### OurDB
```v
@[heap]
pub struct OurDB {
mut:
lookup &LookupTable
pub:
path string // directory for storage
incremental_mode bool
file_size u32 = 500 * (1 << 20) // 500MB
pub mut:
file os.File
file_nr u16 // the file which is open
last_used_file_nr u16
}
```
#### LookupTable
```v
pub struct LookupTable {
keysize u8
lookuppath string
mut:
data []u8
incremental ?u32 // points to next empty slot if incremental mode is enabled
}
```
#### Location
```v
pub struct Location {
pub mut:
file_nr u16
position u32
}
```
### 2.5 Storage Format
#### Record Format
Each record in the backend storage includes:
- 2 bytes: Data size
- 4 bytes: CRC32 checksum
- 6 bytes: Previous record location (for history)
- N bytes: Actual data
#### Lookup Table Optimization
The lookup table automatically optimizes its key size based on the database configuration:
- 2 bytes: For databases with < 65,536 records
- 3 bytes: For databases with < 16,777,216 records
- 4 bytes: For databases with < 4,294,967,296 records
- 6 bytes: For large databases requiring multiple files
## 3. Proposed Rust Architecture
The Rust implementation will maintain the same layered architecture while leveraging Rust's type system, ownership model, and error handling.
```mermaid
graph TD
A[Client Code] --> B[OurDB API]
B --> C[LookupTable]
B --> D[Backend]
C --> D
E[Error Handling] --> B
E --> C
E --> D
F[Configuration] --> B
```
### 3.1 Core Components
#### 3.1.1 OurDB (API Layer)
```rust
pub struct OurDB {
path: String,
incremental_mode: bool,
file_size: u32,
lookup: LookupTable,
file: Option<std::fs::File>,
file_nr: u16,
last_used_file_nr: u16,
}
impl OurDB {
pub fn new(config: OurDBConfig) -> Result<Self, Error>;
pub fn set(&mut self, id: Option<u32>, data: &[u8]) -> Result<u32, Error>;
pub fn get(&mut self, id: u32) -> Result<Vec<u8>, Error>;
pub fn get_history(&mut self, id: u32, depth: u8) -> Result<Vec<Vec<u8>>, Error>;
pub fn delete(&mut self, id: u32) -> Result<(), Error>;
pub fn get_next_id(&mut self) -> Result<u32, Error>;
pub fn close(&mut self) -> Result<(), Error>;
pub fn destroy(&mut self) -> Result<(), Error>;
}
```
#### 3.1.2 LookupTable
```rust
pub struct LookupTable {
keysize: u8,
lookuppath: String,
data: Vec<u8>,
incremental: Option<u32>,
}
impl LookupTable {
fn new(config: LookupConfig) -> Result<Self, Error>;
fn get(&self, id: u32) -> Result<Location, Error>;
fn set(&mut self, id: u32, location: Location) -> Result<(), Error>;
fn delete(&mut self, id: u32) -> Result<(), Error>;
fn get_next_id(&self) -> Result<u32, Error>;
fn increment_index(&mut self) -> Result<(), Error>;
fn export_data(&self, path: &str) -> Result<(), Error>;
fn import_data(&mut self, path: &str) -> Result<(), Error>;
fn export_sparse(&self, path: &str) -> Result<(), Error>;
fn import_sparse(&mut self, path: &str) -> Result<(), Error>;
}
```
#### 3.1.3 Location
```rust
pub struct Location {
file_nr: u16,
position: u32,
}
impl Location {
fn new(bytes: &[u8], keysize: u8) -> Result<Self, Error>;
fn to_bytes(&self) -> Result<Vec<u8>, Error>;
fn to_u64(&self) -> u64;
}
```
#### 3.1.4 Backend
The backend functionality will be implemented as methods on the OurDB struct:
```rust
impl OurDB {
fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error>;
fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error>;
fn get_file_nr(&mut self) -> Result<u16, Error>;
fn set_(&mut self, id: u32, old_location: Location, data: &[u8]) -> Result<(), Error>;
fn get_(&mut self, location: Location) -> Result<Vec<u8>, Error>;
fn get_prev_pos_(&mut self, location: Location) -> Result<Location, Error>;
fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error>;
fn close_(&mut self);
}
```
#### 3.1.5 Configuration
```rust
pub struct OurDBConfig {
pub record_nr_max: u32,
pub record_size_max: u32,
pub file_size: u32,
pub path: String,
pub incremental_mode: bool,
pub reset: bool,
}
struct LookupConfig {
size: u32,
keysize: u8,
lookuppath: String,
incremental_mode: bool,
}
```
#### 3.1.6 Error Handling
```rust
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("Invalid key size: {0}")]
InvalidKeySize(u8),
#[error("Record not found: {0}")]
RecordNotFound(u32),
#[error("Data corruption: CRC mismatch")]
DataCorruption,
#[error("Index out of bounds: {0}")]
IndexOutOfBounds(u32),
#[error("Incremental mode not enabled")]
IncrementalNotEnabled,
#[error("Lookup table is full")]
LookupTableFull,
#[error("Invalid file number: {0}")]
InvalidFileNumber(u16),
#[error("Invalid operation: {0}")]
InvalidOperation(String),
}
```
## 4. Implementation Strategy
### 4.1 Phase 1: Core Data Structures
1. Implement the `Location` struct with serialization/deserialization
2. Implement the `Error` enum for error handling
3. Implement the configuration structures
### 4.2 Phase 2: Lookup Table
1. Implement the `LookupTable` struct with memory-based storage
2. Add disk-based storage support
3. Implement key size optimization
4. Add incremental ID support
5. Implement import/export functionality
### 4.3 Phase 3: Backend Storage
1. Implement file management functions
2. Implement record serialization/deserialization with CRC32
3. Implement history tracking through linked locations
4. Add support for multiple backend files
### 4.4 Phase 4: Frontend API
1. Implement the `OurDB` struct with core operations
2. Add high-level API methods (set, get, delete, history)
3. Implement database lifecycle management
### 4.5 Phase 5: Testing and Optimization
1. Port existing tests from V to Rust
2. Add new tests for Rust-specific functionality
3. Benchmark and optimize performance
4. Ensure compatibility with existing OurDB files
## 5. Implementation Considerations
### 5.1 Memory Management
Leverage Rust's ownership model for safe and efficient memory management:
- Use `Vec<u8>` for data buffers instead of raw pointers
- Implement proper RAII for file handles
- Use references and borrows to avoid unnecessary copying
- Consider using `Bytes` from the `bytes` crate for zero-copy operations
### 5.2 Error Handling
Use Rust's `Result` type for comprehensive error handling:
- Define custom error types for OurDB-specific errors
- Propagate errors using the `?` operator
- Provide detailed error messages
- Implement proper error conversion using the `From` trait
### 5.3 File I/O
Optimize file operations for performance:
- Use `BufReader` and `BufWriter` for buffered I/O
- Implement proper file locking for concurrent access
- Consider memory-mapped files for lookup tables
- Use `seek` and `read_exact` for precise positioning
### 5.4 Concurrency
Consider thread safety for concurrent database access:
- Use interior mutability patterns where appropriate
- Implement `Send` and `Sync` traits for thread safety
- Consider using `RwLock` for shared read access
- Provide clear documentation on thread safety guarantees
### 5.5 Performance Optimizations
Identify opportunities for performance improvements:
- Use memory-mapped files for lookup tables
- Implement caching for frequently accessed records
- Use zero-copy operations where possible
- Consider async I/O for non-blocking operations
## 6. Testing Strategy
### 6.1 Unit Tests
Write comprehensive unit tests for each component:
- Test `Location` serialization/deserialization
- Test `LookupTable` operations
- Test backend storage functions
- Test error handling
### 6.2 Integration Tests
Write integration tests for the complete system:
- Test database creation and configuration
- Test basic CRUD operations
- Test history tracking
- Test incremental ID generation
- Test file management
### 6.3 Compatibility Tests
Ensure compatibility with existing OurDB files:
- Test reading existing V-created OurDB files
- Test writing files that can be read by the V implementation
- Test migration scenarios
### 6.4 Performance Tests
Benchmark performance against the V implementation:
- Measure throughput for set/get operations
- Measure latency for different operations
- Test with different database sizes
- Test with different record sizes
## 7. Project Structure
```
ourdb/
├── Cargo.toml
├── src/
│ ├── lib.rs # Public API and re-exports
│ ├── ourdb.rs # OurDB implementation (frontend)
│ ├── lookup.rs # Lookup table implementation
│ ├── location.rs # Location struct implementation
│ ├── backend.rs # Backend storage implementation
│ ├── error.rs # Error types
│ ├── config.rs # Configuration structures
│ └── utils.rs # Utility functions
├── tests/
│ ├── unit/ # Unit tests
│ ├── integration/ # Integration tests
│ └── compatibility/ # Compatibility tests
└── examples/
├── basic.rs # Basic usage example
├── history.rs # History tracking example
└── client_server.rs # Client-server example
```
## 8. Dependencies
The Rust implementation will use the following dependencies:
- `thiserror` for error handling
- `crc32fast` for CRC32 calculation
- `bytes` for efficient byte manipulation
- `memmap2` for memory-mapped files (optional)
- `serde` for serialization (optional, for future extensions)
- `log` for logging
- `criterion` for benchmarking
## 9. Compatibility Considerations
To ensure compatibility with the V implementation:
1. Maintain the same file format for data storage
2. Preserve the lookup table format
3. Keep the same CRC32 calculation method
4. Ensure identical behavior for incremental ID generation
5. Maintain the same history tracking mechanism
## 10. Future Extensions
Potential future extensions to consider:
1. Async API for non-blocking operations
2. Transactions support
3. Better concurrency control
4. Compression support
5. Encryption support
6. Streaming API for large values
7. Iterators for scanning records
8. Secondary indexes
## 11. Conclusion
This architecture provides a roadmap for porting OurDB from V to Rust while maintaining compatibility and leveraging Rust's strengths. The implementation will follow a phased approach, starting with core data structures and gradually building up to the complete system.
The Rust implementation aims to be:
- **Safe**: Leveraging Rust's ownership model for memory safety
- **Fast**: Maintaining or improving performance compared to V
- **Compatible**: Working with existing OurDB files
- **Extensible**: Providing a foundation for future enhancements
- **Well-tested**: Including comprehensive test coverage

View File

@@ -0,0 +1,231 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
use std::time::Instant;
/// Entry point: runs the three demonstration scenarios against a temporary
/// database directory, then removes that directory unless `KEEP_DB` is set.
fn main() -> Result<(), ourdb::Error> {
    // Work inside the OS temp dir so the example leaves no permanent state.
    let db_path = std::env::temp_dir().join("ourdb_advanced_example");
    std::fs::create_dir_all(&db_path)?;
    println!("Creating database at: {}", db_path.display());

    // Run each demonstration in turn.
    key_value_mode_example(&db_path)?;
    incremental_mode_example(&db_path)?;
    performance_benchmark(&db_path)?;

    // Setting KEEP_DB in the environment preserves the files for inspection.
    match std::env::var("KEEP_DB") {
        Err(_) => {
            std::fs::remove_dir_all(&db_path)?;
            println!("Cleaned up database directory");
        }
        Ok(_) => println!("Database kept at: {}", db_path.display()),
    }

    Ok(())
}
/// Demonstrates key-value (non-incremental) mode, where the caller must
/// supply every record ID explicitly, including history tracking on updates.
fn key_value_mode_example(base_path: &PathBuf) -> Result<(), ourdb::Error> {
    println!("\n=== Key-Value Mode Example ===");
    let dir = base_path.join("key_value");
    std::fs::create_dir_all(&dir)?;

    // Non-incremental database with a deliberately tiny key size.
    let mut db = OurDB::new(OurDBConfig {
        path: dir,
        incremental_mode: false,
        file_size: Some(1024 * 1024), // 1MB for testing
        keysize: Some(2),             // Small key size for demonstration
        reset: None,                  // Don't reset existing database
    })?;

    let custom_ids = [100, 200, 300, 400, 500];

    // Every write must carry an explicit ID in this mode.
    for (index, &record_id) in custom_ids.iter().enumerate() {
        let payload = format!("Record with custom ID {}", record_id);
        db.set(OurDBSetArgs {
            id: Some(record_id),
            data: payload.as_bytes(),
        })?;
        println!("Stored record {} with custom ID: {}", index + 1, record_id);
    }

    // Read everything back by its explicit ID.
    for &record_id in custom_ids.iter() {
        let stored = db.get(record_id)?;
        println!(
            "Retrieved ID {}: {}",
            record_id,
            String::from_utf8_lossy(&stored)
        );
    }

    // Overwrite one record several times so it accumulates history.
    let id_to_update = custom_ids[2]; // ID 300
    for version in 1..=3 {
        let revised = format!("Updated record {} (version {})", id_to_update, version);
        db.set(OurDBSetArgs {
            id: Some(id_to_update),
            data: revised.as_bytes(),
        })?;
        println!("Updated ID {} (version {})", id_to_update, version);
    }

    // History is returned newest-first.
    let history = db.get_history(id_to_update, 5)?;
    println!("History for ID {} (most recent first):", id_to_update);
    for (slot, entry) in history.iter().enumerate() {
        println!(" Version {}: {}", slot, String::from_utf8_lossy(entry));
    }

    db.close()?;
    println!("Key-value mode example completed");
    Ok(())
}
/// Demonstrates incremental mode, where record IDs are auto-assigned by
/// the database and the next ID can be queried in advance.
fn incremental_mode_example(base_path: &PathBuf) -> Result<(), ourdb::Error> {
    println!("\n=== Incremental Mode Example ===");
    let dir = base_path.join("incremental");
    std::fs::create_dir_all(&dir)?;

    // Incremental database using 3-byte lookup keys.
    let mut db = OurDB::new(OurDBConfig {
        path: dir,
        incremental_mode: true,
        file_size: Some(1024 * 1024), // 1MB for testing
        keysize: Some(3),             // 3-byte keys
        reset: None,                  // Don't reset existing database
    })?;

    // Collect whatever IDs the database hands back.
    let mut assigned_ids = Vec::new();
    for record_no in 1..=5 {
        let payload = format!("Auto-increment record {}", record_no);
        let new_id = db.set(OurDBSetArgs {
            id: None,
            data: payload.as_bytes(),
        })?;
        assigned_ids.push(new_id);
        println!("Stored record {} with auto-assigned ID: {}", record_no, new_id);
    }

    // Peek at the ID the next insert would receive.
    let next_id = db.get_next_id()?;
    println!("Next ID to be assigned: {}", next_id);

    // Read every stored record back.
    for &id in assigned_ids.iter() {
        let stored = db.get(id)?;
        println!(
            "Retrieved ID {}: {}",
            id,
            String::from_utf8_lossy(&stored)
        );
    }

    db.close()?;
    println!("Incremental mode example completed");
    Ok(())
}
/// Measures write, read, and update throughput with 1000 operations on
/// 100-byte records, printing ops/sec and ms/op for each phase.
fn performance_benchmark(base_path: &PathBuf) -> Result<(), ourdb::Error> {
    println!("\n=== Performance Benchmark ===");
    let dir = base_path.join("benchmark");
    std::fs::create_dir_all(&dir)?;

    // Incremental mode with 4-byte keys.
    let mut db = OurDB::new(OurDBConfig {
        path: dir,
        incremental_mode: true,
        file_size: Some(1024 * 1024), // 1MB per backend file (original comment said 10MB)
        keysize: Some(4),             // 4-byte keys
        reset: None,                  // Don't reset existing database
    })?;

    // Benchmark parameters.
    let num_operations = 1000;
    let data_size = 100; // bytes per record
    let test_data = vec![b'A'; data_size];

    // Phase 1: writes with auto-assigned IDs.
    println!("Benchmarking {} write operations...", num_operations);
    let write_timer = Instant::now();
    let mut ids = Vec::with_capacity(num_operations);
    for _ in 0..num_operations {
        ids.push(db.set(OurDBSetArgs {
            id: None,
            data: &test_data,
        })?);
    }
    let write_duration = write_timer.elapsed();
    let writes_per_second = num_operations as f64 / write_duration.as_secs_f64();
    println!(
        "Write performance: {:.2} ops/sec ({:.2} ms/op)",
        writes_per_second,
        write_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    // Phase 2: reads of every record just written.
    println!("Benchmarking {} read operations...", num_operations);
    let read_timer = Instant::now();
    for &id in ids.iter() {
        let _ = db.get(id)?;
    }
    let read_duration = read_timer.elapsed();
    let reads_per_second = num_operations as f64 / read_duration.as_secs_f64();
    println!(
        "Read performance: {:.2} ops/sec ({:.2} ms/op)",
        reads_per_second,
        read_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    // Phase 3: updates, rewriting each record in place by explicit ID.
    println!("Benchmarking {} update operations...", num_operations);
    let update_timer = Instant::now();
    for &id in ids.iter() {
        db.set(OurDBSetArgs {
            id: Some(id),
            data: &test_data,
        })?;
    }
    let update_duration = update_timer.elapsed();
    let updates_per_second = num_operations as f64 / update_duration.as_secs_f64();
    println!(
        "Update performance: {:.2} ops/sec ({:.2} ms/op)",
        updates_per_second,
        update_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    db.close()?;
    println!("Performance benchmark completed");
    Ok(())
}

View File

@@ -0,0 +1,89 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
/// Walks through the full OurDB lifecycle: create, set, get, update,
/// history, delete, close, and (optionally) clean up the storage directory.
fn main() -> Result<(), ourdb::Error> {
    // Use a throwaway directory under the OS temp path.
    let db_path = std::env::temp_dir().join("ourdb_example");
    std::fs::create_dir_all(&db_path)?;
    println!("Creating database at: {}", db_path.display());

    // Incremental mode with all size defaults.
    let mut db = OurDB::new(OurDBConfig {
        path: db_path.clone(),
        incremental_mode: true,
        file_size: None, // Use default (500MB)
        keysize: None,   // Use default (4 bytes)
        reset: None,     // Don't reset existing database
    })?;

    // Insert two records; IDs are assigned automatically.
    let id1 = db.set(OurDBSetArgs {
        id: None,
        data: b"First record",
    })?;
    println!("Stored first record with ID: {}", id1);

    let id2 = db.set(OurDBSetArgs {
        id: None,
        data: b"Second record",
    })?;
    println!("Stored second record with ID: {}", id2);

    // Read both records back in insertion order.
    for &id in &[id1, id2] {
        let bytes = db.get(id)?;
        println!(
            "Retrieved ID {}: {}",
            id,
            String::from_utf8_lossy(&bytes)
        );
    }

    // Overwrite the first record so it gains a history entry.
    db.set(OurDBSetArgs {
        id: Some(id1),
        data: b"Updated first record",
    })?;
    println!("Updated record with ID: {}", id1);

    // History comes back most-recent-first.
    let history = db.get_history(id1, 2)?;
    println!("History for ID {}:", id1);
    for (version, entry) in history.iter().enumerate() {
        println!(" Version {}: {}", version, String::from_utf8_lossy(entry));
    }

    // Remove the second record and confirm it is gone.
    db.delete(id2)?;
    println!("Deleted record with ID: {}", id2);
    match db.get(id2) {
        Ok(_) => println!("Record still exists (unexpected)"),
        Err(e) => println!("Verified deletion: {}", e),
    }

    db.close()?;
    println!("Database closed successfully");

    // Setting KEEP_DB in the environment preserves the files for inspection.
    if std::env::var("KEEP_DB").is_ok() {
        println!("Database kept at: {}", db_path.display());
    } else {
        std::fs::remove_dir_all(&db_path)?;
        println!("Cleaned up database directory");
    }

    Ok(())
}

View File

@@ -0,0 +1,124 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::time::Instant;
/// Micro-benchmark for OurDB: times `set` (insert), `get`, and `set` (update)
/// over a configurable number of 100-byte records.
///
/// Flags: `--no-incremental`, `--keysize <n>`, `--ops <n>`.
fn main() -> Result<(), ourdb::Error> {
    // Benchmark knobs, overridable from the command line.
    let args: Vec<String> = std::env::args().collect();
    let mut incremental_mode = true;
    let mut keysize: u8 = 4;
    let mut num_operations = 10000;

    // Scan the argument list; value tokens that follow a flag are harmless
    // when re-examined, since they match no flag name.
    let mut i = 1;
    while i < args.len() {
        match args[i].as_str() {
            "--no-incremental" => incremental_mode = false,
            "--keysize" if i + 1 < args.len() => {
                keysize = args[i + 1].parse().unwrap_or(4);
            }
            "--ops" if i + 1 < args.len() => {
                num_operations = args[i + 1].parse().unwrap_or(10000);
            }
            _ => {}
        }
        i += 1;
    }

    // Scratch directory for the benchmark database.
    let db_path = std::env::temp_dir().join("ourdb_benchmark");
    std::fs::create_dir_all(&db_path)?;
    println!("Database path: {}", db_path.display());

    // Small file size to exercise file handling; always start clean.
    let mut db = OurDB::new(OurDBConfig {
        path: db_path.clone(),
        incremental_mode,
        file_size: Some(1024 * 1024),
        keysize: Some(keysize),
        reset: Some(true), // Reset the database for benchmarking
    })?;

    // Each record is 100 bytes of filler.
    let test_data = vec![b'A'; 100];

    // --- Writes ---
    println!(
        "Benchmarking {} write operations (incremental: {}, keysize: {})...",
        num_operations, incremental_mode, keysize
    );
    let write_timer = Instant::now();
    let mut ids = Vec::with_capacity(num_operations);
    for _ in 0..num_operations {
        let id = match incremental_mode {
            // Auto-ID mode: the database assigns the key.
            true => db.set(OurDBSetArgs { id: None, data: &test_data })?,
            // Key-value mode: the caller supplies the key.
            false => {
                let next = ids.len() as u32 + 1;
                db.set(OurDBSetArgs { id: Some(next), data: &test_data })?;
                next
            }
        };
        ids.push(id);
    }
    let write_duration = write_timer.elapsed();
    println!(
        "Write performance: {:.2} ops/sec ({:.2} ms/op)",
        num_operations as f64 / write_duration.as_secs_f64(),
        write_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    // --- Reads ---
    println!("Benchmarking {} read operations...", num_operations);
    let read_timer = Instant::now();
    for &id in &ids {
        let _ = db.get(id)?;
    }
    let read_duration = read_timer.elapsed();
    println!(
        "Read performance: {:.2} ops/sec ({:.2} ms/op)",
        num_operations as f64 / read_duration.as_secs_f64(),
        read_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    // --- Updates (rewrite every record in place) ---
    println!("Benchmarking {} update operations...", num_operations);
    let update_timer = Instant::now();
    for &id in &ids {
        db.set(OurDBSetArgs { id: Some(id), data: &test_data })?;
    }
    let update_duration = update_timer.elapsed();
    println!(
        "Update performance: {:.2} ops/sec ({:.2} ms/op)",
        num_operations as f64 / update_duration.as_secs_f64(),
        update_duration.as_secs_f64() * 1000.0 / num_operations as f64
    );

    // Tear down the benchmark database.
    db.close()?;
    std::fs::remove_dir_all(&db_path)?;
    Ok(())
}

View File

@@ -0,0 +1,83 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::env::temp_dir;
use std::time::{SystemTime, UNIX_EPOCH};
/// Demonstrates a full insert/read/update/history/delete round trip against
/// a throwaway OurDB instance in the system temp directory.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Standalone OurDB Example");
    println!("=======================\n");

    // Unique scratch directory so repeated runs never collide.
    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let db_path = temp_dir().join(format!("ourdb_example_{}", stamp));
    std::fs::create_dir_all(&db_path)?;
    println!("Creating database at: {}", db_path.display());

    // Incremental mode: the database assigns record IDs for us.
    let mut db = OurDB::new(OurDBConfig {
        path: db_path.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: Some(false),
    })?;
    println!("Database created successfully");

    // Insert one record and read it back.
    let id = db.set(OurDBSetArgs { id: None, data: b"Hello, OurDB!" })?;
    println!("\nStored data with ID: {}", id);
    let current = db.get(id)?;
    println!("Retrieved data: {}", String::from_utf8_lossy(&current));

    // Overwrite it in place; the previous version remains in history.
    db.set(OurDBSetArgs { id: Some(id), data: b"Updated data in OurDB!" })?;
    println!("\nUpdated data with ID: {}", id);
    let current = db.get(id)?;
    println!(
        "Retrieved updated data: {}",
        String::from_utf8_lossy(&current)
    );

    // Walk up to the last two versions, newest first.
    let history = db.get_history(id, 2)?;
    println!("\nHistory for ID {}:", id);
    for (idx, entry) in history.iter().enumerate() {
        println!("  Version {}: {}", idx + 1, String::from_utf8_lossy(entry));
    }

    // Delete and prove the record is unreachable.
    db.delete(id)?;
    println!("\nDeleted data with ID: {}", id);
    match db.get(id) {
        Ok(_) => println!("Data still exists (unexpected)"),
        Err(e) => println!("Verified deletion: {}", e),
    }

    println!("\nExample completed successfully!");

    // Tear everything down.
    db.close()?;
    std::fs::remove_dir_all(&db_path)?;
    println!("Cleaned up database directory");
    Ok(())
}

View File

@@ -0,0 +1,83 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::env::temp_dir;
use std::time::{SystemTime, UNIX_EPOCH};
/// Demonstrates a full insert/read/update/history/delete round trip against
/// a throwaway OurDB instance in the system temp directory.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Standalone OurDB Example");
    println!("=======================\n");

    // Unique scratch directory so repeated runs never collide.
    let stamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let db_path = temp_dir().join(format!("ourdb_example_{}", stamp));
    std::fs::create_dir_all(&db_path)?;
    println!("Creating database at: {}", db_path.display());

    // Incremental mode: the database assigns record IDs for us.
    let mut db = OurDB::new(OurDBConfig {
        path: db_path.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: Some(false),
    })?;
    println!("Database created successfully");

    // Insert one record and read it back.
    let id = db.set(OurDBSetArgs { id: None, data: b"Hello, OurDB!" })?;
    println!("\nStored data with ID: {}", id);
    let current = db.get(id)?;
    println!("Retrieved data: {}", String::from_utf8_lossy(&current));

    // Overwrite it in place; the previous version remains in history.
    db.set(OurDBSetArgs { id: Some(id), data: b"Updated data in OurDB!" })?;
    println!("\nUpdated data with ID: {}", id);
    let current = db.get(id)?;
    println!(
        "Retrieved updated data: {}",
        String::from_utf8_lossy(&current)
    );

    // Walk up to the last two versions, newest first.
    let history = db.get_history(id, 2)?;
    println!("\nHistory for ID {}:", id);
    for (idx, entry) in history.iter().enumerate() {
        println!("  Version {}: {}", idx + 1, String::from_utf8_lossy(entry));
    }

    // Delete and prove the record is unreachable.
    db.delete(id)?;
    println!("\nDeleted data with ID: {}", id);
    match db.get(id) {
        Ok(_) => println!("Data still exists (unexpected)"),
        Err(e) => println!("Verified deletion: {}", e),
    }

    println!("\nExample completed successfully!");

    // Tear everything down.
    db.close()?;
    std::fs::remove_dir_all(&db_path)?;
    println!("Cleaned up database directory");
    Ok(())
}

View File

@@ -0,0 +1,366 @@
use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
use crc32fast::Hasher;
use crate::error::Error;
use crate::location::Location;
use crate::OurDB;
// Header size: 2 bytes (size) + 4 bytes (CRC32) + 6 bytes (previous location)
// Record layout on disk: [size LE u16][crc LE u32][prev location BE 6B][data].
pub const HEADER_SIZE: usize = 12;
impl OurDB {
    /// Selects and opens a database file for read/write operations
    ///
    /// Closes any currently open handle, creates `<file_nr>.db` on demand,
    /// reopens it fresh, and records `file_nr` as the active file.
    pub(crate) fn db_file_select(&mut self, file_nr: u16) -> Result<(), Error> {
        // No need to check if file_nr > 65535 as u16 can't exceed that value
        let path = self.path.join(format!("{}.db", file_nr));
        // Always close the current file if it's open
        self.file = None;
        // Create file if it doesn't exist
        if !path.exists() {
            self.create_new_db_file(file_nr)?;
        }
        // Open the file fresh
        let file = OpenOptions::new().read(true).write(true).open(&path)?;
        self.file = Some(file);
        self.file_nr = file_nr;
        Ok(())
    }
    /// Creates a new database file
    ///
    /// The file is seeded with one zero byte so no real record ever starts
    /// at offset 0 — position 0 is reserved to mean "no record".
    pub(crate) fn create_new_db_file(&mut self, file_nr: u16) -> Result<(), Error> {
        let new_file_path = self.path.join(format!("{}.db", file_nr));
        let mut file = File::create(&new_file_path)?;
        // Write a single byte to make all positions start from 1
        file.write_all(&[0u8])?;
        Ok(())
    }
    /// Gets the file number to use for the next write operation
    ///
    /// Keysizes 2-4 cannot encode a file number in the lookup entry, so all
    /// data lives in `0.db`. Keysize 6 rolls over to a new numbered file once
    /// the current one reaches `self.file_size`.
    pub(crate) fn get_file_nr(&mut self) -> Result<u16, Error> {
        // For keysize 2, 3, or 4, we can only use file_nr 0
        if self.lookup.keysize() <= 4 {
            let path = self.path.join("0.db");
            if !path.exists() {
                self.create_new_db_file(0)?;
            }
            return Ok(0);
        }
        // For keysize 6, we can use multiple files
        let path = self.path.join(format!("{}.db", self.last_used_file_nr));
        if !path.exists() {
            self.create_new_db_file(self.last_used_file_nr)?;
            return Ok(self.last_used_file_nr);
        }
        let metadata = fs::metadata(&path)?;
        if metadata.len() >= self.file_size as u64 {
            // Current file is full: advance to a fresh file.
            self.last_used_file_nr += 1;
            self.create_new_db_file(self.last_used_file_nr)?;
        }
        Ok(self.last_used_file_nr)
    }
    /// Stores data at the specified ID with history tracking
    ///
    /// Appends `[header][data]` to the active data file; the header embeds
    /// `old_location` so previous versions remain reachable as a chain, then
    /// the lookup table is pointed at the new record.
    pub(crate) fn set_(
        &mut self,
        id: u32,
        old_location: Location,
        data: &[u8],
    ) -> Result<(), Error> {
        // Validate data size - maximum is u16::MAX (65535 bytes or ~64KB)
        if data.len() > u16::MAX as usize {
            return Err(Error::InvalidOperation(format!(
                "Data size exceeds maximum allowed size of {} bytes",
                u16::MAX
            )));
        }
        // Get file number to use
        let file_nr = self.get_file_nr()?;
        // Select the file
        self.db_file_select(file_nr)?;
        // Get current file position for lookup
        let file = self
            .file
            .as_mut()
            .ok_or_else(|| Error::Other("No file open".to_string()))?;
        file.seek(SeekFrom::End(0))?;
        let position = file.stream_position()? as u32;
        // Create new location
        let new_location = Location { file_nr, position };
        // Calculate CRC of data
        let crc = calculate_crc(data);
        // Create header
        let mut header = vec![0u8; HEADER_SIZE];
        // Write size (2 bytes, little-endian)
        let size = data.len() as u16; // Safe now because we've validated the size
        header[0] = (size & 0xFF) as u8;
        header[1] = ((size >> 8) & 0xFF) as u8;
        // Write CRC (4 bytes, little-endian)
        header[2] = (crc & 0xFF) as u8;
        header[3] = ((crc >> 8) & 0xFF) as u8;
        header[4] = ((crc >> 16) & 0xFF) as u8;
        header[5] = ((crc >> 24) & 0xFF) as u8;
        // Write previous location (6 bytes) — links this record to the
        // prior version for get_history traversal
        let prev_bytes = old_location.to_bytes();
        for (i, &byte) in prev_bytes.iter().enumerate().take(6) {
            header[6 + i] = byte;
        }
        // Write header
        file.write_all(&header)?;
        // Write actual data
        file.write_all(data)?;
        file.flush()?;
        // Update lookup table with new position
        self.lookup.set(id, new_location)?;
        Ok(())
    }
    /// Retrieves data at the specified location
    ///
    /// Reads the header, then `size` bytes of payload, and verifies the
    /// stored CRC32 before returning the data.
    pub(crate) fn get_(&mut self, location: Location) -> Result<Vec<u8>, Error> {
        // Position 0 is the reserved "no record" marker.
        if location.position == 0 {
            return Err(Error::NotFound(format!(
                "Record not found, location: {:?}",
                location
            )));
        }
        // Select the file
        self.db_file_select(location.file_nr)?;
        let file = self
            .file
            .as_mut()
            .ok_or_else(|| Error::Other("No file open".to_string()))?;
        // Read header
        file.seek(SeekFrom::Start(location.position as u64))?;
        let mut header = vec![0u8; HEADER_SIZE];
        file.read_exact(&mut header)?;
        // Parse size (2 bytes, little-endian)
        let size = u16::from(header[0]) | (u16::from(header[1]) << 8);
        // Parse CRC (4 bytes, little-endian)
        let stored_crc = u32::from(header[2])
            | (u32::from(header[3]) << 8)
            | (u32::from(header[4]) << 16)
            | (u32::from(header[5]) << 24);
        // Read data
        let mut data = vec![0u8; size as usize];
        file.read_exact(&mut data)?;
        // Verify CRC
        let calculated_crc = calculate_crc(&data);
        if calculated_crc != stored_crc {
            return Err(Error::DataCorruption(
                "CRC mismatch: data corruption detected".to_string(),
            ));
        }
        Ok(data)
    }
    /// Retrieves the previous position for a record (for history tracking)
    ///
    /// Reads only the 6-byte previous-location field of the header (skipping
    /// the size and CRC bytes at offsets 0-5).
    pub(crate) fn get_prev_pos_(&mut self, location: Location) -> Result<Location, Error> {
        if location.position == 0 {
            return Err(Error::NotFound("Record not found".to_string()));
        }
        // Select the file
        self.db_file_select(location.file_nr)?;
        let file = self
            .file
            .as_mut()
            .ok_or_else(|| Error::Other("No file open".to_string()))?;
        // Skip size and CRC (6 bytes)
        file.seek(SeekFrom::Start(location.position as u64 + 6))?;
        // Read previous location (6 bytes)
        let mut prev_bytes = vec![0u8; 6];
        file.read_exact(&mut prev_bytes)?;
        // Create location from bytes (6-byte encoding: file_nr + position)
        Location::from_bytes(&prev_bytes, 6)
    }
    /// Deletes the record at the specified location
    ///
    /// Zero-fills the record (header + payload) on disk and clears the
    /// lookup entry so the id resolves to position 0 ("no record").
    pub(crate) fn delete_(&mut self, id: u32, location: Location) -> Result<(), Error> {
        if location.position == 0 {
            return Err(Error::NotFound("Record not found".to_string()));
        }
        // Select the file
        self.db_file_select(location.file_nr)?;
        let file = self
            .file
            .as_mut()
            .ok_or_else(|| Error::Other("No file open".to_string()))?;
        // Read size first, so we know how many bytes to blank out
        file.seek(SeekFrom::Start(location.position as u64))?;
        let mut size_bytes = vec![0u8; 2];
        file.read_exact(&mut size_bytes)?;
        let size = u16::from(size_bytes[0]) | (u16::from(size_bytes[1]) << 8);
        // Write zeros for the entire record (header + data)
        let zeros = vec![0u8; HEADER_SIZE + size as usize];
        file.seek(SeekFrom::Start(location.position as u64))?;
        file.write_all(&zeros)?;
        // Clear lookup entry
        self.lookup.delete(id)?;
        Ok(())
    }
    /// Condenses the database by removing empty records and updating positions
    ///
    /// NOTE(review): this is currently a stub — it scans the files but the
    /// record copying, lookup rewriting, and file replacement are still TODO,
    /// so calling it only creates and removes a temp directory.
    pub fn condense(&mut self) -> Result<(), Error> {
        // Create a temporary directory
        let temp_path = self.path.join("temp");
        fs::create_dir_all(&temp_path)?;
        // Get all file numbers
        let mut file_numbers = Vec::new();
        for entry in fs::read_dir(&self.path)? {
            let entry = entry?;
            let path = entry.path();
            if path.is_file() && path.extension().map_or(false, |ext| ext == "db") {
                if let Some(stem) = path.file_stem() {
                    if let Ok(file_nr) = stem.to_string_lossy().parse::<u16>() {
                        file_numbers.push(file_nr);
                    }
                }
            }
        }
        // Process each file
        for file_nr in file_numbers {
            let src_path = self.path.join(format!("{}.db", file_nr));
            let temp_file_path = temp_path.join(format!("{}.db", file_nr));
            // Create new file
            let mut temp_file = File::create(&temp_file_path)?;
            temp_file.write_all(&[0u8])?; // Initialize with a byte
            // Open source file
            let mut src_file = File::open(&src_path)?;
            // Read and process records
            let mut buffer = vec![0u8; 1024]; // Read in chunks
            let mut _position = 0;
            while let Ok(bytes_read) = src_file.read(&mut buffer) {
                if bytes_read == 0 {
                    break;
                }
                // Process the chunk
                // This is a simplified version - in a real implementation,
                // you would need to handle records that span chunk boundaries
                _position += bytes_read;
            }
            // TODO: Implement proper record copying and position updating
            // This would involve:
            // 1. Reading each record from the source file
            // 2. If not deleted (all zeros), copy to temp file
            // 3. Update lookup table with new positions
        }
        // TODO: Replace original files with temp files
        // Clean up
        fs::remove_dir_all(&temp_path)?;
        Ok(())
    }
}
/// Calculates CRC32 for the data
/// Calculates CRC32 for the data
///
/// One-shot checksum over the whole byte slice; used to detect on-disk
/// corruption when records are read back.
fn calculate_crc(data: &[u8]) -> u32 {
    let mut digest = Hasher::new();
    digest.update(data);
    digest.finalize()
}
#[cfg(test)]
mod tests {
    use crate::{OurDB, OurDBConfig, OurDBSetArgs};
    use std::env::temp_dir;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};
    /// Unique per-run scratch directory so repeated runs don't collide.
    fn get_temp_dir() -> PathBuf {
        let secs = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        temp_dir().join(format!("ourdb_backend_test_{}", secs))
    }
    #[test]
    fn test_backend_operations() {
        // Key-value mode: the caller supplies explicit record IDs.
        let config = OurDBConfig {
            path: get_temp_dir(),
            incremental_mode: false,
            file_size: None,
            keysize: None,
            reset: None, // Don't reset existing database
        };
        let mut db = OurDB::new(config).unwrap();
        // Round-trip a single record through set/get.
        let payload = b"Test data for backend operations";
        db.set(OurDBSetArgs {
            id: Some(1),
            data: payload,
        })
        .unwrap();
        assert_eq!(db.get(1).unwrap(), payload);
        // Remove all on-disk state.
        db.destroy().unwrap();
    }
}

View File

@@ -0,0 +1,41 @@
use thiserror::Error;
/// Error types for OurDB operations
///
/// Display strings come from the `thiserror` `#[error(...)]` attributes.
#[derive(Error, Debug)]
pub enum Error {
    /// IO errors from file operations (auto-converted via `?` thanks to `#[from]`)
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Data corruption errors (e.g. CRC mismatch on read)
    #[error("Data corruption: {0}")]
    DataCorruption(String),
    /// Invalid operation errors (bad arguments or mode misuse)
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),
    /// Lookup table errors (out-of-range or incomplete entry reads)
    #[error("Lookup error: {0}")]
    LookupError(String),
    /// Record not found errors (position 0 / deleted records)
    #[error("Record not found: {0}")]
    NotFound(String),
    /// Other errors (catch-all for ad-hoc messages)
    #[error("Error: {0}")]
    Other(String),
}
/// Lets a `String` message be raised directly as a generic `Error::Other`.
impl From<String> for Error {
    fn from(msg: String) -> Self {
        Error::Other(msg)
    }
}
/// Lets a `&str` message be raised directly as a generic `Error::Other`.
impl From<&str> for Error {
    fn from(msg: &str) -> Self {
        Error::Other(msg.to_string())
    }
}

View File

@@ -0,0 +1,293 @@
mod backend;
mod error;
mod location;
mod lookup;
pub use error::Error;
pub use location::Location;
pub use lookup::LookupTable;
use std::fs::File;
use std::path::PathBuf;
/// OurDB is a lightweight, efficient key-value database implementation that provides
/// data persistence with history tracking capabilities.
pub struct OurDB {
    /// Directory path for storage
    path: PathBuf,
    /// Whether to use auto-increment mode
    incremental_mode: bool,
    /// Maximum file size (default: 500MB); once a data file reaches this
    /// size a new numbered file is started (keysize 6 only — smaller
    /// keysizes always use file 0)
    file_size: u32,
    /// Lookup table for mapping keys to locations
    lookup: LookupTable,
    /// Currently open file (None until a data file is first selected)
    file: Option<File>,
    /// Current file number (the file the open handle points at)
    file_nr: u16,
    /// Last used file number (highest data file created so far)
    last_used_file_nr: u16,
}
/// Configuration for creating a new OurDB instance
pub struct OurDBConfig {
    /// Directory path for storage
    pub path: PathBuf,
    /// Whether to use auto-increment mode
    pub incremental_mode: bool,
    /// Maximum file size (default: 500MB)
    pub file_size: Option<u32>,
    /// Lookup table key size (default: 4)
    /// - 2: For databases with < 65,536 records (single file)
    /// - 3: For databases with < 16,777,216 records (single file)
    /// - 4: For databases with < 4,294,967,296 records (single file)
    /// - 6: For large databases requiring multiple files (default)
    pub keysize: Option<u8>,
    /// Whether to reset the database if it exists (default: false)
    pub reset: Option<bool>,
}
/// Arguments for setting a value in OurDB
pub struct OurDBSetArgs<'a> {
    /// ID for the record (optional in incremental mode; required otherwise)
    pub id: Option<u32>,
    /// Data to store (max 65,535 bytes per record)
    pub data: &'a [u8],
}
impl OurDB {
    /// Creates a new OurDB instance with the given configuration
    ///
    /// Optionally wipes existing state first (`config.reset`), then creates
    /// the storage and lookup directories, builds the lookup table, and
    /// loads any previously exported lookup metadata from disk.
    pub fn new(config: OurDBConfig) -> Result<Self, Error> {
        // If reset is true and the path exists, remove it first
        if config.reset.unwrap_or(false) && config.path.exists() {
            std::fs::remove_dir_all(&config.path)?;
        }
        // Create directory if it doesn't exist
        std::fs::create_dir_all(&config.path)?;
        // Create lookup table in its own subdirectory
        let lookup_path = config.path.join("lookup");
        std::fs::create_dir_all(&lookup_path)?;
        let lookup_config = lookup::LookupConfig {
            size: 1000000, // Default size
            keysize: config.keysize.unwrap_or(4),
            lookuppath: lookup_path.to_string_lossy().to_string(),
            incremental_mode: config.incremental_mode,
        };
        let lookup = LookupTable::new(lookup_config)?;
        let mut db = OurDB {
            path: config.path,
            incremental_mode: config.incremental_mode,
            file_size: config.file_size.unwrap_or(500 * (1 << 20)), // 500MB default
            lookup,
            file: None,
            file_nr: 0,
            last_used_file_nr: 0,
        };
        // Load existing metadata if available
        db.load()?;
        Ok(db)
    }
    /// Sets a value in the database
    ///
    /// In incremental mode:
    /// - If ID is provided, it updates an existing record
    /// - If ID is not provided, it creates a new record with auto-generated ID
    ///
    /// In key-value mode:
    /// - ID must be provided
    ///
    /// Returns the ID under which the data was stored.
    pub fn set(&mut self, args: OurDBSetArgs) -> Result<u32, Error> {
        if self.incremental_mode {
            if let Some(id) = args.id {
                // Update path: the ID must already resolve to a record.
                let location = self.lookup.get(id)?;
                if location.position == 0 {
                    return Err(Error::InvalidOperation(
                        "Cannot set ID for insertions when incremental mode is enabled".to_string(),
                    ));
                }
                self.set_(id, location, args.data)?;
                Ok(id)
            } else {
                // Insert path: allocate the next auto-generated ID.
                let id = self.lookup.get_next_id()?;
                self.set_(id, Location::default(), args.data)?;
                Ok(id)
            }
        } else {
            // Key-value mode: the caller must supply the ID.
            let id = args.id.ok_or_else(|| {
                Error::InvalidOperation(
                    "ID must be provided when incremental is disabled".to_string(),
                )
            })?;
            let location = self.lookup.get(id)?;
            self.set_(id, location, args.data)?;
            Ok(id)
        }
    }
    /// Retrieves data stored at the specified key position
    pub fn get(&mut self, id: u32) -> Result<Vec<u8>, Error> {
        let location = self.lookup.get(id)?;
        self.get_(location)
    }
    /// Retrieves a list of previous values for the specified key
    ///
    /// The depth parameter controls how many historical values to retrieve
    /// (maximum). Results are ordered newest first; traversal stops early
    /// when the history chain ends.
    pub fn get_history(&mut self, id: u32, depth: u8) -> Result<Vec<Vec<u8>>, Error> {
        let mut result = Vec::new();
        let mut current_location = self.lookup.get(id)?;
        // Traverse the history chain up to specified depth
        for _ in 0..depth {
            // Get current value
            let data = self.get_(current_location)?;
            result.push(data);
            // Follow the previous-location link embedded in the record header
            match self.get_prev_pos_(current_location) {
                Ok(location) => {
                    if location.position == 0 {
                        break; // Position 0 marks the end of the chain
                    }
                    current_location = location;
                }
                Err(_) => break,
            }
        }
        Ok(result)
    }
    /// Deletes the data at the specified key position
    ///
    /// `delete_` zero-fills the record on disk and already clears the lookup
    /// entry, so no additional `lookup.delete` call is needed here (the
    /// previous extra call performed a redundant second lookup write).
    pub fn delete(&mut self, id: u32) -> Result<(), Error> {
        let location = self.lookup.get(id)?;
        self.delete_(id, location)?;
        Ok(())
    }
    /// Returns the next ID which will be used when storing in incremental mode
    ///
    /// # Errors
    /// Returns `Error::InvalidOperation` when incremental mode is disabled.
    pub fn get_next_id(&mut self) -> Result<u32, Error> {
        if !self.incremental_mode {
            return Err(Error::InvalidOperation(
                "Incremental mode is not enabled".to_string(),
            ));
        }
        self.lookup.get_next_id()
    }
    /// Closes the database, ensuring all data is saved
    pub fn close(&mut self) -> Result<(), Error> {
        self.save()?;
        self.close_();
        Ok(())
    }
    /// Destroys the database, removing all files
    ///
    /// Close errors are deliberately ignored — the directory is removed anyway.
    pub fn destroy(&mut self) -> Result<(), Error> {
        let _ = self.close();
        std::fs::remove_dir_all(&self.path)?;
        Ok(())
    }
    // Helper methods
    /// Path of the sparse lookup-table dump used across open/close cycles.
    fn lookup_dump_path(&self) -> PathBuf {
        self.path.join("lookup_dump.db")
    }
    /// Imports a previously exported lookup dump, if one exists.
    fn load(&mut self) -> Result<(), Error> {
        let dump_path = self.lookup_dump_path();
        if dump_path.exists() {
            self.lookup.import_sparse(&dump_path.to_string_lossy())?;
        }
        Ok(())
    }
    /// Exports the lookup table to the dump file.
    fn save(&mut self) -> Result<(), Error> {
        self.lookup
            .export_sparse(&self.lookup_dump_path().to_string_lossy())?;
        Ok(())
    }
    /// Drops the open file handle (flushes happen on write; see backend).
    fn close_(&mut self) {
        self.file = None;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::env::temp_dir;
    use std::time::{SystemTime, UNIX_EPOCH};
    /// Builds a unique scratch directory for each test run.
    fn get_temp_dir() -> PathBuf {
        let secs = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        temp_dir().join(format!("ourdb_test_{}", secs))
    }
    #[test]
    fn test_basic_operations() {
        // Incremental mode: IDs are assigned by the database.
        let mut db = OurDB::new(OurDBConfig {
            path: get_temp_dir(),
            incremental_mode: true,
            file_size: None,
            keysize: None,
            reset: None, // Don't reset existing database
        })
        .unwrap();
        // Insert, then read back.
        let original = b"Hello, OurDB!";
        let id = db
            .set(OurDBSetArgs {
                id: None,
                data: original,
            })
            .unwrap();
        assert_eq!(db.get(id).unwrap(), original);
        // Overwrite the record and read the new value.
        let replacement = b"Updated data";
        db.set(OurDBSetArgs {
            id: Some(id),
            data: replacement,
        })
        .unwrap();
        assert_eq!(db.get(id).unwrap(), replacement);
        // History returns versions newest-first.
        let history = db.get_history(id, 2).unwrap();
        assert_eq!(history.len(), 2);
        assert_eq!(history[0], replacement);
        assert_eq!(history[1], original);
        // Deleting makes the record unreadable.
        db.delete(id).unwrap();
        assert!(db.get(id).is_err());
        // Clean up all on-disk state.
        db.destroy().unwrap();
    }
}

View File

@@ -0,0 +1,178 @@
use crate::error::Error;
/// Location represents a physical position in a database file
///
/// It consists of a file number and a position within that file.
/// This allows OurDB to span multiple files for large datasets.
///
/// A `position` of 0 is treated as "no record" by the storage layer
/// (data files are seeded with one byte so real records never start at 0).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Location {
    /// File number (0-65535)
    pub file_nr: u16,
    /// Position within the file (byte offset of the record header)
    pub position: u32,
}
impl Location {
/// Creates a new Location from bytes based on keysize
///
/// - keysize = 2: Only position (2 bytes), file_nr = 0
/// - keysize = 3: Only position (3 bytes), file_nr = 0
/// - keysize = 4: Only position (4 bytes), file_nr = 0
/// - keysize = 6: file_nr (2 bytes) + position (4 bytes)
pub fn from_bytes(bytes: &[u8], keysize: u8) -> Result<Self, Error> {
// Validate keysize
if ![2, 3, 4, 6].contains(&keysize) {
return Err(Error::InvalidOperation(format!(
"Invalid keysize: {}",
keysize
)));
}
// Create padded bytes
let mut padded = vec![0u8; keysize as usize];
if bytes.len() > keysize as usize {
return Err(Error::InvalidOperation(
"Input bytes exceed keysize".to_string(),
));
}
let start_idx = keysize as usize - bytes.len();
for (i, &b) in bytes.iter().enumerate() {
if i + start_idx < padded.len() {
padded[start_idx + i] = b;
}
}
let mut location = Location::default();
match keysize {
2 => {
// Only position, 2 bytes big endian
location.position = u32::from(padded[0]) << 8 | u32::from(padded[1]);
location.file_nr = 0;
// Verify limits
if location.position > 0xFFFF {
return Err(Error::InvalidOperation(
"Position exceeds max value for keysize=2 (max 65535)".to_string(),
));
}
}
3 => {
// Only position, 3 bytes big endian
location.position =
u32::from(padded[0]) << 16 | u32::from(padded[1]) << 8 | u32::from(padded[2]);
location.file_nr = 0;
// Verify limits
if location.position > 0xFFFFFF {
return Err(Error::InvalidOperation(
"Position exceeds max value for keysize=3 (max 16777215)".to_string(),
));
}
}
4 => {
// Only position, 4 bytes big endian
location.position = u32::from(padded[0]) << 24
| u32::from(padded[1]) << 16
| u32::from(padded[2]) << 8
| u32::from(padded[3]);
location.file_nr = 0;
}
6 => {
// 2 bytes file_nr + 4 bytes position, all big endian
location.file_nr = u16::from(padded[0]) << 8 | u16::from(padded[1]);
location.position = u32::from(padded[2]) << 24
| u32::from(padded[3]) << 16
| u32::from(padded[4]) << 8
| u32::from(padded[5]);
}
_ => unreachable!(),
}
Ok(location)
}
/// Converts the location to bytes (always 6 bytes)
///
/// Format: [file_nr (2 bytes)][position (4 bytes)]
pub fn to_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::with_capacity(6);
// Put file_nr first (2 bytes)
bytes.push((self.file_nr >> 8) as u8);
bytes.push(self.file_nr as u8);
// Put position next (4 bytes)
bytes.push((self.position >> 24) as u8);
bytes.push((self.position >> 16) as u8);
bytes.push((self.position >> 8) as u8);
bytes.push(self.position as u8);
bytes
}
/// Converts the location to a u64 value
///
/// The file_nr is stored in the most significant bits
pub fn to_u64(&self) -> u64 {
(u64::from(self.file_nr) << 32) | u64::from(self.position)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Decoding round-trips for each supported keysize.
    #[test]
    fn test_location_from_bytes_keysize_2() {
        let loc = Location::from_bytes(&[0x12, 0x34], 2).unwrap();
        assert_eq!((loc.file_nr, loc.position), (0, 0x1234));
    }
    #[test]
    fn test_location_from_bytes_keysize_3() {
        let loc = Location::from_bytes(&[0x12, 0x34, 0x56], 3).unwrap();
        assert_eq!((loc.file_nr, loc.position), (0, 0x123456));
    }
    #[test]
    fn test_location_from_bytes_keysize_4() {
        let loc = Location::from_bytes(&[0x12, 0x34, 0x56, 0x78], 4).unwrap();
        assert_eq!((loc.file_nr, loc.position), (0, 0x12345678));
    }
    #[test]
    fn test_location_from_bytes_keysize_6() {
        let loc = Location::from_bytes(&[0xAB, 0xCD, 0x12, 0x34, 0x56, 0x78], 6).unwrap();
        assert_eq!((loc.file_nr, loc.position), (0xABCD, 0x12345678));
    }
    // Encoding: always 6 bytes, file_nr first, big-endian.
    #[test]
    fn test_location_to_bytes() {
        let loc = Location {
            file_nr: 0xABCD,
            position: 0x12345678,
        };
        assert_eq!(loc.to_bytes(), [0xAB, 0xCD, 0x12, 0x34, 0x56, 0x78]);
    }
    // Packing: file_nr occupies the high 32 bits.
    #[test]
    fn test_location_to_u64() {
        let loc = Location {
            file_nr: 0xABCD,
            position: 0x12345678,
        };
        assert_eq!(loc.to_u64(), 0xABCD_0000_0000_u64 | 0x12345678);
    }
}

View File

@@ -0,0 +1,540 @@
use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;
use crate::error::Error;
use crate::location::Location;
/// File name of the on-disk lookup table inside `lookuppath`.
const DATA_FILE_NAME: &str = "data";
/// File name for incremental-ID bookkeeping inside `lookuppath`.
/// NOTE(review): consumed by `get_incremental_info`/`increment_index`
/// (defined outside this view) — confirm exact contents/format there.
const INCREMENTAL_FILE_NAME: &str = ".inc";
/// Configuration for creating a new lookup table
pub struct LookupConfig {
    /// Size of the lookup table (number of addressable entries)
    pub size: u32,
    /// Size of each entry in bytes (2-6)
    /// - 2: For databases with < 65,536 records (single file)
    /// - 3: For databases with < 16,777,216 records (single file)
    /// - 4: For databases with < 4,294,967,296 records (single file)
    /// - 6: For large databases requiring multiple files
    pub keysize: u8,
    /// Path for disk-based lookup (empty string selects memory-based mode)
    pub lookuppath: String,
    /// Whether to use incremental mode
    pub incremental_mode: bool,
}
/// Lookup table maps keys to physical locations in the backend storage
pub struct LookupTable {
    /// Size of each entry in bytes (2-6)
    keysize: u8,
    /// Path for disk-based lookup (empty when the table is memory-based)
    lookuppath: String,
    /// In-memory data for memory-based lookup (empty when disk-based)
    data: Vec<u8>,
    /// Next empty slot if incremental mode is enabled (None otherwise)
    incremental: Option<u32>,
}
impl LookupTable {
/// Returns the keysize of this lookup table
pub fn keysize(&self) -> u8 {
self.keysize
}
/// Creates a new lookup table with the given configuration
pub fn new(config: LookupConfig) -> Result<Self, Error> {
// Verify keysize is valid
if ![2, 3, 4, 6].contains(&config.keysize) {
return Err(Error::InvalidOperation(format!(
"Invalid keysize: {}",
config.keysize
)));
}
let incremental = if config.incremental_mode {
Some(get_incremental_info(&config)?)
} else {
None
};
if !config.lookuppath.is_empty() {
// Create directory if it doesn't exist
fs::create_dir_all(&config.lookuppath)?;
// For disk-based lookup, create empty file if it doesn't exist
let data_path = Path::new(&config.lookuppath).join(DATA_FILE_NAME);
if !data_path.exists() {
let data = vec![0u8; config.size as usize * config.keysize as usize];
fs::write(&data_path, &data)?;
}
Ok(LookupTable {
data: Vec::new(),
keysize: config.keysize,
lookuppath: config.lookuppath,
incremental,
})
} else {
// For memory-based lookup
Ok(LookupTable {
data: vec![0u8; config.size as usize * config.keysize as usize],
keysize: config.keysize,
lookuppath: String::new(),
incremental,
})
}
}
/// Gets a location for the given ID
pub fn get(&self, id: u32) -> Result<Location, Error> {
let entry_size = self.keysize as usize;
if !self.lookuppath.is_empty() {
// Disk-based lookup
let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
// Check file size first
let file_size = fs::metadata(&data_path)?.len();
let start_pos = id as u64 * entry_size as u64;
if start_pos + entry_size as u64 > file_size {
return Err(Error::LookupError(format!(
"Invalid read for get in lut: {}: {} would exceed file size {}",
self.lookuppath,
start_pos + entry_size as u64,
file_size
)));
}
// Read directly from file
let mut file = File::open(&data_path)?;
file.seek(SeekFrom::Start(start_pos))?;
let mut data = vec![0u8; entry_size];
let bytes_read = file.read(&mut data)?;
if bytes_read < entry_size {
return Err(Error::LookupError(format!(
"Incomplete read: expected {} bytes but got {}",
entry_size, bytes_read
)));
}
return Location::from_bytes(&data, self.keysize);
}
// Memory-based lookup
if (id * self.keysize as u32) as usize >= self.data.len() {
return Err(Error::LookupError("Index out of bounds".to_string()));
}
let start = (id * self.keysize as u32) as usize;
let end = start + entry_size;
Location::from_bytes(&self.data[start..end], self.keysize)
}
/// Sets a location for the given ID.
///
/// The location is encoded according to `keysize`: 2/3/4-byte keys store
/// only the position (with `file_nr` forced to 0 and a reduced position
/// range), while 6-byte keys store the full `file_nr` + position pair.
/// The encoded bytes are written either to the lookup data file or to the
/// in-memory buffer.
///
/// In incremental mode, writing at the current incremental ID advances the
/// counter; writing past it is rejected.
pub fn set(&mut self, id: u32, location: Location) -> Result<(), Error> {
    let entry_size = self.keysize as usize;
    // Handle incremental-mode bookkeeping before writing.
    if let Some(incremental) = self.incremental {
        if id == incremental {
            // Appending at the frontier: advance the counter.
            self.increment_index()?;
        }
        if id > incremental {
            return Err(Error::InvalidOperation(
                "Cannot set ID for insertions when incremental mode is enabled".to_string(),
            ));
        }
    }
    // Encode the location based on keysize.
    let location_bytes = match self.keysize {
        2 => {
            if location.file_nr != 0 {
                return Err(Error::InvalidOperation(
                    "file_nr must be 0 for keysize=2".to_string(),
                ));
            }
            if location.position > 0xFFFF {
                return Err(Error::InvalidOperation(
                    "position exceeds max value for keysize=2 (max 65535)".to_string(),
                ));
            }
            vec![(location.position >> 8) as u8, location.position as u8]
        }
        3 => {
            if location.file_nr != 0 {
                return Err(Error::InvalidOperation(
                    "file_nr must be 0 for keysize=3".to_string(),
                ));
            }
            if location.position > 0xFFFFFF {
                return Err(Error::InvalidOperation(
                    "position exceeds max value for keysize=3 (max 16777215)".to_string(),
                ));
            }
            vec![
                (location.position >> 16) as u8,
                (location.position >> 8) as u8,
                location.position as u8,
            ]
        }
        4 => {
            if location.file_nr != 0 {
                return Err(Error::InvalidOperation(
                    "file_nr must be 0 for keysize=4".to_string(),
                ));
            }
            vec![
                (location.position >> 24) as u8,
                (location.position >> 16) as u8,
                (location.position >> 8) as u8,
                location.position as u8,
            ]
        }
        6 => {
            // Full location with file_nr and position
            location.to_bytes()
        }
        _ => {
            return Err(Error::InvalidOperation(format!(
                "Invalid keysize: {}",
                self.keysize
            )))
        }
    };
    if !self.lookuppath.is_empty() {
        // Disk-based lookup: seek to the slot and overwrite it in place.
        let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        let mut file = OpenOptions::new().write(true).open(data_path)?;
        let start_pos = id as u64 * entry_size as u64;
        file.seek(SeekFrom::Start(start_pos))?;
        file.write_all(&location_bytes)?;
    } else {
        // Memory-based lookup: compute the offset in u64 so `id * keysize`
        // cannot overflow u32 for large IDs, then bounds-check the write.
        let start_pos = id as u64 * entry_size as u64;
        if start_pos + entry_size as u64 > self.data.len() as u64 {
            return Err(Error::LookupError("Index out of bounds".to_string()));
        }
        let start = start_pos as usize;
        // location_bytes.len() == entry_size for every valid keysize above.
        self.data[start..start + entry_size].copy_from_slice(&location_bytes);
    }
    Ok(())
}
/// Deletes an entry for the given ID.
///
/// Deletion is implemented by overwriting the slot with an all-zero
/// location rather than shrinking the table.
pub fn delete(&mut self, id: u32) -> Result<(), Error> {
    let empty = Location::default();
    self.set(id, empty)
}
/// Gets the next available ID in incremental mode.
///
/// # Errors
///
/// Returns `Error::InvalidOperation` if the table is not in incremental
/// mode, or `Error::LookupError` when the table has no free slots left.
pub fn get_next_id(&self) -> Result<u32, Error> {
    let incremental = self.incremental.ok_or_else(|| {
        Error::InvalidOperation("Lookup table not in incremental mode".to_string())
    })?;
    // Table capacity in bytes: file length on disk, buffer length in memory.
    let table_size: u64 = if !self.lookuppath.is_empty() {
        let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        fs::metadata(data_path)?.len()
    } else {
        self.data.len() as u64
    };
    // Compare in u64 so `incremental * keysize` cannot overflow u32
    // near the end of a large table.
    if incremental as u64 * self.keysize as u64 >= table_size {
        return Err(Error::LookupError("Lookup table is full".to_string()));
    }
    Ok(incremental)
}
/// Increments the index in incremental mode
pub fn increment_index(&mut self) -> Result<(), Error> {
let mut incremental = self.incremental.ok_or_else(|| {
Error::InvalidOperation("Lookup table not in incremental mode".to_string())
})?;
incremental += 1;
self.incremental = Some(incremental);
if !self.lookuppath.is_empty() {
let inc_path = Path::new(&self.lookuppath).join(INCREMENTAL_FILE_NAME);
fs::write(inc_path, incremental.to_string())?;
}
Ok(())
}
/// Exports the lookup table to a file.
///
/// Disk-backed tables are copied file-to-file; memory-backed tables dump
/// their raw buffer to the destination path.
pub fn export_data(&self, path: &str) -> Result<(), Error> {
    if self.lookuppath.is_empty() {
        // Memory-based lookup: write the raw buffer out.
        fs::write(path, &self.data)?;
    } else {
        // Disk-based lookup: a straight file copy is sufficient.
        let source = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        fs::copy(source, path)?;
    }
    Ok(())
}
/// Imports the lookup table from a file.
///
/// Disk-backed tables copy the source file over the lookup data file;
/// memory-backed tables replace the in-memory buffer with the file contents.
pub fn import_data(&mut self, path: &str) -> Result<(), Error> {
    if self.lookuppath.is_empty() {
        // Memory-based lookup: load the file into the buffer.
        self.data = fs::read(path)?;
    } else {
        // Disk-based lookup: overwrite the lookup data file.
        let destination = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        fs::copy(path, destination)?;
    }
    Ok(())
}
/// Exports only non-zero entries to save space.
///
/// Output format: for each non-empty slot, a 4-byte big-endian ID followed
/// by the raw entry bytes (`keysize` bytes each).
pub fn export_sparse(&self, path: &str) -> Result<(), Error> {
    let mut output = Vec::new();
    let entry_size = self.keysize as usize;
    if !self.lookuppath.is_empty() {
        // Disk-based lookup: the scan is sequential, so no per-entry seek
        // is needed — each read advances the cursor by entry_size.
        // read_exact also handles short reads that a bare read() would
        // misinterpret as a truncated entry.
        let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        let mut file = File::open(&data_path)?;
        let file_size = fs::metadata(&data_path)?.len();
        let max_entries = file_size / entry_size as u64;
        let mut buffer = vec![0u8; entry_size];
        for id in 0..max_entries {
            if let Err(e) = file.read_exact(&mut buffer) {
                if e.kind() == std::io::ErrorKind::UnexpectedEof {
                    // Trailing partial entry: stop, matching prior behavior.
                    break;
                }
                return Err(e.into());
            }
            // Check if entry is non-zero
            if buffer.iter().any(|&b| b != 0) {
                // Write ID (4 bytes) + entry
                output.extend_from_slice(&(id as u32).to_be_bytes());
                output.extend_from_slice(&buffer);
            }
        }
    } else {
        // For memory-based lookup
        let max_entries = self.data.len() / entry_size;
        for id in 0..max_entries {
            let start = id * entry_size;
            let entry = &self.data[start..start + entry_size];
            // Check if entry is non-zero
            if entry.iter().any(|&b| b != 0) {
                // Write ID (4 bytes) + entry
                output.extend_from_slice(&(id as u32).to_be_bytes());
                output.extend_from_slice(entry);
            }
        }
    }
    // Write the output to file
    fs::write(path, &output)?;
    Ok(())
}
/// Imports sparse data (only non-zero entries).
///
/// Expects the format produced by `export_sparse`: a sequence of records,
/// each a 4-byte big-endian ID followed by `keysize` entry bytes.
pub fn import_sparse(&mut self, path: &str) -> Result<(), Error> {
    let data = fs::read(path)?;
    let entry_size = self.keysize as usize;
    // Each record: 4-byte ID prefix plus the entry payload.
    let record_size = 4 + entry_size;
    if data.len() % record_size != 0 {
        return Err(Error::DataCorruption(
            "Invalid sparse data format: size mismatch".to_string(),
        ));
    }
    // chunks_exact guarantees every record has the full length, so no
    // in-loop length check is required.
    for record in data.chunks_exact(record_size) {
        let (id_bytes, entry) = record.split_at(4);
        let id = u32::from_be_bytes([id_bytes[0], id_bytes[1], id_bytes[2], id_bytes[3]]);
        let location = Location::from_bytes(entry, self.keysize)?;
        self.set(id, location)?;
    }
    Ok(())
}
/// Finds the highest ID with a non-zero entry.
///
/// Returns 0 when no non-zero entry exists.
pub fn find_last_entry(&mut self) -> Result<u32, Error> {
    let mut last_id = 0u32;
    let entry_size = self.keysize as usize;
    if !self.lookuppath.is_empty() {
        // Disk-based lookup: scan the file sequentially. read_exact
        // advances the cursor one entry at a time, so the per-iteration
        // seek of the old implementation is unnecessary, and short reads
        // are retried internally instead of being treated as truncation.
        let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
        let mut file = File::open(&data_path)?;
        let file_size = fs::metadata(&data_path)?.len();
        let max_entries = file_size / entry_size as u64;
        let mut buffer = vec![0u8; entry_size];
        for pos in 0..max_entries {
            if let Err(e) = file.read_exact(&mut buffer) {
                if e.kind() == std::io::ErrorKind::UnexpectedEof {
                    break;
                }
                return Err(e.into());
            }
            let location = Location::from_bytes(&buffer, self.keysize)?;
            if location.position != 0 || location.file_nr != 0 {
                last_id = pos as u32;
            }
        }
    } else {
        // Memory-based lookup: probe every slot via get().
        for i in 0..(self.data.len() / entry_size) as u32 {
            if let Ok(location) = self.get(i) {
                if location.position != 0 || location.file_nr != 0 {
                    last_id = i;
                }
            }
        }
    }
    Ok(last_id)
}
}
/// Determines the starting incremental counter for a lookup table.
///
/// Returns 0 when incremental mode is disabled. For disk-backed tables the
/// counter is read from (and, when missing or corrupt, re-seeded into) a
/// dedicated file; memory-backed tables always start at 1.
fn get_incremental_info(config: &LookupConfig) -> Result<u32, Error> {
    if !config.incremental_mode {
        return Ok(0);
    }
    if config.lookuppath.is_empty() {
        // Memory-based lookup always begins counting at 1.
        return Ok(1);
    }
    let inc_path = Path::new(&config.lookuppath).join(INCREMENTAL_FILE_NAME);
    if !inc_path.exists() {
        // First use: seed the on-disk counter with 1.
        fs::write(&inc_path, "1")?;
    }
    let contents = fs::read_to_string(&inc_path)?;
    match contents.trim().parse::<u32>() {
        Ok(value) => Ok(value),
        Err(_) => {
            // Corrupt counter file: reset it to 1 rather than failing.
            fs::write(&inc_path, "1")?;
            Ok(1)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::env::temp_dir;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Creates a unique temporary directory path for a test run.
    ///
    /// Uses nanosecond resolution plus the process ID so that tests running
    /// within the same second (or in parallel processes) do not collide on
    /// the same directory — a seconds-resolution timestamp allowed exactly
    /// that collision.
    fn get_temp_dir() -> PathBuf {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        temp_dir().join(format!(
            "ourdb_lookup_test_{}_{}",
            std::process::id(),
            timestamp
        ))
    }

    #[test]
    fn test_memory_lookup() {
        let config = LookupConfig {
            size: 1000,
            keysize: 4,
            lookuppath: String::new(),
            incremental_mode: true,
        };
        let mut lookup = LookupTable::new(config).unwrap();
        // Test set and get
        let location = Location {
            file_nr: 0,
            position: 12345,
        };
        lookup.set(1, location).unwrap();
        let retrieved = lookup.get(1).unwrap();
        assert_eq!(retrieved.file_nr, location.file_nr);
        assert_eq!(retrieved.position, location.position);
        // Test incremental mode: setting at the frontier advances the counter.
        let next_id = lookup.get_next_id().unwrap();
        assert_eq!(next_id, 2);
        lookup.increment_index().unwrap();
        let next_id = lookup.get_next_id().unwrap();
        assert_eq!(next_id, 3);
    }

    #[test]
    fn test_disk_lookup() {
        let temp_dir = get_temp_dir();
        fs::create_dir_all(&temp_dir).unwrap();
        let config = LookupConfig {
            size: 1000,
            keysize: 4,
            lookuppath: temp_dir.to_string_lossy().to_string(),
            incremental_mode: true,
        };
        let mut lookup = LookupTable::new(config).unwrap();
        // Test set and get
        let location = Location {
            file_nr: 0,
            position: 12345,
        };
        lookup.set(1, location).unwrap();
        let retrieved = lookup.get(1).unwrap();
        assert_eq!(retrieved.file_nr, location.file_nr);
        assert_eq!(retrieved.position, location.position);
        // Clean up
        fs::remove_dir_all(temp_dir).unwrap();
    }
}

View File

@@ -0,0 +1,369 @@
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use rand;
use std::env::temp_dir;
use std::fs;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
// Helper function to create a unique temporary directory for tests.
// Combines a nanosecond timestamp with a random suffix so concurrent test
// runs never share a directory; any stale directory is wiped first.
fn get_temp_dir() -> PathBuf {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let suffix = rand::random::<u32>();
    let dir = temp_dir().join(format!("ourdb_test_{}_{}", nanos, suffix));
    if dir.exists() {
        std::fs::remove_dir_all(&dir).unwrap();
    }
    std::fs::create_dir_all(&dir).unwrap();
    dir
}
#[test]
fn test_basic_operations() {
    let temp_dir = get_temp_dir();
    // Incremental-mode database with default settings.
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: None,
    })
    .unwrap();
    // Insert a record and read it back.
    let original = b"Hello, OurDB!";
    let id = db
        .set(OurDBSetArgs {
            id: None,
            data: original,
        })
        .unwrap();
    assert_eq!(db.get(id).unwrap(), original);
    // Overwrite the record in place.
    let replacement = b"Updated data";
    db.set(OurDBSetArgs {
        id: Some(id),
        data: replacement,
    })
    .unwrap();
    assert_eq!(db.get(id).unwrap(), replacement);
    // History is returned newest-first.
    let history = db.get_history(id, 2).unwrap();
    assert_eq!(history.len(), 2);
    assert_eq!(history[0], replacement);
    assert_eq!(history[1], original);
    // After deletion the record is gone.
    db.delete(id).unwrap();
    assert!(db.get(id).is_err());
    // Clean up
    db.destroy().unwrap();
}
#[test]
fn test_key_value_mode() {
    let temp_dir = get_temp_dir();
    // Key-value mode: the caller chooses IDs explicitly.
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: false,
        file_size: None,
        keysize: None,
        reset: None,
    })
    .unwrap();
    // Store at an arbitrary explicit ID and read it back.
    let payload = b"Key-value data";
    let chosen_id = 42;
    db.set(OurDBSetArgs {
        id: Some(chosen_id),
        data: payload,
    })
    .unwrap();
    assert_eq!(db.get(chosen_id).unwrap(), payload);
    // Auto-increment is unavailable in key-value mode.
    assert!(db.get_next_id().is_err());
    // Clean up
    db.destroy().unwrap();
}
#[test]
fn test_incremental_mode() {
    let temp_dir = get_temp_dir();
    // Incremental mode hands out IDs automatically.
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: None,
    })
    .unwrap();
    // Two inserts without explicit IDs receive consecutive IDs.
    let first_payload = b"First record";
    let first = db
        .set(OurDBSetArgs {
            id: None,
            data: first_payload,
        })
        .unwrap();
    let second_payload = b"Second record";
    let second = db
        .set(OurDBSetArgs {
            id: None,
            data: second_payload,
        })
        .unwrap();
    assert_eq!(second, first + 1);
    // The next ID to be handed out follows the last insert.
    assert_eq!(db.get_next_id().unwrap(), second + 1);
    // Clean up
    db.destroy().unwrap();
}
#[test]
fn test_persistence() {
    let temp_dir = get_temp_dir();
    // First session: write one record, then close cleanly.
    {
        let mut db = OurDB::new(OurDBConfig {
            path: temp_dir.clone(),
            incremental_mode: true,
            file_size: None,
            keysize: None,
            reset: None,
        })
        .unwrap();
        let payload = b"Persistent data";
        let stored_id = db
            .set(OurDBSetArgs {
                id: None,
                data: payload,
            })
            .unwrap();
        // Explicitly close the database
        db.close().unwrap();
        // The very first auto-assigned ID is 1.
        assert_eq!(stored_id, 1);
    }
    // Second session: both the record and the counter must have survived.
    {
        let mut db = OurDB::new(OurDBConfig {
            path: temp_dir.clone(),
            incremental_mode: true,
            file_size: None,
            keysize: None,
            reset: None,
        })
        .unwrap();
        assert_eq!(db.get(1).unwrap(), b"Persistent data");
        assert_eq!(db.get_next_id().unwrap(), 2);
        // Clean up
        db.destroy().unwrap();
    }
}
#[test]
fn test_different_keysizes() {
    // The same basic round-trip must work for every supported keysize.
    for &keysize in [2, 3, 4, 6].iter() {
        let temp_dir = get_temp_dir();
        // Ensure the directory exists
        std::fs::create_dir_all(&temp_dir).unwrap();
        let mut db = OurDB::new(OurDBConfig {
            path: temp_dir.clone(),
            incremental_mode: true,
            file_size: None,
            keysize: Some(keysize),
            reset: None,
        })
        .unwrap();
        // Store a record and read it back.
        let payload = b"Keysize test data";
        let id = db
            .set(OurDBSetArgs {
                id: None,
                data: payload,
            })
            .unwrap();
        assert_eq!(db.get(id).unwrap(), payload);
        // Clean up
        db.destroy().unwrap();
    }
}
#[test]
fn test_large_data() {
    let temp_dir = get_temp_dir();
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: None,
    })
    .unwrap();
    // 60 KB payload: large, but still below the 64 KB per-record limit.
    let payload = vec![b'X'; 60 * 1024];
    let id = db
        .set(OurDBSetArgs {
            id: None,
            data: &payload,
        })
        .unwrap();
    let roundtrip = db.get(id).unwrap();
    assert_eq!(roundtrip.len(), payload.len());
    assert_eq!(roundtrip, payload);
    // Clean up
    db.destroy().unwrap();
}
#[test]
fn test_exceed_size_limit() {
    let temp_dir = get_temp_dir();
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: true,
        file_size: None,
        keysize: None,
        reset: None,
    })
    .unwrap();
    // 70 KB is over the 64 KB per-record limit and must be rejected.
    let too_big = vec![b'X'; 70 * 1024];
    let outcome = db.set(OurDBSetArgs {
        id: None,
        data: &too_big,
    });
    assert!(
        outcome.is_err(),
        "Expected an error when storing data larger than 64KB"
    );
    // Clean up
    db.destroy().unwrap();
}
#[test]
fn test_multiple_files() {
    let temp_dir = get_temp_dir();
    // A tiny 1 KB file size forces the database to roll over to new files.
    let mut db = OurDB::new(OurDBConfig {
        path: temp_dir.clone(),
        incremental_mode: true,
        file_size: Some(1024),
        keysize: Some(6), // 6-byte keysize for multiple files
        reset: None,
    })
    .unwrap();
    // Write ten 500-byte records — far more than fits in one 1 KB file.
    let record_len = 500;
    let record = vec![b'A'; record_len];
    let mut stored_ids = Vec::new();
    for _ in 0..10 {
        let id = db
            .set(OurDBSetArgs {
                id: None,
                data: &record,
            })
            .unwrap();
        stored_ids.push(id);
    }
    // Every record must still be readable.
    for &id in &stored_ids {
        assert_eq!(db.get(id).unwrap().len(), record_len);
    }
    // Count the .db files that were produced; rollover implies more than one.
    let db_file_count = fs::read_dir(&temp_dir)
        .unwrap()
        .filter_map(Result::ok)
        .filter(|entry| {
            let path = entry.path();
            path.is_file() && path.extension().map_or(false, |ext| ext == "db")
        })
        .count();
    assert!(
        db_file_count > 1,
        "Expected multiple database files, found {}",
        db_file_count
    );
    // Clean up
    db.destroy().unwrap();
}

View File

@@ -0,0 +1,787 @@
# RadixTree: Architecture for V to Rust Port
## 1. Overview
RadixTree is a space-optimized tree data structure that enables efficient string key operations with persistent storage. This document outlines the architecture for porting the RadixTree module from its original V implementation to Rust, maintaining all existing functionality while leveraging Rust's memory safety, performance, and ecosystem.
The Rust implementation will integrate with the existing OurDB Rust implementation for persistent storage.
```mermaid
graph TD
A[Client Code] --> B[RadixTree API]
B --> C[Node Management]
B --> D[Serialization]
B --> E[Tree Operations]
C --> F[OurDB]
D --> F
E --> C
```
## 2. Current Architecture (V Implementation)
The current V implementation of RadixTree consists of the following components:
### 2.1 Core Data Structures
#### Node
```v
struct Node {
mut:
key_segment string // The segment of the key stored at this node
value []u8 // Value stored at this node (empty if not a leaf)
children []NodeRef // References to child nodes
is_leaf bool // Whether this node is a leaf node
}
```
#### NodeRef
```v
struct NodeRef {
mut:
key_part string // The key segment for this child
node_id u32 // Database ID of the node
}
```
#### RadixTree
```v
@[heap]
pub struct RadixTree {
mut:
db &ourdb.OurDB // Database for persistent storage
root_id u32 // Database ID of the root node
}
```
### 2.2 Key Operations
1. **new()**: Creates a new radix tree with a specified database path
2. **set(key, value)**: Sets a key-value pair in the tree
3. **get(key)**: Retrieves a value by key
4. **update(prefix, new_value)**: Updates the value at a given key prefix
5. **delete(key)**: Removes a key from the tree
6. **list(prefix)**: Lists all keys with a given prefix
7. **getall(prefix)**: Gets all values for keys with a given prefix
### 2.3 Serialization
The V implementation uses a custom binary serialization format for nodes:
- Version byte (1 byte)
- Key segment (string)
- Value length (2 bytes) followed by value bytes
- Children count (2 bytes) followed by children
- Is leaf flag (1 byte)
Each child is serialized as:
- Key part (string)
- Node ID (4 bytes)
### 2.4 Integration with OurDB
The RadixTree uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
## 3. Proposed Rust Architecture
The Rust implementation will maintain the same overall architecture while leveraging Rust's type system, ownership model, and error handling.
### 3.1 Core Data Structures
#### Node
```rust
pub struct Node {
key_segment: String,
value: Vec<u8>,
children: Vec<NodeRef>,
is_leaf: bool,
}
```
#### NodeRef
```rust
pub struct NodeRef {
key_part: String,
node_id: u32,
}
```
#### RadixTree
```rust
pub struct RadixTree {
db: ourdb::OurDB,
root_id: u32,
}
```
### 3.2 Public API
```rust
impl RadixTree {
/// Creates a new radix tree with the specified database path
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
// Implementation
}
/// Sets a key-value pair in the tree
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
// Implementation
}
/// Gets a value by key from the tree
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
// Implementation
}
/// Updates the value at a given key prefix
pub fn update(&mut self, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
// Implementation
}
/// Deletes a key from the tree
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
// Implementation
}
/// Lists all keys with a given prefix
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
// Implementation
}
/// Gets all values for keys with a given prefix
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Implementation
}
}
```
### 3.3 Error Handling
```rust
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
#[error("Key not found: {0}")]
KeyNotFound(String),
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
#[error("Serialization error: {0}")]
Serialization(String),
#[error("Deserialization error: {0}")]
Deserialization(String),
#[error("Invalid operation: {0}")]
InvalidOperation(String),
}
```
### 3.4 Serialization
The Rust implementation will maintain the same binary serialization format for compatibility:
```rust
const VERSION: u8 = 1;
impl Node {
/// Serializes a node to bytes for storage
fn serialize(&self) -> Vec<u8> {
// Implementation
}
/// Deserializes bytes to a node
fn deserialize(data: &[u8]) -> Result<Self, Error> {
// Implementation
}
}
```
### 3.5 Integration with OurDB
The Rust implementation will use the existing OurDB Rust implementation:
```rust
impl RadixTree {
fn get_node(&mut self, node_id: u32) -> Result<Node, Error> {
let data = self.db.get(node_id)?;
Node::deserialize(&data)
}
fn save_node(&mut self, node_id: Option<u32>, node: &Node) -> Result<u32, Error> {
let data = node.serialize();
let args = ourdb::OurDBSetArgs {
id: node_id,
data: &data,
};
Ok(self.db.set(args)?)
}
}
```
## 4. Implementation Strategy
### 4.1 Phase 1: Core Data Structures and Serialization
1. Implement the `Node` and `NodeRef` structs
2. Implement serialization and deserialization functions
3. Implement the `Error` enum for error handling
### 4.2 Phase 2: Basic Tree Operations
1. Implement the `RadixTree` struct with OurDB integration
2. Implement the `new()` function for creating a new tree
3. Implement the `get()` and `set()` functions for basic operations
### 4.3 Phase 3: Advanced Tree Operations
1. Implement the `delete()` function for removing keys
2. Implement the `update()` function for updating values
3. Implement the `list()` and `getall()` functions for prefix operations
### 4.4 Phase 4: Testing and Optimization
1. Port existing tests from V to Rust
2. Add new tests for Rust-specific functionality
3. Benchmark and optimize performance
4. Ensure compatibility with existing RadixTree data
## 5. Implementation Considerations
### 5.1 Memory Management
Leverage Rust's ownership model for safe and efficient memory management:
- Use `String` and `Vec<u8>` for data buffers instead of raw pointers
- Use references and borrows to avoid unnecessary copying
- Implement proper RAII for resource management
### 5.2 Error Handling
Use Rust's `Result` type for comprehensive error handling:
- Define custom error types for RadixTree-specific errors
- Propagate errors using the `?` operator
- Provide detailed error messages
- Implement proper error conversion using the `From` trait
### 5.3 Performance Optimizations
Identify opportunities for performance improvements:
- Use efficient string operations for prefix matching
- Minimize database operations by caching nodes when appropriate
- Use iterators for efficient traversal
- Consider using `Cow<str>` for string operations to avoid unnecessary cloning
### 5.4 Compatibility
Ensure compatibility with the V implementation:
- Maintain the same serialization format
- Ensure identical behavior for all operations
- Support reading existing RadixTree data
## 6. Testing Strategy
### 6.1 Unit Tests
Write comprehensive unit tests for each component:
- Test `Node` serialization/deserialization
- Test string operations (common prefix, etc.)
- Test error handling
### 6.2 Integration Tests
Write integration tests for the complete system:
- Test basic CRUD operations
- Test prefix operations
- Test edge cases (empty keys, very long keys, etc.)
- Test with large datasets
### 6.3 Compatibility Tests
Ensure compatibility with existing RadixTree data:
- Test reading existing V-created RadixTree data
- Test writing data that can be read by the V implementation
### 6.4 Performance Tests
Benchmark performance against the V implementation:
- Measure throughput for set/get operations
- Measure latency for different operations
- Test with different tree sizes and key distributions
## 7. Project Structure
```
radixtree/
├── Cargo.toml
├── src/
│ ├── lib.rs # Public API and re-exports
│ ├── node.rs # Node and NodeRef implementations
│ ├── serialize.rs # Serialization and deserialization
│ ├── error.rs # Error types
│ └── operations.rs # Tree operations implementation
├── tests/
│ ├── basic_test.rs # Basic operations tests
│ ├── prefix_test.rs # Prefix operations tests
│ └── edge_cases.rs # Edge case tests
└── examples/
├── basic.rs # Basic usage example
├── prefix.rs # Prefix operations example
└── performance.rs # Performance benchmark
```
## 8. Dependencies
The Rust implementation will use the following dependencies:
- `ourdb` for persistent storage
- `thiserror` for error handling
- `log` for logging
- `criterion` for benchmarking (dev dependency)
## 9. Compatibility Considerations
To ensure compatibility with the V implementation:
1. Maintain the same serialization format for nodes
2. Ensure identical behavior for all operations
3. Support reading existing RadixTree data
4. Maintain the same performance characteristics
## 10. Future Extensions
Potential future extensions to consider:
1. Async API for non-blocking operations
2. Iterator interface for efficient traversal
3. Batch operations for improved performance
4. Custom serialization formats for specific use cases
5. Compression support for values
6. Concurrency support for parallel operations
## 11. Conclusion
This architecture provides a roadmap for porting RadixTree from V to Rust while maintaining compatibility and leveraging Rust's strengths. The implementation will follow a phased approach, starting with core data structures and gradually building up to the complete system.
The Rust implementation aims to be:
- **Safe**: Leveraging Rust's ownership model for memory safety
- **Fast**: Maintaining or improving performance compared to V
- **Compatible**: Working with existing RadixTree data
- **Extensible**: Providing a foundation for future enhancements
- **Well-tested**: Including comprehensive test coverage
## 12. Implementation Files
### 12.1 Cargo.toml
```toml
[package]
name = "radixtree"
version = "0.1.0"
edition = "2021"
description = "A persistent radix tree implementation using OurDB for storage"
authors = ["OurWorld Team"]
[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
log = "0.4.17"
[dev-dependencies]
criterion = "0.5.1"
[[bench]]
name = "radixtree_benchmarks"
harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "prefix_operations"
path = "examples/prefix_operations.rs"
```
### 12.2 src/lib.rs
```rust
//! RadixTree is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent radix tree that can be used for efficient
//! prefix-based key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::{Node, NodeRef};
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// RadixTree represents a radix tree data structure with persistent storage.
pub struct RadixTree {
db: OurDB,
root_id: u32,
}
impl RadixTree {
/// Creates a new radix tree with the specified database path.
///
/// # Arguments
///
/// * `path` - The path to the database directory
/// * `reset` - Whether to reset the database if it exists
///
/// # Returns
///
/// A new `RadixTree` instance
///
/// # Errors
///
/// Returns an error if the database cannot be created or opened
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
// Implementation will go here
unimplemented!()
}
/// Sets a key-value pair in the tree.
///
/// # Arguments
///
/// * `key` - The key to set
/// * `value` - The value to set
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
// Implementation will go here
unimplemented!()
}
/// Gets a value by key from the tree.
///
/// # Arguments
///
/// * `key` - The key to get
///
/// # Returns
///
/// The value associated with the key
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
// Implementation will go here
unimplemented!()
}
/// Updates the value at a given key prefix.
///
/// # Arguments
///
/// * `prefix` - The key prefix to update
/// * `new_value` - The new value to set
///
/// # Errors
///
/// Returns an error if the prefix is not found or the operation fails
pub fn update(&mut self, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
// Implementation will go here
unimplemented!()
}
/// Deletes a key from the tree.
///
/// # Arguments
///
/// * `key` - The key to delete
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
// Implementation will go here
unimplemented!()
}
/// Lists all keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
// Implementation will go here
unimplemented!()
}
/// Gets all values for keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of values for keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Implementation will go here
unimplemented!()
}
}
```
### 12.3 src/error.rs
```rust
//! Error types for the RadixTree module.
use thiserror::Error;
/// Error type for RadixTree operations.
#[derive(Debug, Error)]
pub enum Error {
/// Error from OurDB operations.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// Error when a key is not found.
#[error("Key not found: {0}")]
KeyNotFound(String),
/// Error when a prefix is not found.
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization.
#[error("Serialization error: {0}")]
Serialization(String),
/// Error during deserialization.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// Error for invalid operations.
#[error("Invalid operation: {0}")]
InvalidOperation(String),
}
```
### 12.4 src/node.rs
```rust
//! Node types for the RadixTree module.
/// Represents a node in the radix tree.
pub struct Node {
/// The segment of the key stored at this node.
pub key_segment: String,
/// Value stored at this node (empty if not a leaf).
pub value: Vec<u8>,
/// References to child nodes.
pub children: Vec<NodeRef>,
/// Whether this node is a leaf node.
pub is_leaf: bool,
}
/// Reference to a node in the database.
pub struct NodeRef {
/// The key segment for this child.
pub key_part: String,
/// Database ID of the node.
pub node_id: u32,
}
impl Node {
/// Creates a new node.
pub fn new(key_segment: String, value: Vec<u8>, is_leaf: bool) -> Self {
Self {
key_segment,
value,
children: Vec::new(),
is_leaf,
}
}
/// Creates a new root node.
pub fn new_root() -> Self {
Self {
key_segment: String::new(),
value: Vec::new(),
children: Vec::new(),
is_leaf: false,
}
}
}
impl NodeRef {
/// Creates a new node reference.
pub fn new(key_part: String, node_id: u32) -> Self {
Self {
key_part,
node_id,
}
}
}
```
### 12.5 src/serialize.rs
```rust
//! Serialization and deserialization for RadixTree nodes.
use crate::error::Error;
use crate::node::{Node, NodeRef};
/// Current binary format version.
const VERSION: u8 = 1;
impl Node {
/// Serializes a node to bytes for storage.
pub fn serialize(&self) -> Vec<u8> {
// Implementation will go here
unimplemented!()
}
/// Deserializes bytes to a node.
pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
// Implementation will go here
unimplemented!()
}
}
```
### 12.6 src/operations.rs
```rust
//! Implementation of RadixTree operations.
use crate::error::Error;
use crate::node::{Node, NodeRef};
use crate::RadixTree;
impl RadixTree {
/// Helper function to get a node from the database.
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<Node, Error> {
// Implementation will go here
unimplemented!()
}
/// Helper function to save a node to the database.
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &Node) -> Result<u32, Error> {
// Implementation will go here
unimplemented!()
}
/// Helper function to find all keys with a given prefix.
fn find_keys_with_prefix(
&mut self,
node_id: u32,
current_path: &str,
prefix: &str,
result: &mut Vec<String>,
) -> Result<(), Error> {
// Implementation will go here
unimplemented!()
}
/// Helper function to recursively collect all keys under a node.
fn collect_all_keys(
&mut self,
node_id: u32,
current_path: &str,
result: &mut Vec<String>,
) -> Result<(), Error> {
// Implementation will go here
unimplemented!()
}
/// Helper function to get the common prefix of two strings.
fn get_common_prefix(a: &str, b: &str) -> String {
// Implementation will go here
unimplemented!()
}
}
```
### 12.7 examples/basic_usage.rs
```rust
//! Basic usage example for RadixTree.
use radixtree::RadixTree;
fn main() -> Result<(), radixtree::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("radixtree_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating radix tree at: {}", db_path.display());
// Create a new radix tree
let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;
// Store some data
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
tree.set("helicopter", b"flying".to_vec())?;
// Retrieve and print the data
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value));
// List keys with prefix
let keys = tree.list("hel")?;
println!("Keys with prefix 'hel': {:?}", keys);
// Get all values with prefix
let values = tree.getall("hel")?;
println!("Values with prefix 'hel':");
for (i, value) in values.iter().enumerate() {
println!(" {}: {}", i, String::from_utf8_lossy(value));
}
// Delete a key
tree.delete("help")?;
println!("Deleted 'help'");
// Verify deletion
let keys_after = tree.list("hel")?;
println!("Keys with prefix 'hel' after deletion: {:?}", keys_after);
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("Cleaned up database directory");
} else {
println!("Database kept at: {}", db_path.display());
}
Ok(())
}
```

View File

@@ -0,0 +1,27 @@
[package]
name = "radixtree"
version = "0.1.0"
edition = "2021"
description = "A persistent radix tree implementation using OurDB for storage"
authors = ["OurWorld Team"]
[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
log = "0.4.17"
[dev-dependencies]
criterion = "0.5.1"
tempfile = "3.8.0"
[[bench]]
name = "radixtree_benchmarks"
harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "prefix_operations"
path = "examples/prefix_operations.rs"

View File

@@ -0,0 +1,265 @@
# Migration Guide: V to Rust RadixTree
This document provides guidance for migrating from the V implementation of RadixTree to the Rust implementation.
## API Changes
The Rust implementation maintains API compatibility with the V implementation, but with some idiomatic Rust changes:
### V API
```v
// Create a new radix tree
mut rt := radixtree.new(path: '/tmp/radixtree_test', reset: true)!
// Set a key-value pair
rt.set('test', 'value1'.bytes())!
// Get a value by key
value := rt.get('test')!
// Update a value at a prefix
rt.update('prefix', 'new_value'.bytes())!
// Delete a key
rt.delete('test')!
// List keys with a prefix
keys := rt.list('prefix')!
// Get all values with a prefix
values := rt.getall('prefix')!
```
### Rust API
```rust
// Create a new radix tree
let mut tree = RadixTree::new("/tmp/radixtree_test", true)?;
// Set a key-value pair
tree.set("test", b"value1".to_vec())?;
// Get a value by key
let value = tree.get("test")?;
// Update a value at a prefix
tree.update("prefix", b"new_value".to_vec())?;
// Delete a key
tree.delete("test")?;
// List keys with a prefix
let keys = tree.list("prefix")?;
// Get all values with a prefix
let values = tree.getall("prefix")?;
```
## Key Differences
1. **Error Handling**: The Rust implementation uses Rust's `Result` type for error handling, while the V implementation uses V's `!` operator.
2. **String Handling**: The Rust implementation uses Rust's `&str` for string parameters and `String` for string return values, while the V implementation uses V's `string` type.
3. **Binary Data**: The Rust implementation uses Rust's `Vec<u8>` for binary data, while the V implementation uses V's `[]u8` type.
4. **Constructor**: The Rust implementation uses a constructor function with separate parameters, while the V implementation uses a struct with named parameters.
5. **Ownership**: The Rust implementation follows Rust's ownership model, requiring mutable references for methods that modify the tree.
## Data Compatibility
The Rust implementation maintains data compatibility with the V implementation:
- The same serialization format is used for nodes
- The same OurDB storage format is used
- Existing RadixTree data created with the V implementation can be read by the Rust implementation
## Migration Steps
1. **Update Dependencies**: Replace the V RadixTree dependency with the Rust RadixTree dependency in your project.
2. **Update Import Statements**: Replace V import statements with Rust use statements.
```v
// V
import freeflowuniverse.herolib.data.radixtree
```
```rust
// Rust
use radixtree::RadixTree;
```
3. **Update Constructor Calls**: Replace V constructor calls with Rust constructor calls.
```v
// V
mut rt := radixtree.new(path: '/path/to/db', reset: false)!
```
```rust
// Rust
let mut tree = RadixTree::new("/path/to/db", false)?;
```
4. **Update Method Calls**: Replace V method calls with Rust method calls.
```v
// V
rt.set('key', 'value'.bytes())!
```
```rust
// Rust
tree.set("key", b"value".to_vec())?;
```
5. **Update Error Handling**: Replace V error handling with Rust error handling.
```v
// V
if value := rt.get('key') {
println('Found: ${value.bytestr()}')
} else {
println('Error: ${err}')
}
```
```rust
// Rust
match tree.get("key") {
Ok(value) => println!("Found: {}", String::from_utf8_lossy(&value)),
Err(e) => println!("Error: {}", e),
}
```
6. **Update String Conversions**: Replace V string conversions with Rust string conversions.
```v
// V
value.bytestr() // Convert []u8 to string
```
```rust
// Rust
String::from_utf8_lossy(&value) // Convert Vec<u8> to string
```
## Example Migration
### V Code
```v
module main
import freeflowuniverse.herolib.data.radixtree
fn main() {
mut rt := radixtree.new(path: '/tmp/radixtree_test', reset: true) or {
println('Error creating RadixTree: ${err}')
return
}
rt.set('hello', 'world'.bytes()) or {
println('Error setting key: ${err}')
return
}
rt.set('help', 'me'.bytes()) or {
println('Error setting key: ${err}')
return
}
if value := rt.get('hello') {
println('hello: ${value.bytestr()}')
} else {
println('Error getting key: ${err}')
return
}
keys := rt.list('hel') or {
println('Error listing keys: ${err}')
return
}
println('Keys with prefix "hel": ${keys}')
values := rt.getall('hel') or {
println('Error getting all values: ${err}')
return
}
println('Values with prefix "hel":')
for i, value in values {
println(' ${i}: ${value.bytestr()}')
}
rt.delete('help') or {
println('Error deleting key: ${err}')
return
}
println('Deleted "help"')
}
```
### Rust Code
```rust
use radixtree::RadixTree;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut tree = RadixTree::new("/tmp/radixtree_test", true)
.map_err(|e| format!("Error creating RadixTree: {}", e))?;
tree.set("hello", b"world".to_vec())
.map_err(|e| format!("Error setting key: {}", e))?;
tree.set("help", b"me".to_vec())
.map_err(|e| format!("Error setting key: {}", e))?;
let value = tree.get("hello")
.map_err(|e| format!("Error getting key: {}", e))?;
println!("hello: {}", String::from_utf8_lossy(&value));
let keys = tree.list("hel")
.map_err(|e| format!("Error listing keys: {}", e))?;
println!("Keys with prefix \"hel\": {:?}", keys);
let values = tree.getall("hel")
.map_err(|e| format!("Error getting all values: {}", e))?;
println!("Values with prefix \"hel\":");
for (i, value) in values.iter().enumerate() {
println!(" {}: {}", i, String::from_utf8_lossy(value));
}
tree.delete("help")
.map_err(|e| format!("Error deleting key: {}", e))?;
println!("Deleted \"help\"");
Ok(())
}
```
## Performance Considerations
The Rust implementation should provide similar or better performance compared to the V implementation. However, there are some considerations:
1. **Memory Usage**: The Rust implementation may have different memory usage patterns due to Rust's ownership model.
2. **Error Handling**: The Rust implementation uses Rust's `Result` type, which may have different performance characteristics compared to V's error handling.
3. **String Handling**: The Rust implementation uses Rust's string types, which may have different performance characteristics compared to V's string types.
## Troubleshooting
If you encounter issues during migration, check the following:
1. **Data Compatibility**: Ensure that the data format is compatible between the V and Rust implementations.
2. **API Usage**: Ensure that you're using the correct API for the Rust implementation.
3. **Error Handling**: Ensure that you're handling errors correctly in the Rust implementation.
4. **String Encoding**: Ensure that string encoding is consistent between the V and Rust implementations.
If you encounter any issues that are not covered in this guide, please report them to the project maintainers.

View File

@@ -0,0 +1,189 @@
# RadixTree
A persistent radix tree implementation in Rust using OurDB for storage.
## Overview
RadixTree is a space-optimized tree data structure that enables efficient string key operations with persistent storage. This implementation provides a persistent radix tree that can be used for efficient prefix-based key operations, such as auto-complete, routing tables, and more.
A radix tree (also known as a Patricia trie or radix trie) is a space-optimized tree data structure that enables efficient string key operations. Unlike a standard trie where each node represents a single character, a radix tree compresses paths by allowing nodes to represent multiple characters (key segments).
Key characteristics:
- Each node stores a segment of a key (not just a single character)
- Nodes can have multiple children, each representing a different branch
- Leaf nodes contain the actual values
- Optimizes storage by compressing common prefixes
## Features
- Efficient prefix-based key operations
- Persistent storage using OurDB backend
- Memory-efficient storage of strings with common prefixes
- Support for binary values
- Thread-safe operations through OurDB
## Usage
Add the dependency to your `Cargo.toml`:
```toml
[dependencies]
radixtree = { path = "../radixtree" }
```
### Basic Example
```rust
use radixtree::RadixTree;
fn main() -> Result<(), radixtree::Error> {
// Create a new radix tree
let mut tree = RadixTree::new("/tmp/radix", false)?;
// Set key-value pairs
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
// Get values by key
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value)); // Prints: world
// List keys by prefix
let keys = tree.list("hel")?; // Returns ["hello", "help"]
println!("Keys with prefix 'hel': {:?}", keys);
// Get all values by prefix
let values = tree.getall("hel")?; // Returns [b"world", b"me"]
// Delete keys
tree.delete("help")?;
Ok(())
}
```
## API
### Creating a RadixTree
```rust
// Create a new radix tree
let mut tree = RadixTree::new("/tmp/radix", false)?;
// Create a new radix tree and reset if it exists
let mut tree = RadixTree::new("/tmp/radix", true)?;
```
### Setting Values
```rust
// Set a key-value pair
tree.set("key", b"value".to_vec())?;
```
### Getting Values
```rust
// Get a value by key
let value = tree.get("key")?;
```
### Updating Values
```rust
// Update a value at a given prefix
tree.update("prefix", b"new_value".to_vec())?;
```
### Deleting Keys
```rust
// Delete a key
tree.delete("key")?;
```
### Listing Keys by Prefix
```rust
// List all keys with a given prefix
let keys = tree.list("prefix")?;
```
### Getting All Values by Prefix
```rust
// Get all values for keys with a given prefix
let values = tree.getall("prefix")?;
```
## Performance Characteristics
- Search: O(k) where k is the key length
- Insert: O(k) for new keys, may require node splitting
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total length of all keys
## Use Cases
RadixTree is particularly useful for:
- Prefix-based searching
- IP routing tables
- Dictionary implementations
- Auto-complete systems
- File system paths
- Any application requiring efficient string key operations with persistence
## Implementation Details
The RadixTree implementation uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution
For more detailed information about the implementation, see the [ARCHITECTURE.md](./ARCHITECTURE.md) file.
## Running Tests
The project includes a comprehensive test suite that verifies all functionality:
```bash
# Run all tests
cargo test
# Run specific test file
cargo test --test basic_test
cargo test --test prefix_test
cargo test --test getall_test
cargo test --test serialize_test
```
## Running Examples
The project includes example applications that demonstrate how to use the RadixTree:
```bash
# Run the basic usage example
cargo run --example basic_usage
# Run the prefix operations example
cargo run --example prefix_operations
```
## Benchmarking
The project includes benchmarks to measure performance:
```bash
# Run all benchmarks
cargo bench
# Run specific benchmark
cargo bench -- set
cargo bench -- get
cargo bench -- prefix_operations
```
## License
This project is licensed under the same license as the HeroCode project.

View File

@@ -0,0 +1,141 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use radixtree::RadixTree;
use std::path::PathBuf;
use tempfile::tempdir;
/// Criterion entry point: measures every public RadixTree operation
/// (set/get/list/getall/update/delete) plus prefix scans at several
/// tree sizes. All databases live in temporary directories that are
/// removed when their `TempDir` guards drop at the end of this function.
fn criterion_benchmark(c: &mut Criterion) {
    // Create a temporary directory for benchmarks
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Benchmark set operation: `i` grows monotonically across iterations,
    // so every iteration inserts a brand-new key (never an overwrite).
    c.bench_function("set", |b| {
        let mut tree = RadixTree::new(db_path, true).unwrap();
        let mut i = 0;
        b.iter(|| {
            let key = format!("benchmark_key_{}", i);
            let value = format!("benchmark_value_{}", i).into_bytes();
            tree.set(&key, value).unwrap();
            i += 1;
        });
    });
    // Setup tree with data for get/list/delete benchmarks.
    // reset=true discards whatever the "set" benchmark wrote above.
    let mut setup_tree = RadixTree::new(db_path, true).unwrap();
    for i in 0..1000 {
        let key = format!("benchmark_key_{}", i);
        let value = format!("benchmark_value_{}", i).into_bytes();
        setup_tree.set(&key, value).unwrap();
    }
    // Benchmark get operation; keys cycle over the 1000 inserted above.
    // NOTE(review): `setup_tree` is still alive here, so two RadixTree
    // handles point at the same database path — confirm OurDB tolerates
    // concurrent handles on one directory.
    c.bench_function("get", |b| {
        let mut tree = RadixTree::new(db_path, false).unwrap();
        let mut i = 0;
        b.iter(|| {
            let key = format!("benchmark_key_{}", i % 1000);
            let _value = tree.get(&key).unwrap();
            i += 1;
        });
    });
    // Benchmark list operation. The prefix "benchmark_key_1" matches
    // key_1, key_10..19 and key_100..199 — 111 of the 1000 keys.
    c.bench_function("list", |b| {
        let mut tree = RadixTree::new(db_path, false).unwrap();
        b.iter(|| {
            let _keys = tree.list("benchmark_key_1").unwrap();
        });
    });
    // Benchmark getall operation (same prefix as the list benchmark).
    c.bench_function("getall", |b| {
        let mut tree = RadixTree::new(db_path, false).unwrap();
        b.iter(|| {
            let _values = tree.getall("benchmark_key_1").unwrap();
        });
    });
    // Benchmark update operation (overwrites values of existing keys).
    c.bench_function("update", |b| {
        let mut tree = RadixTree::new(db_path, false).unwrap();
        let mut i = 0;
        b.iter(|| {
            let key = format!("benchmark_key_{}", i % 1000);
            let new_value = format!("updated_value_{}", i).into_bytes();
            tree.update(&key, new_value).unwrap();
            i += 1;
        });
    });
    // Benchmark delete operation
    c.bench_function("delete", |b| {
        // Create a fresh tree for deletion benchmarks so deletes here
        // do not disturb the shared db_path used by the reads above.
        let delete_dir = tempdir().expect("Failed to create temp directory");
        let delete_path = delete_dir.path().to_str().unwrap();
        let mut tree = RadixTree::new(delete_path, true).unwrap();
        // Setup keys to delete
        for i in 0..1000 {
            let key = format!("delete_key_{}", i);
            let value = format!("delete_value_{}", i).into_bytes();
            tree.set(&key, value).unwrap();
        }
        let mut i = 0;
        b.iter(|| {
            let key = format!("delete_key_{}", i % 1000);
            // Only try to delete if it exists: after the first 1000
            // iterations every key is gone, so later iterations measure
            // a failed get instead of a delete.
            if tree.get(&key).is_ok() {
                tree.delete(&key).unwrap();
            }
            i += 1;
        });
    });
    // Benchmark prefix operations with varying tree sizes
    let mut group = c.benchmark_group("prefix_operations");
    for &size in &[100, 1000, 10000] {
        // Create a fresh tree for each size
        let size_dir = tempdir().expect("Failed to create temp directory");
        let size_path = size_dir.path().to_str().unwrap();
        let mut tree = RadixTree::new(size_path, true).unwrap();
        // Insert data with common prefixes: keys round-robin over five
        // prefixes, so each prefix owns roughly size/5 keys.
        for i in 0..size {
            let prefix = match i % 5 {
                0 => "user",
                1 => "post",
                2 => "comment",
                3 => "product",
                _ => "category",
            };
            let key = format!("{}_{}", prefix, i);
            let value = format!("value_{}", i).into_bytes();
            tree.set(&key, value).unwrap();
        }
        // Benchmark list operation for this size
        group.bench_function(format!("list_size_{}", size), |b| {
            b.iter(|| {
                for prefix in &["user", "post", "comment", "product", "category"] {
                    let _keys = tree.list(prefix).unwrap();
                }
            });
        });
        // Benchmark getall operation for this size
        group.bench_function(format!("getall_size_{}", size), |b| {
            b.iter(|| {
                for prefix in &["user", "post", "comment", "product", "category"] {
                    let _values = tree.getall(prefix).unwrap();
                }
            });
        });
    }
    group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@@ -0,0 +1,51 @@
use radixtree::RadixTree;
use std::path::PathBuf;
/// Walks the core RadixTree API end to end: create a tree, insert,
/// read, update, delete, then remove the on-disk database (unless the
/// KEEP_DB environment variable is set).
fn main() -> Result<(), radixtree::Error> {
    // Work inside a throwaway directory under the system temp dir.
    let db_path = std::env::temp_dir().join("radixtree_example");
    std::fs::create_dir_all(&db_path)?;
    println!("Creating radix tree at: {}", db_path.display());
    // Open the tree, resetting any database left over from a prior run.
    let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;
    // Insert three keys that share the "hel" prefix.
    println!("Storing data...");
    for (key, text) in [("hello", "world"), ("help", "me"), ("helicopter", "flying")] {
        tree.set(key, text.as_bytes().to_vec())?;
    }
    // Read one key back.
    let value = tree.get("hello")?;
    println!("hello: {}", String::from_utf8_lossy(&value));
    // Overwrite that key in place, then confirm the new contents.
    println!("Updating value...");
    tree.update("hello", b"updated world".to_vec())?;
    let updated_value = tree.get("hello")?;
    println!("hello (updated): {}", String::from_utf8_lossy(&updated_value));
    // Remove a key, then show that looking it up now fails.
    println!("Deleting 'help'...");
    tree.delete("help")?;
    match tree.get("help") {
        Ok(value) => println!("Unexpected: help still exists with value: {}", String::from_utf8_lossy(&value)),
        Err(e) => println!("As expected, help was deleted: {}", e),
    }
    // Keep the database on disk only when the caller asks for it.
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("Cleaned up database directory");
    } else {
        println!("Database kept at: {}", db_path.display());
    }
    Ok(())
}

View File

@@ -0,0 +1,121 @@
use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Use much smaller batches to avoid hitting OurDB's size limit
const BATCH_SIZE: usize = 1_000;
const NUM_BATCHES: usize = 1_000; // Total records: 1,000,000
const PROGRESS_INTERVAL: usize = 100;
/// Stress-tests insertion throughput: inserts NUM_BATCHES * BATCH_SIZE
/// records, one fresh database per batch (to stay under OurDB's size
/// limit), and reports per-batch and overall throughput plus sampled
/// read and prefix-scan latencies.
fn main() -> Result<(), radixtree::Error> {
    // Overall metrics
    let total_start_time = Instant::now();
    let mut total_records_inserted = 0;
    let mut batch_times = Vec::with_capacity(NUM_BATCHES);
    println!("Will insert up to {} records in batches of {}",
        BATCH_SIZE * NUM_BATCHES, BATCH_SIZE);
    // Process in batches to avoid OurDB size limits
    for batch in 0..NUM_BATCHES {
        // Create a new database for each batch.
        // NOTE(review): batch directories are never removed after the
        // run, so a full run leaves NUM_BATCHES temp dirs behind.
        let batch_path = std::env::temp_dir().join(format!("radixtree_batch_{}", batch));
        // Clean up any existing database
        if batch_path.exists() {
            std::fs::remove_dir_all(&batch_path)?;
        }
        std::fs::create_dir_all(&batch_path)?;
        println!("\nBatch {}/{}: Creating new radix tree...", batch + 1, NUM_BATCHES);
        let mut tree = RadixTree::new(batch_path.to_str().unwrap(), true)?;
        let batch_start_time = Instant::now();
        let mut last_progress_time = Instant::now();
        let mut last_progress_count = 0;
        // Insert records for this batch; keys are zero-padded to 8 digits
        // ("key:00001234") so they sort lexicographically.
        for i in 0..BATCH_SIZE {
            let global_index = batch * BATCH_SIZE + i;
            let key = format!("key:{:08}", global_index);
            let value = format!("val{}", global_index).into_bytes();
            tree.set(&key, value)?;
            // Show progress at intervals
            if (i + 1) % PROGRESS_INTERVAL == 0 || i == BATCH_SIZE - 1 {
                let records_since_last = i + 1 - last_progress_count;
                let time_since_last = last_progress_time.elapsed();
                let records_per_second = records_since_last as f64 / time_since_last.as_secs_f64();
                // "\r" rewrites the same console line in place.
                print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                    i + 1, BATCH_SIZE,
                    (i + 1) as f64 / BATCH_SIZE as f64 * 100.0,
                    records_per_second);
                io::stdout().flush().unwrap();
                last_progress_time = Instant::now();
                last_progress_count = i + 1;
            }
        }
        let batch_duration = batch_start_time.elapsed();
        batch_times.push(batch_duration);
        total_records_inserted += BATCH_SIZE;
        println!("\nBatch {}/{} completed in {:?} ({:.2} records/sec)",
            batch + 1, NUM_BATCHES,
            batch_duration,
            BATCH_SIZE as f64 / batch_duration.as_secs_f64());
        // Test random access performance for this batch
        println!("Testing access performance for batch {}...", batch + 1);
        let mut total_get_time = Duration::new(0, 0);
        let num_samples = 100;
        // Use a simple distribution pattern (evenly spaced, not random)
        for i in 0..num_samples {
            // Distribute samples across the batch
            let sample_id = batch * BATCH_SIZE + (i * (BATCH_SIZE / num_samples));
            let key = format!("key:{:08}", sample_id);
            let get_start = Instant::now();
            let _ = tree.get(&key)?;
            total_get_time += get_start.elapsed();
        }
        println!("Average time to retrieve a record: {:?}",
            total_get_time / num_samples as u32);
        // Test prefix search performance.
        // NOTE(review): keys are zero-padded to 8 digits, so their first
        // two digits are "00" for every index below 1,000,000; the prefix
        // "key:NN" therefore matches nothing unless batch % 100 == 0 —
        // confirm this is the intended measurement.
        println!("Testing prefix search performance...");
        let prefix = format!("key:{:02}", batch % 100);
        let list_start = Instant::now();
        let keys = tree.list(&prefix)?;
        let list_duration = list_start.elapsed();
        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }
    // Overall performance summary
    let total_duration = total_start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", total_records_inserted, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        total_records_inserted as f64 / total_duration.as_secs_f64());
    // Show performance trend
    println!("\nPerformance Trend (batch number vs. time):");
    for (i, duration) in batch_times.iter().enumerate() {
        if i % 10 == 0 || i == batch_times.len() - 1 { // Only show every 10th point
            println!("  Batch {}: {:?} ({:.2} records/sec)",
                i + 1,
                duration,
                BATCH_SIZE as f64 / duration.as_secs_f64());
        }
    }
    Ok(())
}

View File

@@ -0,0 +1,134 @@
use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};
// Number of records to insert
const TOTAL_RECORDS: usize = 1_000_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 10_000;
// How many records to use for performance sampling
const PERFORMANCE_SAMPLE_SIZE: usize = 1000;
/// Single-database performance test: inserts TOTAL_RECORDS keys into one
/// tree, sampling individual insertion latency, reporting throughput per
/// PROGRESS_INTERVAL, then measuring read and prefix-search latency.
fn main() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("radixtree_performance_test");
    // Completely remove and recreate the directory to ensure a clean start
    if db_path.exists() {
        std::fs::remove_dir_all(&db_path)?;
    }
    std::fs::create_dir_all(&db_path)?;
    println!("Creating radix tree at: {}", db_path.display());
    println!("Will insert {} records and show progress...", TOTAL_RECORDS);
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;
    // Track overall time
    let start_time = Instant::now();
    // Track performance metrics: one (record_count, duration) entry per
    // progress interval.
    let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
    let mut last_batch_time = Instant::now();
    let mut last_batch_records = 0;
    // Insert records and track progress
    for i in 0..TOTAL_RECORDS {
        let key = format!("key:{:08}", i);
        // Use smaller values to avoid exceeding OurDB's size limit
        let value = format!("val{}", i).into_bytes();
        // Time the insertion of every Nth record for performance sampling;
        // all other records are inserted untimed to keep overhead low.
        if i % PERFORMANCE_SAMPLE_SIZE == 0 {
            let insert_start = Instant::now();
            tree.set(&key, value)?;
            let insert_duration = insert_start.elapsed();
            // Only print detailed timing for specific samples to avoid flooding output
            if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
                println!("Record {}: Insertion took {:?}", i, insert_duration);
            }
        } else {
            tree.set(&key, value)?;
        }
        // Show progress at intervals ("\r" rewrites the same console line).
        if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
            let records_in_batch = i + 1 - last_batch_records;
            let batch_duration = last_batch_time.elapsed();
            let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();
            insertion_times.push((i + 1, batch_duration));
            print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                i + 1, TOTAL_RECORDS,
                (i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
                records_per_second);
            io::stdout().flush().unwrap();
            last_batch_time = Instant::now();
            last_batch_records = i + 1;
        }
    }
    let total_duration = start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        TOTAL_RECORDS as f64 / total_duration.as_secs_f64());
    // Show performance trend (reveals whether insertion slows as the tree grows)
    println!("\nPerformance Trend (records inserted vs. time per batch):");
    for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
        if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
            println!("  After {} records: {:?} for {} records ({:.2} records/sec)",
                record_count,
                duration,
                PROGRESS_INTERVAL,
                PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
        }
    }
    // Test access performance with distributed samples
    println!("\nTesting access performance with distributed samples...");
    let mut total_get_time = Duration::new(0, 0);
    let num_samples = 1000;
    // Use a simple distribution pattern instead of random
    for i in 0..num_samples {
        // Distribute samples across the entire range
        let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
        let key = format!("key:{:08}", sample_id);
        let get_start = Instant::now();
        let _ = tree.get(&key)?;
        total_get_time += get_start.elapsed();
    }
    println!("Average time to retrieve a record: {:?}",
        total_get_time / num_samples as u32);
    // Test prefix search performance over prefixes with very different
    // match counts (e.g. "key:0" matches all 8-digit keys below 1,000,000).
    println!("\nTesting prefix search performance...");
    let prefixes = ["key:0", "key:1", "key:5", "key:9"];
    for prefix in &prefixes {
        let list_start = Instant::now();
        let keys = tree.list(prefix)?;
        let list_duration = list_start.elapsed();
        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }
    // Clean up (optional; set KEEP_DB to inspect the database afterwards)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }
    Ok(())
}

View File

@@ -0,0 +1,97 @@
use radixtree::RadixTree;
use std::path::PathBuf;
/// Demonstrates prefix-oriented usage: namespaced keys ("user:1:name"),
/// prefix listing, bulk value retrieval, client-side wildcard emulation,
/// and read-modify-write updates over a prefix.
fn main() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("radixtree_prefix_example");
    std::fs::create_dir_all(&db_path)?;
    println!("Creating radix tree at: {}", db_path.display());
    // Create a new radix tree (reset=true wipes any previous run's data)
    let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;
    // Store data with common prefixes
    println!("Storing data with common prefixes...");
    // User data
    tree.set("user:1:name", b"Alice".to_vec())?;
    tree.set("user:1:email", b"alice@example.com".to_vec())?;
    tree.set("user:2:name", b"Bob".to_vec())?;
    tree.set("user:2:email", b"bob@example.com".to_vec())?;
    // Post data
    tree.set("post:1:title", b"First Post".to_vec())?;
    tree.set("post:1:content", b"Hello World!".to_vec())?;
    tree.set("post:2:title", b"Second Post".to_vec())?;
    tree.set("post:2:content", b"Another post content".to_vec())?;
    // Demonstrate listing keys with a prefix
    println!("\nListing keys with prefix 'user:1:'");
    let user1_keys = tree.list("user:1:")?;
    for key in &user1_keys {
        println!("  Key: {}", key);
    }
    println!("\nListing keys with prefix 'post:'");
    let post_keys = tree.list("post:")?;
    for key in &post_keys {
        println!("  Key: {}", key);
    }
    // Demonstrate getting all values with a prefix
    println!("\nGetting all values with prefix 'user:1:'");
    let user1_values = tree.getall("user:1:")?;
    for (i, value) in user1_values.iter().enumerate() {
        println!("  Value {}: {}", i + 1, String::from_utf8_lossy(value));
    }
    // Demonstrate finding all user names. The tree has no wildcard
    // support, so 'user:*:name' is emulated by listing 'user:' and
    // filtering on the ':name' suffix client-side.
    println!("\nFinding all user names (prefix 'user:*:name')");
    let mut user_names = Vec::new();
    let all_keys = tree.list("user:")?;
    for key in all_keys {
        if key.ends_with(":name") {
            if let Ok(value) = tree.get(&key) {
                user_names.push((key, String::from_utf8_lossy(&value).to_string()));
            }
        }
    }
    for (key, name) in user_names {
        println!("  {}: {}", key, name);
    }
    // Demonstrate updating values with a specific prefix. Keys are
    // collected first because `tree` cannot be borrowed for iteration
    // and mutated at the same time.
    println!("\nUpdating all post titles...");
    let post_title_keys = tree.list("post:")?.into_iter().filter(|k| k.ends_with(":title")).collect::<Vec<_>>();
    for key in post_title_keys {
        let old_value = tree.get(&key)?;
        let old_title = String::from_utf8_lossy(&old_value);
        let new_title = format!("UPDATED: {}", old_title);
        println!("  Updating '{}' to '{}'", old_title, new_title);
        tree.update(&key, new_title.as_bytes().to_vec())?;
    }
    // Verify updates by re-reading every title
    println!("\nVerifying updates:");
    let post_keys = tree.list("post:")?;
    for key in post_keys {
        if key.ends_with(":title") {
            let value = tree.get(&key)?;
            println!("  {}: {}", key, String::from_utf8_lossy(&value));
        }
    }
    // Clean up (optional; set KEEP_DB to inspect the database afterwards)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }
    Ok(())
}

View File

@@ -0,0 +1,35 @@
//! Error types for the RadixTree module.
use thiserror::Error;
/// Error type for RadixTree operations.
///
/// The `#[from]` variants let callers use `?` on OurDB and `std::io`
/// results inside this crate — those errors convert into `Error`
/// automatically.
#[derive(Debug, Error)]
pub enum Error {
    /// Error from OurDB operations.
    #[error("OurDB error: {0}")]
    OurDB(#[from] ourdb::Error),
    /// Error when a key is not found; carries the missing key.
    #[error("Key not found: {0}")]
    KeyNotFound(String),
    /// Error when a prefix is not found; carries the missing prefix.
    #[error("Prefix not found: {0}")]
    PrefixNotFound(String),
    /// Error during serialization.
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// Error during deserialization (e.g. corrupt or unsupported data).
    #[error("Deserialization error: {0}")]
    Deserialization(String),
    /// Error for invalid operations.
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),
    /// Error for I/O operations.
    #[error("I/O error: {0}")]
    IO(#[from] std::io::Error),
}

View File

@@ -0,0 +1,133 @@
//! RadixTree is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent radix tree that can be used for efficient
//! prefix-based key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::{Node, NodeRef};
use ourdb::OurDB;
/// RadixTree represents a radix tree data structure with persistent storage.
pub struct RadixTree {
    /// Handle to the OurDB instance backing this tree.
    db: OurDB,
    /// Database record ID of the root node; all traversals start here.
    root_id: u32,
}
impl RadixTree {
    /// Creates a new radix tree with the specified database path.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to the database directory
    /// * `reset` - Whether to reset the database if it exists
    ///
    /// # Returns
    ///
    /// A new `RadixTree` instance
    ///
    /// # Errors
    ///
    /// Returns an error if the database cannot be created or opened
    pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
        operations::new_radix_tree(path, reset)
    }
    /// Sets a key-value pair in the tree, overwriting any existing value.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to set (the empty key is stored on the root node)
    /// * `value` - The value to set
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
        operations::set(self, key, value)
    }
    /// Gets a value by key from the tree.
    ///
    /// Takes `&mut self` because node reads go through the mutable OurDB
    /// handle.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to get
    ///
    /// # Returns
    ///
    /// The value associated with the key
    ///
    /// # Errors
    ///
    /// Returns an error if the key is not found or the operation fails
    pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
        operations::get(self, key)
    }
    /// Updates the value at a given key prefix.
    ///
    /// Despite the parameter name, `prefix` must resolve to a complete
    /// existing key whose node is a leaf; otherwise `PrefixNotFound` is
    /// returned.
    ///
    /// # Arguments
    ///
    /// * `prefix` - The key prefix to update
    /// * `new_value` - The new value to set
    ///
    /// # Errors
    ///
    /// Returns an error if the prefix is not found or the operation fails
    pub fn update(&mut self, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
        operations::update(self, prefix, new_value)
    }
    /// Deletes a key from the tree.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to delete
    ///
    /// # Errors
    ///
    /// Returns an error if the key is not found or the operation fails
    pub fn delete(&mut self, key: &str) -> Result<(), Error> {
        operations::delete(self, key)
    }
    /// Lists all keys with a given prefix.
    ///
    /// # Arguments
    ///
    /// * `prefix` - The prefix to search for (empty prefix lists every key)
    ///
    /// # Returns
    ///
    /// A list of keys that start with the given prefix
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
        operations::list(self, prefix)
    }
    /// Gets all values for keys with a given prefix.
    ///
    /// # Arguments
    ///
    /// * `prefix` - The prefix to search for
    ///
    /// # Returns
    ///
    /// A list of values for keys that start with the given prefix
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
        operations::getall(self, prefix)
    }
}

View File

@@ -0,0 +1,59 @@
//! Node types for the RadixTree module.
/// Represents a node in the radix tree.
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
    /// The segment of the key stored at this node.
    ///
    /// NOTE(review): traversal matches on the parent's `NodeRef::key_part`;
    /// this field is written when nodes are created but never read during
    /// lookups — confirm whether it is still needed.
    pub key_segment: String,
    /// Value stored at this node (empty if not a leaf).
    pub value: Vec<u8>,
    /// References to child nodes.
    pub children: Vec<NodeRef>,
    /// Whether this node is a leaf node (i.e. a complete key ends here).
    pub is_leaf: bool,
}
/// Reference to a node in the database.
#[derive(Debug, Clone, PartialEq)]
pub struct NodeRef {
    /// The key segment for this child; concatenating `key_part`s along a
    /// path from the root spells out a full key.
    pub key_part: String,
    /// Database ID of the node.
    pub node_id: u32,
}
impl Node {
/// Creates a new node.
pub fn new(key_segment: String, value: Vec<u8>, is_leaf: bool) -> Self {
Self {
key_segment,
value,
children: Vec::new(),
is_leaf,
}
}
/// Creates a new root node.
pub fn new_root() -> Self {
Self {
key_segment: String::new(),
value: Vec::new(),
children: Vec::new(),
is_leaf: false,
}
}
}
impl NodeRef {
/// Creates a new node reference.
pub fn new(key_part: String, node_id: u32) -> Self {
Self {
key_part,
node_id,
}
}
}

View File

@@ -0,0 +1,508 @@
//! Implementation of RadixTree operations.
use crate::error::Error;
use crate::node::{Node, NodeRef};
use crate::RadixTree;
use crate::serialize::get_common_prefix;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Creates a new radix tree with the specified database path.
///
/// `reset` is now forwarded to the OurDB configuration so an existing
/// database is cleared when requested; previously the flag was accepted but
/// silently ignored (the config hard-coded `reset: None`).
pub fn new_radix_tree(path: &str, reset: bool) -> Result<RadixTree, Error> {
    let config = OurDBConfig {
        path: PathBuf::from(path),
        incremental_mode: true,
        file_size: Some(1024 * 1024 * 10), // 10MB per file for better performance with large datasets
        keysize: Some(6), // keysize=6 to support multiple files (file_nr + position)
        // Honor the caller's reset request.
        // NOTE(review): assumes `OurDBConfig::reset` is `Option<bool>`, which
        // matches the `reset: None` previously passed here — confirm.
        reset: Some(reset),
    };
    let mut db = OurDB::new(config)?;
    // A fresh (or freshly reset) database hands out ID 1 first; in that case
    // the root node does not exist yet and must be created.
    let root_id = if db.get_next_id()? == 1 {
        let root = Node::new_root();
        let root_id = db.set(OurDBSetArgs {
            id: None,
            data: &root.serialize(),
        })?;
        // First ID must be 1; everything else assumes the root lives there.
        assert_eq!(root_id, 1);
        root_id
    } else {
        // Use the existing root node; it always has ID 1.
        1
    };
    Ok(RadixTree { db, root_id })
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut RadixTree, key: &str, value: Vec<u8>) -> Result<(), Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
// Handle empty key case
if key.is_empty() {
let mut root_node = tree.get_node(current_id)?;
root_node.is_leaf = true;
root_node.value = value;
tree.save_node(Some(current_id), &root_node)?;
return Ok(());
}
while offset < key.len() {
let mut node = tree.get_node(current_id)?;
// Find matching child
let mut matched_child = None;
for (i, child) in node.children.iter().enumerate() {
if key[offset..].starts_with(&child.key_part) {
matched_child = Some((i, child.clone()));
break;
}
}
if matched_child.is_none() {
// No matching child found, create new leaf node
let key_part = key[offset..].to_string();
let new_node = Node {
key_segment: key_part.clone(),
value: value.clone(),
children: Vec::new(),
is_leaf: true,
};
let new_id = tree.save_node(None, &new_node)?;
// Create new child reference and update parent node
node.children.push(NodeRef {
key_part,
node_id: new_id,
});
tree.save_node(Some(current_id), &node)?;
return Ok(());
}
let (child_index, mut child) = matched_child.unwrap();
let common_prefix = get_common_prefix(&key[offset..], &child.key_part);
if common_prefix.len() < child.key_part.len() {
// Split existing node
let child_node = tree.get_node(child.node_id)?;
// Create new intermediate node
let new_node = Node {
key_segment: child.key_part[common_prefix.len()..].to_string(),
value: child_node.value.clone(),
children: child_node.children.clone(),
is_leaf: child_node.is_leaf,
};
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.children[child_index] = NodeRef {
key_part: common_prefix.to_string(),
node_id: new_id,
};
tree.save_node(Some(current_id), &node)?;
// Update child node reference
child.node_id = new_id;
}
if offset + common_prefix.len() == key.len() {
// Update value at existing node
let mut child_node = tree.get_node(child.node_id)?;
child_node.value = value;
child_node.is_leaf = true;
tree.save_node(Some(child.node_id), &child_node)?;
return Ok(());
}
offset += common_prefix.len();
current_id = child.node_id;
}
Ok(())
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut RadixTree, key: &str) -> Result<Vec<u8>, Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
// Handle empty key case
if key.is_empty() {
let root_node = tree.get_node(current_id)?;
if root_node.is_leaf {
return Ok(root_node.value.clone());
}
return Err(Error::KeyNotFound(key.to_string()));
}
while offset < key.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if key[offset..].starts_with(&child.key_part) {
if offset + child.key_part.len() == key.len() {
let child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
return Ok(child_node.value);
}
}
current_id = child.node_id;
offset += child.key_part.len();
found = true;
break;
}
}
if !found {
return Err(Error::KeyNotFound(key.to_string()));
}
}
Err(Error::KeyNotFound(key.to_string()))
}
/// Updates the value at a given key prefix.
pub fn update(tree: &mut RadixTree, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
// Handle empty prefix case
if prefix.is_empty() {
return Err(Error::InvalidOperation("Empty prefix not allowed".to_string()));
}
while offset < prefix.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if prefix[offset..].starts_with(&child.key_part) {
if offset + child.key_part.len() == prefix.len() {
// Found exact prefix match
let mut child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
// Update the value
child_node.value = new_value;
tree.save_node(Some(child.node_id), &child_node)?;
return Ok(());
}
}
current_id = child.node_id;
offset += child.key_part.len();
found = true;
break;
}
}
if !found {
return Err(Error::PrefixNotFound(prefix.to_string()));
}
}
Err(Error::PrefixNotFound(prefix.to_string()))
}
/// Deletes a key from the tree.
pub fn delete(tree: &mut RadixTree, key: &str) -> Result<(), Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
let mut path = Vec::new();
// Handle empty key case
if key.is_empty() {
let mut root_node = tree.get_node(current_id)?;
if !root_node.is_leaf {
return Err(Error::KeyNotFound(key.to_string()));
}
// For the root node, we just mark it as non-leaf
root_node.is_leaf = false;
root_node.value = Vec::new();
tree.save_node(Some(current_id), &root_node)?;
return Ok(());
}
// Find the node to delete
while offset < key.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if key[offset..].starts_with(&child.key_part) {
path.push(child.clone());
current_id = child.node_id;
offset += child.key_part.len();
found = true;
// Check if we've matched the full key
if offset == key.len() {
let child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
found = true;
break;
}
}
break;
}
}
if !found {
return Err(Error::KeyNotFound(key.to_string()));
}
}
if path.is_empty() {
return Err(Error::KeyNotFound(key.to_string()));
}
// Get the node to delete
let mut last_node = tree.get_node(path.last().unwrap().node_id)?;
// If the node has children, just mark it as non-leaf
if !last_node.children.is_empty() {
last_node.is_leaf = false;
last_node.value = Vec::new();
tree.save_node(Some(path.last().unwrap().node_id), &last_node)?;
return Ok(());
}
// If node has no children, remove it from parent
if path.len() > 1 {
let parent_id = path[path.len() - 2].node_id;
let mut parent_node = tree.get_node(parent_id)?;
// Find and remove the child from parent
for i in 0..parent_node.children.len() {
if parent_node.children[i].node_id == path.last().unwrap().node_id {
parent_node.children.remove(i);
break;
}
}
tree.save_node(Some(parent_id), &parent_node)?;
// Delete the node from the database
tree.db.delete(path.last().unwrap().node_id)?;
} else {
// If this is a direct child of the root, just mark it as non-leaf
last_node.is_leaf = false;
last_node.value = Vec::new();
tree.save_node(Some(path.last().unwrap().node_id), &last_node)?;
}
Ok(())
}
/// Lists all keys with a given prefix; an empty prefix lists every key.
pub fn list(tree: &mut RadixTree, prefix: &str) -> Result<Vec<String>, Error> {
    let mut keys = Vec::new();
    let root = tree.root_id;
    if prefix.is_empty() {
        // Empty prefix matches everything: walk the whole tree.
        collect_all_keys(tree, root, "", &mut keys)?;
    } else {
        find_keys_with_prefix(tree, root, "", prefix, &mut keys)?;
    }
    Ok(keys)
}
/// Recursively collects into `result` every stored key below `node_id` that
/// starts with `prefix`; `current_path` is the key accumulated on the way
/// down from the root.
fn find_keys_with_prefix(
    tree: &mut RadixTree,
    node_id: u32,
    current_path: &str,
    prefix: &str,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    if current_path.len() >= prefix.len() {
        // Deep enough to decide: if the accumulated path carries the prefix,
        // every leaf in this subtree is a match.
        if current_path.starts_with(prefix) {
            if node.is_leaf {
                result.push(current_path.to_string());
            }
            for child in &node.children {
                let child_path = format!("{}{}", current_path, child.key_part);
                find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?;
            }
        }
        return Ok(());
    }

    // Path is still shorter than the prefix: descend only into children whose
    // edge is compatible with the unmatched part of the prefix. (The edge
    // either consumes part of the remainder, or extends past its end.)
    if prefix.starts_with(current_path) {
        let prefix_remainder = &prefix[current_path.len()..];
        for child in &node.children {
            if prefix_remainder.starts_with(&child.key_part)
                || child.key_part.starts_with(prefix_remainder)
            {
                let child_path = format!("{}{}", current_path, child.key_part);
                find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?;
            }
        }
    }
    Ok(())
}
/// Recursively appends to `result` the full key of every leaf reachable from
/// `node_id`; `current_path` is the key accumulated on the way down.
fn collect_all_keys(
    tree: &mut RadixTree,
    node_id: u32,
    current_path: &str,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;
    // Every leaf marks a complete stored key.
    if node.is_leaf {
        result.push(current_path.to_string());
    }
    node.children.iter().try_for_each(|child| {
        let child_path = format!("{}{}", current_path, child.key_part);
        collect_all_keys(tree, child.node_id, &child_path, result)
    })
}
/// Gets all values for keys with a given prefix.
///
/// Resolves the matching keys first, then fetches each value; a key whose
/// lookup fails in between is silently skipped (best-effort, as before).
pub fn getall(tree: &mut RadixTree, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
    let keys = list(tree, prefix)?;
    let mut values = Vec::with_capacity(keys.len());
    for key in &keys {
        if let Ok(value) = get(tree, key) {
            values.push(value);
        }
    }
    Ok(values)
}
impl RadixTree {
    /// Loads and deserializes the node stored under `node_id`.
    pub(crate) fn get_node(&mut self, node_id: u32) -> Result<Node, Error> {
        let data = self.db.get(node_id)?;
        Node::deserialize(&data)
    }

    /// Serializes `node` and writes it to the database; a `None` id creates a
    /// new record. Returns the record's database ID.
    pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &Node) -> Result<u32, Error> {
        let data = node.serialize();
        let args = OurDBSetArgs {
            id: node_id,
            data: &data,
        };
        Ok(self.db.set(args)?)
    }

    // NOTE(review): private `find_keys_with_prefix` and `collect_all_keys`
    // methods that duplicated the free functions above were removed — they
    // were module-private and nothing in this module called them; `list`
    // uses the free-function versions.
}

View File

@@ -0,0 +1,156 @@
//! Serialization and deserialization for RadixTree nodes.
use crate::error::Error;
use crate::node::{Node, NodeRef};
use std::io::{Cursor, Read};
use std::mem::size_of;
/// Current binary format version.
const VERSION: u8 = 1;
impl Node {
    /// Serializes a node to bytes for storage.
    ///
    /// Layout (version 1, all integers little-endian):
    /// `[version: u8][key_segment: u16 len + bytes][value: u16 len + bytes]`
    /// `[child count: u16, then per child: key_part (u16 len + bytes) + node_id: u32]`
    /// `[is_leaf: u8 (0 or 1)]`
    ///
    /// # Panics
    ///
    /// Panics if the value exceeds `u16::MAX` bytes or there are more than
    /// `u16::MAX` children. Previously these lengths were cast with `as u16`,
    /// silently truncating and producing an undecodable record.
    pub fn serialize(&self) -> Vec<u8> {
        assert!(
            self.value.len() <= u16::MAX as usize,
            "node value too large to serialize ({} bytes)",
            self.value.len()
        );
        assert!(
            self.children.len() <= u16::MAX as usize,
            "too many children to serialize ({})",
            self.children.len()
        );
        let mut buffer = Vec::new();
        // Version byte first so the format can evolve.
        buffer.push(VERSION);
        // Key segment, then value, as length-prefixed byte runs.
        write_string(&mut buffer, &self.key_segment);
        write_u16(&mut buffer, self.value.len() as u16);
        buffer.extend_from_slice(&self.value);
        // Children: count followed by (key_part, node_id) pairs.
        write_u16(&mut buffer, self.children.len() as u16);
        for child in &self.children {
            write_string(&mut buffer, &child.key_part);
            write_u32(&mut buffer, child.node_id);
        }
        // Leaf flag last.
        buffer.push(if self.is_leaf { 1 } else { 0 });
        buffer
    }

    /// Deserializes bytes produced by [`Node::serialize`] back into a node.
    ///
    /// # Errors
    ///
    /// Returns `Error::Deserialization` on empty input, a version mismatch,
    /// truncated data, or invalid UTF-8 in a string field.
    pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
        if data.is_empty() {
            return Err(Error::Deserialization("Empty data".to_string()));
        }
        let mut cursor = Cursor::new(data);
        // Read and verify version.
        let mut version_byte = [0u8; 1];
        cursor.read_exact(&mut version_byte)
            .map_err(|e| Error::Deserialization(format!("Failed to read version byte: {}", e)))?;
        if version_byte[0] != VERSION {
            return Err(Error::Deserialization(
                format!("Invalid version byte: expected {}, got {}", VERSION, version_byte[0])
            ));
        }
        // Read key segment.
        let key_segment = read_string(&mut cursor)
            .map_err(|e| Error::Deserialization(format!("Failed to read key segment: {}", e)))?;
        // Read value as a length-prefixed byte run.
        let value_len = read_u16(&mut cursor)
            .map_err(|e| Error::Deserialization(format!("Failed to read value length: {}", e)))?;
        let mut value = vec![0u8; value_len as usize];
        cursor.read_exact(&mut value)
            .map_err(|e| Error::Deserialization(format!("Failed to read value: {}", e)))?;
        // Read children.
        let children_len = read_u16(&mut cursor)
            .map_err(|e| Error::Deserialization(format!("Failed to read children length: {}", e)))?;
        let mut children = Vec::with_capacity(children_len as usize);
        for _ in 0..children_len {
            let key_part = read_string(&mut cursor)
                .map_err(|e| Error::Deserialization(format!("Failed to read child key part: {}", e)))?;
            let node_id = read_u32(&mut cursor)
                .map_err(|e| Error::Deserialization(format!("Failed to read child node ID: {}", e)))?;
            children.push(NodeRef {
                key_part,
                node_id,
            });
        }
        // Read leaf flag.
        let mut is_leaf_byte = [0u8; 1];
        cursor.read_exact(&mut is_leaf_byte)
            .map_err(|e| Error::Deserialization(format!("Failed to read leaf flag: {}", e)))?;
        let is_leaf = is_leaf_byte[0] == 1;
        Ok(Node {
            key_segment,
            value,
            children,
            is_leaf,
        })
    }
}
// Helper functions for serialization

/// Appends a length-prefixed (u16, little-endian) UTF-8 string.
///
/// # Panics
///
/// Panics if `s` is longer than `u16::MAX` bytes; the length was previously
/// truncated silently with `as u16`, corrupting the record.
fn write_string(buffer: &mut Vec<u8>, s: &str) {
    let bytes = s.as_bytes();
    assert!(
        bytes.len() <= u16::MAX as usize,
        "string too large to serialize ({} bytes)",
        bytes.len()
    );
    write_u16(buffer, bytes.len() as u16);
    buffer.extend_from_slice(bytes);
}

/// Appends a u16 in little-endian byte order.
fn write_u16(buffer: &mut Vec<u8>, value: u16) {
    buffer.extend_from_slice(&value.to_le_bytes());
}

/// Appends a u32 in little-endian byte order.
fn write_u32(buffer: &mut Vec<u8>, value: u32) {
    buffer.extend_from_slice(&value.to_le_bytes());
}
// Helper functions for deserialization

/// Reads a length-prefixed (u16, little-endian) UTF-8 string.
fn read_string(cursor: &mut Cursor<&[u8]>) -> std::io::Result<String> {
    let len = read_u16(cursor)? as usize;
    let mut raw = vec![0u8; len];
    cursor.read_exact(&mut raw)?;
    // Surface invalid UTF-8 as an I/O error so callers handle one error type.
    String::from_utf8(raw).map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}

/// Reads a little-endian u16.
fn read_u16(cursor: &mut Cursor<&[u8]>) -> std::io::Result<u16> {
    let mut raw = [0u8; size_of::<u16>()];
    cursor.read_exact(&mut raw)?;
    Ok(u16::from_le_bytes(raw))
}

/// Reads a little-endian u32.
fn read_u32(cursor: &mut Cursor<&[u8]>) -> std::io::Result<u32> {
    let mut raw = [0u8; size_of::<u32>()];
    cursor.read_exact(&mut raw)?;
    Ok(u32::from_le_bytes(raw))
}
/// Returns the longest common prefix of two strings, respecting UTF-8
/// character boundaries.
///
/// The previous implementation compared raw bytes and sliced at the byte
/// index, which panics when the shared bytes end inside a multi-byte
/// character (e.g. `"é"` and `"è"` share only their first byte).
pub fn get_common_prefix(a: &str, b: &str) -> String {
    a.chars()
        .zip(b.chars())
        .take_while(|(ca, cb)| ca == cb)
        .map(|(ca, _)| ca)
        .collect()
}

View File

@@ -0,0 +1,144 @@
use radixtree::RadixTree;
use std::path::PathBuf;
use tempfile::tempdir;
// Round-trips a single key through set/get, update, and delete on a fresh tree.
#[test]
fn test_basic_operations() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Test setting and getting values
    let key = "test_key";
    let value = b"test_value".to_vec();
    tree.set(key, value.clone())?;
    let retrieved_value = tree.get(key)?;
    assert_eq!(retrieved_value, value);
    // Test updating a value
    let new_value = b"updated_value".to_vec();
    tree.update(key, new_value.clone())?;
    let updated_value = tree.get(key)?;
    assert_eq!(updated_value, new_value);
    // Test deleting a value
    tree.delete(key)?;
    // Trying to get a deleted key should return an error
    let result = tree.get(key);
    assert!(result.is_err());
    Ok(())
}
// The empty string is a valid key, stored on the root node itself.
#[test]
fn test_empty_key() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Test setting and getting empty key
    let key = "";
    let value = b"value_for_empty_key".to_vec();
    tree.set(key, value.clone())?;
    let retrieved_value = tree.get(key)?;
    assert_eq!(retrieved_value, value);
    // Test deleting empty key
    tree.delete(key)?;
    // Trying to get a deleted key should return an error
    let result = tree.get(key);
    assert!(result.is_err());
    Ok(())
}
// Several unrelated keys must be retrievable independently.
#[test]
fn test_multiple_keys() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Insert multiple keys
    let test_data = [
        ("key1", b"value1".to_vec()),
        ("key2", b"value2".to_vec()),
        ("key3", b"value3".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone())?;
    }
    // Verify all keys can be retrieved
    for (key, expected_value) in &test_data {
        let retrieved_value = tree.get(key)?;
        assert_eq!(&retrieved_value, expected_value);
    }
    Ok(())
}
// Keys sharing the "test" prefix must not clobber one another.
#[test]
fn test_shared_prefixes() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Insert keys with shared prefixes
    let test_data = [
        ("test", b"value_test".to_vec()),
        ("testing", b"value_testing".to_vec()),
        ("tested", b"value_tested".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone())?;
    }
    // Verify all keys can be retrieved
    for (key, expected_value) in &test_data {
        let retrieved_value = tree.get(key)?;
        assert_eq!(&retrieved_value, expected_value);
    }
    Ok(())
}
// Data written by one instance must be visible to a new instance opened on
// the same path with reset = false.
#[test]
fn test_persistence() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree and add some data
    {
        let mut tree = RadixTree::new(db_path, true)?;
        tree.set("persistent_key", b"persistent_value".to_vec())?;
    } // Tree is dropped here
    // Create a new tree instance with the same path
    {
        let mut tree = RadixTree::new(db_path, false)?;
        let value = tree.get("persistent_key")?;
        assert_eq!(value, b"persistent_value".to_vec());
    }
    Ok(())
}

View File

@@ -0,0 +1,153 @@
use radixtree::RadixTree;
use std::collections::HashMap;
use tempfile::tempdir;
// getall must return the values of every key under a prefix, and nothing else.
#[test]
fn test_getall() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Set up test data with common prefixes
    let test_data: HashMap<&str, &str> = [
        ("user_1", "data1"),
        ("user_2", "data2"),
        ("user_3", "data3"),
        ("admin_1", "admin_data1"),
        ("admin_2", "admin_data2"),
        ("guest", "guest_data"),
    ].iter().cloned().collect();
    // Set all test data
    for (key, value) in &test_data {
        tree.set(key, value.as_bytes().to_vec())?;
    }
    // Test getall with 'user_' prefix
    let user_values = tree.getall("user_")?;
    // Should return 3 values
    assert_eq!(user_values.len(), 3);
    // Convert byte arrays to strings for easier comparison
    let user_value_strings: Vec<String> = user_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();
    // Check all expected values are present
    assert!(user_value_strings.contains(&"data1".to_string()));
    assert!(user_value_strings.contains(&"data2".to_string()));
    assert!(user_value_strings.contains(&"data3".to_string()));
    // Test getall with 'admin_' prefix
    let admin_values = tree.getall("admin_")?;
    // Should return 2 values
    assert_eq!(admin_values.len(), 2);
    // Convert byte arrays to strings for easier comparison
    let admin_value_strings: Vec<String> = admin_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();
    // Check all expected values are present
    assert!(admin_value_strings.contains(&"admin_data1".to_string()));
    assert!(admin_value_strings.contains(&"admin_data2".to_string()));
    // Test getall with empty prefix (should return all values)
    let all_values = tree.getall("")?;
    // Should return all 6 values
    assert_eq!(all_values.len(), test_data.len());
    // Test getall with non-existent prefix
    let non_existent_values = tree.getall("xyz")?;
    // Should return empty array
    assert_eq!(non_existent_values.len(), 0);
    Ok(())
}
// getall must reflect in-place updates made through update().
#[test]
fn test_getall_with_updates() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Set initial values
    tree.set("key1", b"value1".to_vec())?;
    tree.set("key2", b"value2".to_vec())?;
    tree.set("key3", b"value3".to_vec())?;
    // Get initial values
    let initial_values = tree.getall("key")?;
    assert_eq!(initial_values.len(), 3);
    // Update a value
    tree.update("key2", b"updated_value2".to_vec())?;
    // Get values after update
    let updated_values = tree.getall("key")?;
    assert_eq!(updated_values.len(), 3);
    // Convert to strings for easier comparison
    let updated_value_strings: Vec<String> = updated_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();
    // Check the updated value is present
    assert!(updated_value_strings.contains(&"value1".to_string()));
    assert!(updated_value_strings.contains(&"updated_value2".to_string()));
    assert!(updated_value_strings.contains(&"value3".to_string()));
    Ok(())
}
// getall must stop returning values for deleted keys.
#[test]
fn test_getall_with_deletions() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the test
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_str().unwrap();
    // Create a new radix tree
    let mut tree = RadixTree::new(db_path, true)?;
    // Set initial values
    tree.set("prefix_1", b"value1".to_vec())?;
    tree.set("prefix_2", b"value2".to_vec())?;
    tree.set("prefix_3", b"value3".to_vec())?;
    tree.set("other", b"other_value".to_vec())?;
    // Get initial values
    let initial_values = tree.getall("prefix_")?;
    assert_eq!(initial_values.len(), 3);
    // Delete a key
    tree.delete("prefix_2")?;
    // Get values after deletion
    let after_delete_values = tree.getall("prefix_")?;
    assert_eq!(after_delete_values.len(), 2);
    // Convert to strings for easier comparison
    let after_delete_strings: Vec<String> = after_delete_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();
    // Check the remaining values
    assert!(after_delete_strings.contains(&"value1".to_string()));
    assert!(after_delete_strings.contains(&"value3".to_string()));
    Ok(())
}

View File

@@ -0,0 +1,185 @@
use radixtree::RadixTree;
use std::collections::HashMap;
use tempfile::tempdir;
#[test]
fn test_list() -> Result<(), radixtree::Error> {
// Create a temporary directory for the test
let temp_dir = tempdir().expect("Failed to create temp directory");
let db_path = temp_dir.path().to_str().unwrap();
// Create a new radix tree
let mut tree = RadixTree::new(db_path, true)?;
// Insert keys with various prefixes
let test_data: HashMap<&str, &str> = [
("apple", "fruit1"),
("application", "software1"),
("apply", "verb1"),
("banana", "fruit2"),
("ball", "toy1"),
("cat", "animal1"),
("car", "vehicle1"),
("cargo", "shipping1"),
].iter().cloned().collect();
// Set all test data
for (key, value) in &test_data {
tree.set(key, value.as_bytes().to_vec())?;
}
// Test prefix 'app' - should return apple, application, apply
let app_keys = tree.list("app")?;
assert_eq!(app_keys.len(), 3);
assert!(app_keys.contains(&"apple".to_string()));
assert!(app_keys.contains(&"application".to_string()));
assert!(app_keys.contains(&"apply".to_string()));
// Test prefix 'ba' - should return banana, ball
let ba_keys = tree.list("ba")?;
assert_eq!(ba_keys.len(), 2);
assert!(ba_keys.contains(&"banana".to_string()));
assert!(ba_keys.contains(&"ball".to_string()));
// Test prefix 'car' - should return car, cargo
let car_keys = tree.list("car")?;
assert_eq!(car_keys.len(), 2);
assert!(car_keys.contains(&"car".to_string()));
assert!(car_keys.contains(&"cargo".to_string()));
// Test prefix 'z' - should return empty list
let z_keys = tree.list("z")?;
assert_eq!(z_keys.len(), 0);
// Test empty prefix - should return all keys
let all_keys = tree.list("")?;
assert_eq!(all_keys.len(), test_data.len());
for key in test_data.keys() {
assert!(all_keys.contains(&key.to_string()));
}
// Test exact key as prefix - should return just that key
let exact_key = tree.list("apple")?;
assert_eq!(exact_key.len(), 1);
assert_eq!(exact_key[0], "apple");
Ok(())
}
#[test]
fn test_list_with_deletion() -> Result<(), radixtree::Error> {
// Create a temporary directory for the test
let temp_dir = tempdir().expect("Failed to create temp directory");
let db_path = temp_dir.path().to_str().unwrap();
// Create a new radix tree
let mut tree = RadixTree::new(db_path, true)?;
// Set keys with common prefixes
tree.set("test1", b"value1".to_vec())?;
tree.set("test2", b"value2".to_vec())?;
tree.set("test3", b"value3".to_vec())?;
tree.set("other", b"value4".to_vec())?;
// Initial check
let test_keys = tree.list("test")?;
assert_eq!(test_keys.len(), 3);
assert!(test_keys.contains(&"test1".to_string()));
assert!(test_keys.contains(&"test2".to_string()));
assert!(test_keys.contains(&"test3".to_string()));
// Delete one key
tree.delete("test2")?;
// Check after deletion
let test_keys_after = tree.list("test")?;
assert_eq!(test_keys_after.len(), 2);
assert!(test_keys_after.contains(&"test1".to_string()));
assert!(!test_keys_after.contains(&"test2".to_string()));
assert!(test_keys_after.contains(&"test3".to_string()));
// Check all keys
let all_keys = tree.list("")?;
assert_eq!(all_keys.len(), 3);
assert!(all_keys.contains(&"other".to_string()));
Ok(())
}
#[test]
fn test_list_edge_cases() -> Result<(), radixtree::Error> {
    // Throwaway database location.
    let dir = tempdir().expect("Failed to create temp directory");
    let path = dir.path().to_str().unwrap();
    let mut tree = RadixTree::new(path, true)?;
    // An empty tree yields no matches for any prefix.
    assert!(tree.list("any")?.is_empty());
    // One key in the tree.
    tree.set("single", b"value".to_vec())?;
    // A prefix longer than every stored key matches nothing.
    assert!(tree.list("singlelonger")?.is_empty());
    // A strict prefix of the key matches it.
    let hits = tree.list("sing")?;
    assert_eq!(hits, vec!["single".to_string()]);
    // Very long keys sharing a 100-character prefix are both found.
    let shared = "a".repeat(100);
    let key_a = format!("{}key1", shared);
    let key_b = format!("{}key2", shared);
    tree.set(&key_a, b"value1".to_vec())?;
    tree.set(&key_b, b"value2".to_vec())?;
    let long_hits = tree.list(&shared)?;
    assert_eq!(long_hits.len(), 2);
    assert!(long_hits.contains(&key_a));
    assert!(long_hits.contains(&key_b));
    Ok(())
}
#[test]
fn test_list_performance() -> Result<(), radixtree::Error> {
    // Throwaway database location.
    let dir = tempdir().expect("Failed to create temp directory");
    let path = dir.path().to_str().unwrap();
    let mut tree = RadixTree::new(path, true)?;
    // Populate 100 keys under each of five distinct prefixes (500 keys total).
    let prefixes = ["user", "post", "comment", "like", "share"];
    for p in &prefixes {
        for n in 0..100 {
            let key = format!("{}_{}", p, n);
            let val = format!("value_{}", key).into_bytes();
            tree.set(&key, val)?;
        }
    }
    // Each prefix must return exactly its own 100 keys.
    for p in &prefixes {
        let hits = tree.list(p)?;
        assert_eq!(hits.len(), 100);
        // Every returned key carries the queried prefix.
        assert!(hits.iter().all(|k| k.starts_with(*p)));
    }
    // The empty prefix enumerates every key.
    assert_eq!(tree.list("")?.len(), 500);
    Ok(())
}

View File

@@ -0,0 +1,180 @@
use radixtree::{Node, NodeRef};
#[test]
fn test_node_serialization() {
    // Build a node carrying a value and two child references.
    let original = Node {
        key_segment: "test".to_string(),
        value: b"test_value".to_vec(),
        children: vec![
            NodeRef { key_part: "child1".to_string(), node_id: 1 },
            NodeRef { key_part: "child2".to_string(), node_id: 2 },
        ],
        is_leaf: true,
    };
    // Round-trip through the binary format.
    let bytes = original.serialize();
    let restored = Node::deserialize(&bytes).expect("Failed to deserialize node");
    // Every field must survive the round trip unchanged.
    assert_eq!(restored.key_segment, original.key_segment);
    assert_eq!(restored.value, original.value);
    assert_eq!(restored.is_leaf, original.is_leaf);
    assert_eq!(restored.children.len(), original.children.len());
    for (restored_child, child) in restored.children.iter().zip(original.children.iter()) {
        assert_eq!(restored_child.key_part, child.key_part);
        assert_eq!(restored_child.node_id, child.node_id);
    }
}
#[test]
fn test_empty_node_serialization() {
    // A node with no key segment, no value, and no children.
    let original = Node {
        key_segment: String::new(),
        value: Vec::new(),
        children: Vec::new(),
        is_leaf: false,
    };
    // Round-trip through the binary format.
    let restored =
        Node::deserialize(&original.serialize()).expect("Failed to deserialize node");
    // All fields survive unchanged.
    assert_eq!(restored.key_segment, original.key_segment);
    assert_eq!(restored.value, original.value);
    assert_eq!(restored.is_leaf, original.is_leaf);
    assert_eq!(restored.children.len(), original.children.len());
}
#[test]
fn test_node_with_many_children() {
    // Generate 100 child references.
    let children: Vec<NodeRef> = (0..100)
        .map(|i| NodeRef {
            key_part: format!("child{}", i),
            node_id: i as u32,
        })
        .collect();
    let original = Node {
        key_segment: "parent".to_string(),
        value: b"parent_value".to_vec(),
        children,
        is_leaf: true,
    };
    // Round-trip through the binary format.
    let restored =
        Node::deserialize(&original.serialize()).expect("Failed to deserialize node");
    // Scalar fields survive unchanged.
    assert_eq!(restored.key_segment, original.key_segment);
    assert_eq!(restored.value, original.value);
    assert_eq!(restored.is_leaf, original.is_leaf);
    assert_eq!(restored.children.len(), original.children.len());
    // Each child reference survives in order.
    for (restored_child, child) in restored.children.iter().zip(original.children.iter()) {
        assert_eq!(restored_child.key_part, child.key_part);
        assert_eq!(restored_child.node_id, child.node_id);
    }
}
#[test]
fn test_node_with_large_value() {
    // 4 KB payload of zero bytes.
    let payload = vec![0u8; 4096];
    let original = Node {
        key_segment: "large_value".to_string(),
        value: payload.clone(),
        children: Vec::new(),
        is_leaf: true,
    };
    // Round-trip through the binary format.
    let restored =
        Node::deserialize(&original.serialize()).expect("Failed to deserialize node");
    // The large payload and all other fields survive unchanged.
    assert_eq!(restored.key_segment, original.key_segment);
    assert_eq!(restored.value, original.value);
    assert_eq!(restored.is_leaf, original.is_leaf);
    assert_eq!(restored.children.len(), original.children.len());
}
#[test]
fn test_version_compatibility() {
    // Guards the on-disk format: byte 0 of the serialized form is the
    // format version and must stay at 1.
    let original = Node {
        key_segment: "test".to_string(),
        value: b"test_value".to_vec(),
        children: vec![NodeRef {
            key_part: "child".to_string(),
            node_id: 1,
        }],
        is_leaf: true,
    };
    let bytes = original.serialize();
    // Version byte check.
    assert_eq!(bytes[0], 1);
    // The versioned payload still round-trips.
    let restored = Node::deserialize(&bytes).expect("Failed to deserialize node");
    assert_eq!(restored.key_segment, original.key_segment);
    assert_eq!(restored.value, original.value);
    assert_eq!(restored.is_leaf, original.is_leaf);
    assert_eq!(restored.children.len(), original.children.len());
}
#[test]
fn test_invalid_serialization() {
    // Deserializing zero bytes must fail.
    assert!(Node::deserialize(&[]).is_err());
    // An unknown version byte (2) must be rejected.
    assert!(Node::deserialize(&[2, 0, 0, 0, 0]).is_err());
    // A valid serialization cut in half must fail rather than produce a
    // partial node.
    let node = Node {
        key_segment: "test".to_string(),
        value: b"test_value".to_vec(),
        children: Vec::new(),
        is_leaf: true,
    };
    let bytes = node.serialize();
    let half = &bytes[..bytes.len() / 2];
    assert!(Node::deserialize(half).is_err());
}

View File

@@ -0,0 +1,30 @@
[package]
name = "tst"
version = "0.1.0"
edition = "2021"
description = "A persistent ternary search tree implementation using OurDB for storage"
authors = ["OurWorld Team"]
[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
[dev-dependencies]
# criterion = "0.5.1"
# Uncomment when benchmarks are implemented
# [[bench]]
# name = "tst_benchmarks"
# harness = false
[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"
[[example]]
name = "prefix_ops"
path = "examples/prefix_ops.rs"
[[example]]
name = "performance"
path = "examples/performance.rs"

185
packages/data/tst/README.md Normal file
View File

@@ -0,0 +1,185 @@
# Ternary Search Tree (TST)
A persistent ternary search tree implementation in Rust using OurDB for storage.
## Overview
TST is a space-optimized tree data structure that enables efficient string key operations with persistent storage. This implementation provides a persistent ternary search tree that can be used for efficient string key operations, such as auto-complete, routing tables, and more.
A ternary search tree is a type of trie where each node has three children: left, middle, and right. Unlike a radix tree which compresses common prefixes, a TST stores one character per node and uses a binary search tree-like structure for efficient traversal.
Key characteristics:
- Each node stores a single character
- Nodes have three children: left (for characters < current), middle (for next character in key), and right (for characters > current)
- Leaf nodes contain the actual values
- Balanced structure for consistent performance across operations
## Features
- Efficient string key operations
- Persistent storage using OurDB backend
- Balanced tree structure for consistent performance
- Support for binary values
- Thread-safe operations through OurDB
## Usage
Add the dependency to your `Cargo.toml`:
```toml
[dependencies]
tst = { path = "../tst" }
```
### Basic Example
```rust
use tst::TST;
fn main() -> Result<(), tst::Error> {
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Set key-value pairs
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
// Get values by key
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value)); // Prints: world
// List keys by prefix
let keys = tree.list("hel")?; // Returns ["hello", "help"]
println!("Keys with prefix 'hel': {:?}", keys);
// Get all values by prefix
let values = tree.getall("hel")?; // Returns [b"world", b"me"]
// Delete keys
tree.delete("help")?;
Ok(())
}
```
## API
### Creating a TST
```rust
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;
// Create a new ternary search tree and reset if it exists
let mut tree = TST::new("/tmp/tst", true)?;
```
### Setting Values
```rust
// Set a key-value pair
tree.set("key", b"value".to_vec())?;
```
### Getting Values
```rust
// Get a value by key
let value = tree.get("key")?;
```
### Deleting Keys
```rust
// Delete a key
tree.delete("key")?;
```
### Listing Keys by Prefix
```rust
// List all keys with a given prefix
let keys = tree.list("prefix")?;
```
### Getting All Values by Prefix
```rust
// Get all values for keys with a given prefix
let values = tree.getall("prefix")?;
```
## Performance Characteristics
- Search: O(k) where k is the key length
- Insert: O(k) for new keys
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total number of nodes
## Use Cases
TST is particularly useful for:
- Prefix-based searching
- Auto-complete systems
- Dictionary implementations
- Spell checking
- Any application requiring efficient string key operations with persistence
## Implementation Details
The TST implementation uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution
## Running Tests
The project includes a comprehensive test suite that verifies all functionality:
```bash
cd packages/data/tst
# Run all tests
cargo test
# Run specific test file
cargo test --test basic_test
cargo test --test prefix_test
```
## Running Examples
The project includes example applications that demonstrate how to use the TST:
```bash
# Run the basic usage example
cargo run --example basic_usage
# Run the prefix operations example
cargo run --example prefix_ops
# Run the performance test
cargo run --example performance
```
## Comparison with RadixTree
While both TST and RadixTree provide efficient string key operations, they have different characteristics:
- **TST**: Stores one character per node, with a balanced structure for consistent performance across operations.
- **RadixTree**: Compresses common prefixes, which can be more space-efficient for keys with long common prefixes.
Choose TST when:
- You need balanced performance across all operations
- Your keys don't share long common prefixes
- You want a simpler implementation with predictable performance
Choose RadixTree when:
- Space efficiency is a priority
- Your keys share long common prefixes
- You prioritize lookup performance over balanced performance
## License
This project is licensed under the same license as the HeroCode project.

View File

@@ -0,0 +1,75 @@
use std::time::Instant;
use tst::TST;
/// Walk-through of the core TST operations: create, set, get, list,
/// getall, and delete, printing each result to stdout.
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST; `true` resets any existing database at this path
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Store some data
println!("Inserting data...");
tree.set("hello", b"world".to_vec())?;
tree.set("help", b"me".to_vec())?;
tree.set("helicopter", b"flying".to_vec())?;
tree.set("apple", b"fruit".to_vec())?;
tree.set("application", b"software".to_vec())?;
tree.set("banana", b"yellow".to_vec())?;
// Retrieve and print the data
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value));
// List keys with prefix, timing the lookup
println!("\nListing keys with prefix 'hel':");
let start = Instant::now();
let keys = tree.list("hel")?;
let duration = start.elapsed();
for key in &keys {
println!(" {}", key);
}
println!("Found {} keys in {:?}", keys.len(), duration);
// Get all values with prefix (values only, no keys)
println!("\nGetting all values with prefix 'app':");
let start = Instant::now();
let values = tree.getall("app")?;
let duration = start.elapsed();
for (i, value) in values.iter().enumerate() {
println!(" Value {}: {}", i + 1, String::from_utf8_lossy(value));
}
println!("Found {} values in {:?}", values.len(), duration);
// Delete a key
println!("\nDeleting 'help'...");
tree.delete("help")?;
// Verify deletion: the prefix listing should shrink by one
println!("Listing keys with prefix 'hel' after deletion:");
let keys_after = tree.list("hel")?;
for key in &keys_after {
println!(" {}", key);
}
// Try to get a deleted key; `get` is expected to return an error here
match tree.get("help") {
Ok(_) => println!("Unexpectedly found 'help' after deletion!"),
Err(e) => println!("As expected, 'help' was not found: {}", e),
}
// Clean up (optional) — set KEEP_DB to keep the directory for inspection
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}

View File

@@ -0,0 +1,167 @@
use std::io::{self, Write};
use std::time::{Duration, Instant};
use tst::TST;
/// Builds a deterministic test payload of exactly `size` bytes by
/// repeating the pattern `"val<index zero-padded to 8 digits>"` and
/// cutting it off at `size`.
fn generate_test_value(index: usize, size: usize) -> Vec<u8> {
    let pattern = format!("val{:08}", index);
    // Cycle the pattern bytes and take exactly `size` of them; this is
    // equivalent to extend-then-truncate but allocates once.
    pattern.bytes().cycle().take(size).collect()
}
// Number of records to insert during the benchmark run
const TOTAL_RECORDS: usize = 100_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 1_000;
// How many records to use for performance sampling (every Nth insert is timed)
const PERFORMANCE_SAMPLE_SIZE: usize = 100;
/// Insert/read benchmark for the TST: inserts TOTAL_RECORDS keys with
/// 100-byte values, reports throughput per PROGRESS_INTERVAL batch, then
/// samples point-lookup and prefix-search latency.
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_performance_test");
// Completely remove and recreate the directory to ensure a clean start
if db_path.exists() {
std::fs::remove_dir_all(&db_path)?;
}
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
println!("Will insert {} records and show progress...", TOTAL_RECORDS);
// Create a new TST; `true` resets any existing database at this path
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Track overall time
let start_time = Instant::now();
// Track performance metrics: one (record_count, batch_duration) per batch
let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
let mut last_batch_time = Instant::now();
let mut last_batch_records = 0;
// Insert records and track progress
for i in 0..TOTAL_RECORDS {
let key = format!("key:{:08}", i);
// Generate a 100-byte value
let value = generate_test_value(i, 100);
// Time the insertion of every Nth record for performance sampling
if i % PERFORMANCE_SAMPLE_SIZE == 0 {
let insert_start = Instant::now();
tree.set(&key, value)?;
let insert_duration = insert_start.elapsed();
// Only print detailed timing for specific samples to avoid flooding output
if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
println!("Record {}: Insertion took {:?}", i, insert_duration);
}
} else {
tree.set(&key, value)?;
}
// Show progress at intervals (and always on the final record)
if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
let records_in_batch = i + 1 - last_batch_records;
let batch_duration = last_batch_time.elapsed();
let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();
insertion_times.push((i + 1, batch_duration));
// \r overwrites the progress line in place
print!(
"\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
i + 1,
TOTAL_RECORDS,
(i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
records_per_second
);
io::stdout().flush().unwrap();
last_batch_time = Instant::now();
last_batch_records = i + 1;
}
}
let total_duration = start_time.elapsed();
println!("\n\nPerformance Summary:");
println!(
"Total time to insert {} records: {:?}",
TOTAL_RECORDS, total_duration
);
println!(
"Average insertion rate: {:.2} records/second",
TOTAL_RECORDS as f64 / total_duration.as_secs_f64()
);
// Show performance trend
println!("\nPerformance Trend (records inserted vs. time per batch):");
for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
if i % 10 == 0 || i == insertion_times.len() - 1 {
// Only show every 10th point to avoid too much output
println!(
" After {} records: {:?} for {} records ({:.2} records/sec)",
record_count,
duration,
PROGRESS_INTERVAL,
PROGRESS_INTERVAL as f64 / duration.as_secs_f64()
);
}
}
// Test access performance with distributed samples
println!("\nTesting access performance with distributed samples...");
let mut total_get_time = Duration::new(0, 0);
let num_samples = 1000;
// Use a simple distribution pattern instead of random
for i in 0..num_samples {
// Distribute samples across the entire range
let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
let key = format!("key:{:08}", sample_id);
let get_start = Instant::now();
let _ = tree.get(&key)?;
total_get_time += get_start.elapsed();
}
println!(
"Average time to retrieve a record: {:?}",
total_get_time / num_samples as u32
);
// Test prefix search performance
println!("\nTesting prefix search performance...");
let prefixes = ["key:0", "key:1", "key:5", "key:9"];
for prefix in &prefixes {
let list_start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = list_start.elapsed();
println!(
"Found {} keys with prefix '{}' in {:?}",
keys.len(),
prefix,
list_duration
);
}
// Clean up (optional) — set KEEP_DB to keep the directory for inspection
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}

View File

@@ -0,0 +1,184 @@
use std::time::Instant;
use tst::TST;
/// Demonstrates prefix-oriented TST operations (`list` / `getall`) on a
/// mixed data set of names, cities, and countries.
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_prefix_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST; `true` resets any existing database at this path
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Insert a variety of keys with different prefixes
println!("Inserting data with various prefixes...");
// Names
let names = [
"Alice",
"Alexander",
"Amanda",
"Andrew",
"Amy",
"Bob",
"Barbara",
"Benjamin",
"Brenda",
"Brian",
"Charlie",
"Catherine",
"Christopher",
"Cynthia",
"Carl",
"David",
"Diana",
"Daniel",
"Deborah",
"Donald",
"Edward",
"Elizabeth",
"Eric",
"Emily",
"Ethan",
];
for (i, name) in names.iter().enumerate() {
let value = format!("person-{}", i).into_bytes();
tree.set(name, value)?;
}
// Cities
let cities = [
"New York",
"Los Angeles",
"Chicago",
"Houston",
"Phoenix",
"Philadelphia",
"San Antonio",
"San Diego",
"Dallas",
"San Jose",
"Austin",
"Jacksonville",
"Fort Worth",
"Columbus",
"San Francisco",
"Charlotte",
"Indianapolis",
"Seattle",
"Denver",
"Washington",
];
for (i, city) in cities.iter().enumerate() {
let value = format!("city-{}", i).into_bytes();
tree.set(city, value)?;
}
// Countries
let countries = [
"United States",
"Canada",
"Mexico",
"Brazil",
"Argentina",
"United Kingdom",
"France",
"Germany",
"Italy",
"Spain",
"China",
"Japan",
"India",
"Australia",
"Russia",
];
for (i, country) in countries.iter().enumerate() {
let value = format!("country-{}", i).into_bytes();
tree.set(country, value)?;
}
println!(
"Total items inserted: {}",
names.len() + cities.len() + countries.len()
);
// Test prefix operations on single-letter and multi-word prefixes
test_prefix(&mut tree, "A")?;
test_prefix(&mut tree, "B")?;
test_prefix(&mut tree, "C")?;
test_prefix(&mut tree, "San")?;
test_prefix(&mut tree, "United")?;
// Test non-existent prefix
test_prefix(&mut tree, "Z")?;
// Test empty prefix (should return all keys)
println!("\nTesting empty prefix (should return all keys):");
let start = Instant::now();
let all_keys = tree.list("")?;
let duration = start.elapsed();
println!(
"Found {} keys with empty prefix in {:?}",
all_keys.len(),
duration
);
// NOTE(review): the label below assumes list() returns keys in sorted
// order — confirm against the tree's traversal order.
println!("First 5 keys (alphabetically):");
for key in all_keys.iter().take(5) {
println!(" {}", key);
}
// Clean up (optional) — set KEEP_DB to keep the directory for inspection
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
println!("\nCleaned up database directory");
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
/// Runs and times `list` and `getall` for one prefix, printing the
/// matching keys, the number of values, and the first value (if any).
fn test_prefix(tree: &mut TST, prefix: &str) -> Result<(), tst::Error> {
println!("\nTesting prefix '{}':", prefix);
// Test list operation
let start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = start.elapsed();
println!(
"Found {} keys with prefix '{}' in {:?}",
keys.len(),
prefix,
list_duration
);
// Only run getall when something matched; a miss prints just the count
if !keys.is_empty() {
println!("Keys:");
for key in &keys {
println!(" {}", key);
}
// Test getall operation
let start = Instant::now();
let values = tree.getall(prefix)?;
let getall_duration = start.elapsed();
println!("Retrieved {} values in {:?}", values.len(), getall_duration);
println!(
"First value: {}",
if !values.is_empty() {
String::from_utf8_lossy(&values[0])
} else {
"None".into()
}
);
}
Ok(())
}

View File

@@ -0,0 +1,36 @@
//! Error types for the TST module.
use std::io;
use thiserror::Error;
/// Error type for TST operations.
///
/// All fallible TST functions return `Result<_, Error>`. `ourdb` and
/// `std::io` errors convert automatically via the `#[from]` impls below.
#[derive(Debug, Error)]
pub enum Error {
/// Error from OurDB operations.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// Error when a key is not found (carries the requested key).
#[error("Key not found: {0}")]
KeyNotFound(String),
/// Error when a prefix is not found (carries the requested prefix).
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization.
#[error("Serialization error: {0}")]
Serialization(String),
/// Error during deserialization.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// Error for invalid operations (e.g. an empty key).
#[error("Invalid operation: {0}")]
InvalidOperation(String),
/// IO error.
#[error("IO error: {0}")]
IO(#[from] io::Error),
}

View File

@@ -0,0 +1,122 @@
//! TST is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent ternary search tree that can be used for efficient
//! string key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::TSTNode;
use ourdb::OurDB;
/// TST represents a ternary search tree data structure with persistent storage.
pub struct TST {
/// Database for persistent storage
db: OurDB,
/// Database ID of the root node.
// NOTE(review): operations::new_tst assumes an existing root always has
// record ID 1 — confirm this invariant holds for OurDB incremental mode.
root_id: Option<u32>,
}
impl TST {
// Thin facade: every method delegates to the free functions in
// `operations`. Methods take `&mut self` because the underlying OurDB
// handle is accessed mutably even for reads.
/// Creates a new TST with the specified database path.
///
/// # Arguments
///
/// * `path` - The path to the database directory
/// * `reset` - Whether to reset the database if it exists
///
/// # Returns
///
/// A new `TST` instance
///
/// # Errors
///
/// Returns an error if the database cannot be created or opened
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
operations::new_tst(path, reset)
}
/// Sets a key-value pair in the tree.
///
/// # Arguments
///
/// * `key` - The key to set
/// * `value` - The value to set
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
operations::set(self, key, value)
}
/// Gets a value by key from the tree.
///
/// # Arguments
///
/// * `key` - The key to get
///
/// # Returns
///
/// The value associated with the key
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
operations::get(self, key)
}
/// Deletes a key from the tree.
///
/// # Arguments
///
/// * `key` - The key to delete
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
operations::delete(self, key)
}
/// Lists all keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of keys that start with the given prefix; empty when nothing
/// matches the prefix.
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
operations::list(self, prefix)
}
/// Gets all values for keys with a given prefix.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of values for keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
operations::getall(self, prefix)
}
}

View File

@@ -0,0 +1,49 @@
//! Node types for the TST module.
/// Represents a node in the ternary search tree.
#[derive(Debug, Clone, PartialEq)]
pub struct TSTNode {
    /// The character stored at this node.
    pub character: char,
    /// Value stored at this node (empty if not end of key).
    pub value: Vec<u8>,
    /// Whether this node represents the end of a key.
    pub is_end_of_key: bool,
    /// Reference to the left child node (for characters < current character).
    pub left_id: Option<u32>,
    /// Reference to the middle child node (for next character in key).
    pub middle_id: Option<u32>,
    /// Reference to the right child node (for characters > current character).
    pub right_id: Option<u32>,
}

impl TSTNode {
    /// Builds a node for `character` with no children attached yet.
    pub fn new(character: char, value: Vec<u8>, is_end_of_key: bool) -> Self {
        Self {
            character,
            value,
            is_end_of_key,
            left_id: None,
            middle_id: None,
            right_id: None,
        }
    }

    /// Builds the sentinel root node: the NUL character, no value, and
    /// not marking the end of any key.
    pub fn new_root() -> Self {
        Self::new('\0', Vec::new(), false)
    }
}

View File

@@ -0,0 +1,453 @@
//! Implementation of TST operations.
use crate::error::Error;
use crate::node::TSTNode;
use crate::TST;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Creates a new TST with the specified database path.
///
/// Opens (or resets) an OurDB instance at `path` and ensures a root node
/// exists before returning the handle.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
let path_buf = PathBuf::from(path);
// Create the configuration for OurDB with reset parameter
let config = OurDBConfig {
path: path_buf.clone(),
incremental_mode: true,
file_size: Some(1024 * 1024), // 1MB file size for better performance with large datasets
keysize: Some(4), // Use keysize=4 (default)
reset: Some(reset), // Use the reset parameter
};
// Create a new OurDB instance (it will handle reset internally)
let mut db = OurDB::new(config)?;
// NOTE(review): assumes get_next_id() == 1 means an empty database, so a
// fresh root must be created — confirm against OurDB's ID allocation.
let root_id = if db.get_next_id()? == 1 || reset {
// Create a new root node
let root = TSTNode::new_root();
let root_id = db.set(OurDBSetArgs {
id: None,
data: &root.serialize(),
})?;
Some(root_id)
} else {
// Use existing root node
Some(1) // Root node always has ID 1
};
Ok(TST { db, root_id })
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
set_recursive(tree, root_id, &chars, 0, value)?;
Ok(())
}
/// Recursive helper function for setting a key-value pair.
///
/// Descends from `node_id` following `chars[pos..]`, creating missing
/// left/middle/right children on the way, and returns the ID of the node
/// that was last saved.
fn set_recursive(
tree: &mut TST,
node_id: u32,
chars: &[char],
pos: usize,
value: Vec<u8>,
) -> Result<u32, Error> {
let mut node = tree.get_node(node_id)?;
if pos >= chars.len() {
// We've reached the end of the key: mark this node as terminal
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
let current_char = chars[pos];
// '\0' is the sentinel set by TSTNode::new_root, so this branch only
// fires for a freshly created (still empty) root node.
if node.character == '\0' {
// Root node or empty node, set the character
node.character = current_char;
let node_id = tree.save_node(Some(node_id), &node)?;
// Continue with the next character
if pos + 1 < chars.len() {
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Re-read the node after saving so the middle link lands on the
// persisted copy rather than the stale in-memory one
let mut updated_node = tree.get_node(node_id)?;
updated_node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &updated_node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
} else {
// This is the last character
let mut updated_node = tree.get_node(node_id)?;
updated_node.is_end_of_key = true;
updated_node.value = value;
return tree.save_node(Some(node_id), &updated_node);
}
}
if current_char < node.character {
// Go left (smaller characters live in the left subtree)
if let Some(left_id) = node.left_id {
return set_recursive(tree, left_id, chars, pos, value);
} else {
// Create new left node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.left_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else if current_char > node.character {
// Go right (larger characters live in the right subtree)
if let Some(right_id) = node.right_id {
return set_recursive(tree, right_id, chars, pos, value);
} else {
// Create new right node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.right_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else {
// Character matches, go middle (next character)
if pos + 1 >= chars.len() {
// This is the last character: store the value here
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
if let Some(middle_id) = node.middle_id {
return set_recursive(tree, middle_id, chars, pos + 1, value);
} else {
// Create new middle node for the next character of the key
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
}
}
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let node = tree.get_node(node_id)?;
if node.is_end_of_key {
Ok(node.value.clone())
} else {
Err(Error::KeyNotFound(key.to_string()))
}
}
/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
let node = tree.get_node(node_id)?;
if pos >= chars.len() {
return Ok(node_id);
}
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_node(tree, left_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_node(tree, right_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
}
}
/// Deletes a key from the tree.
///
/// The key's node is only unmarked: `is_end_of_key` is cleared and the
/// stored value emptied. No node records are physically removed, so
/// repeated insert/delete cycles leave tombstone nodes in the database.
///
/// # Errors
///
/// Returns `Error::InvalidOperation` for an empty key or an uninitialized
/// tree, and `Error::KeyNotFound` when `key` is not stored.
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
    if key.is_empty() {
        return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
    }
    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };
    let chars: Vec<char> = key.chars().collect();
    let node_id = find_node(tree, root_id, &chars, 0)?;
    let mut node = tree.get_node(node_id)?;
    if !node.is_end_of_key {
        return Err(Error::KeyNotFound(key.to_string()));
    }
    // The previous version special-cased nodes with children, but both
    // branches performed exactly the same update, so the branch was dead
    // weight: unconditionally unmark the node and clear its value.
    // TODO(review): physically remove childless nodes and prune the path
    // back to the root to reclaim space.
    node.is_end_of_key = false;
    node.value = Vec::new();
    tree.save_node(Some(node_id), &node)?;
    Ok(())
}
/// Lists all keys with a given prefix. An empty prefix returns every key.
///
/// Bug fix: the previous implementation recursed into the left/right
/// children of the node matching the prefix's last character. Those
/// subtrees hold keys that *differ* at that character position, so keys
/// not sharing the prefix could be returned (e.g. "apricot" for prefix
/// "app"). Only the matching node itself (if it terminates a key) and its
/// middle subtree can contain keys that start with the prefix, so only
/// those are collected now.
///
/// # Errors
/// - `InvalidOperation` when the tree is not initialized.
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };
    let mut result = Vec::new();
    // Empty prefix: every key in the tree matches.
    if prefix.is_empty() {
        collect_all_keys(tree, root_id, String::new(), &mut result)?;
        return Ok(result);
    }
    // Find the node corresponding to the last character of the prefix.
    let chars: Vec<char> = prefix.chars().collect();
    let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
        Ok(id) => id,
        Err(_) => return Ok(Vec::new()), // Prefix not found: no matches.
    };
    let node = tree.get_node(node_id)?;
    // The prefix itself may be a stored key.
    if node.is_end_of_key {
        result.push(prefix.to_string());
    }
    // Every key strictly longer than the prefix lives in the middle subtree.
    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, prefix.to_string(), &mut result)?;
    }
    Ok(result)
}
/// Finds the node corresponding to a prefix.
fn find_prefix_node(
tree: &mut TST,
node_id: u32,
chars: &[char],
pos: usize,
) -> Result<u32, Error> {
if pos >= chars.len() {
return Ok(node_id);
}
let node = tree.get_node(node_id)?;
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_prefix_node(tree, left_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_prefix_node(tree, right_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character of the prefix
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_prefix_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
}
}
/// Collects all keys with a given prefix.
///
/// Walks the subtree rooted at `node_id`. The node's character is appended
/// to `current_path` (except the `'\0'` sentinel used by the root); middle
/// children continue from the extended path, while left/right children are
/// alternative characters for the *same* position and so inherit the
/// unextended `current_path`.
///
/// NOTE(review): this function is structurally identical to
/// `collect_all_keys`. When invoked on the node matching a prefix's last
/// character, recursing into that node's left/right children visits keys
/// that differ at that position and therefore may NOT share the caller's
/// prefix — confirm that callers account for this.
fn collect_keys_with_prefix(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;
    let mut new_path = current_path.clone();
    // For non-root nodes, add the character to the path
    if node.character != '\0' {
        new_path.push(node.character);
    }
    // If this node is an end of key, add it to the result
    if node.is_end_of_key {
        result.push(new_path.clone());
    }
    // Recursively collect keys from all children
    if let Some(left_id) = node.left_id {
        collect_keys_with_prefix(tree, left_id, current_path.clone(), result)?;
    }
    if let Some(middle_id) = node.middle_id {
        collect_keys_with_prefix(tree, middle_id, new_path.clone(), result)?;
    }
    if let Some(right_id) = node.right_id {
        collect_keys_with_prefix(tree, right_id, current_path.clone(), result)?;
    }
    Ok(())
}
/// Recursively collects every stored key in the subtree rooted at `node_id`.
///
/// Left and right children replace this node's character (same string
/// position), so they continue from the unextended path; the middle child
/// appends to it. The root sentinel character `'\0'` contributes nothing.
fn collect_all_keys(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;
    // Path including this node's character (unless it is the root sentinel).
    let extended = if node.character == '\0' {
        current_path.clone()
    } else {
        let mut p = current_path.clone();
        p.push(node.character);
        p
    };
    if node.is_end_of_key {
        result.push(extended.clone());
    }
    // Visit children in left → middle → right order to keep output stable.
    let children = [
        (node.left_id, current_path.clone()),
        (node.middle_id, extended),
        (node.right_id, current_path),
    ];
    for (child, path) in children {
        if let Some(id) = child {
            collect_all_keys(tree, id, path, result)?;
        }
    }
    Ok(())
}
/// Gets all values for keys with a given prefix.
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Get all matching keys
let keys = list(tree, prefix)?;
// Get values for each key
let mut values = Vec::new();
let mut errors = Vec::new();
for key in keys {
match get(tree, &key) {
Ok(value) => values.push(value),
Err(e) => errors.push(format!("Error getting value for key '{}': {:?}", key, e)),
}
}
// If we couldn't get any values but had keys, return the first error
if values.is_empty() && !errors.is_empty() {
return Err(Error::InvalidOperation(errors.join("; ")));
}
Ok(values)
}
impl TST {
/// Helper function to get a node from the database.
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
match self.db.get(node_id) {
Ok(data) => TSTNode::deserialize(&data),
Err(err) => Err(Error::OurDB(err)),
}
}
/// Helper function to save a node to the database.
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
let data = node.serialize();
let args = OurDBSetArgs {
id: node_id,
data: &data,
};
match self.db.set(args) {
Ok(id) => Ok(id),
Err(err) => Err(Error::OurDB(err)),
}
}
}

View File

@@ -0,0 +1,129 @@
//! Serialization and deserialization for TST nodes.
use crate::error::Error;
use crate::node::TSTNode;
/// Current binary format version.
const VERSION: u8 = 1;
impl TSTNode {
    /// Serializes a node to bytes for storage.
    ///
    /// Binary layout (all integers little-endian):
    /// - 1 byte   format version
    /// - 4 bytes  character as a u32 Unicode scalar value
    /// - 1 byte   end-of-key flag (1 = node terminates a stored key)
    /// - 4 bytes  value length (always 0 when not end of key)
    /// - N bytes  value payload
    /// - 4 bytes  left child id   (0 encodes "no child")
    /// - 4 bytes  middle child id (0 encodes "no child")
    /// - 4 bytes  right child id  (0 encodes "no child")
    pub fn serialize(&self) -> Vec<u8> {
        let mut buffer = Vec::new();
        // Version
        buffer.push(VERSION);
        // Character (as UTF-32)
        let char_bytes = (self.character as u32).to_le_bytes();
        buffer.extend_from_slice(&char_bytes);
        // Is end of key
        buffer.push(if self.is_end_of_key { 1 } else { 0 });
        // Value (only if is_end_of_key)
        if self.is_end_of_key {
            let value_len = (self.value.len() as u32).to_le_bytes();
            buffer.extend_from_slice(&value_len);
            buffer.extend_from_slice(&self.value);
        } else {
            // Zero length
            buffer.extend_from_slice(&[0, 0, 0, 0]);
        }
        // Child pointers; `None` is stored as 0, so node id 0 is reserved.
        let left_id = self.left_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&left_id);
        let middle_id = self.middle_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&middle_id);
        let right_id = self.right_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&right_id);
        buffer
    }
    /// Deserializes bytes produced by [`TSTNode::serialize`].
    ///
    /// # Errors
    /// Returns `Error::Deserialization` when the buffer is truncated, the
    /// version byte is unknown, or the character code is not a valid
    /// Unicode scalar value.
    pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
        // Fixed overhead: version (1) + character (4) + flag (1) +
        // value length (4) + three child ids (12) = 22 bytes minimum.
        // (The previous check used 14, which under-counted the trailing
        // child pointers; truncated buffers were only caught later.)
        if data.len() < 22 {
            return Err(Error::Deserialization("Data too short".to_string()));
        }
        let mut pos = 0;
        // Version
        let version = data[pos];
        pos += 1;
        if version != VERSION {
            return Err(Error::Deserialization(format!(
                "Unsupported version: {}",
                version
            )));
        }
        // Character
        let char_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
        let char_code = u32::from_le_bytes(char_bytes);
        let character = char::from_u32(char_code)
            .ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;
        pos += 4;
        // Is end of key
        let is_end_of_key = data[pos] != 0;
        pos += 1;
        // Value length
        let value_len_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
        let value_len = u32::from_le_bytes(value_len_bytes) as usize;
        pos += 4;
        // Value (bounds-checked against the declared length)
        let value = if value_len > 0 {
            if pos + value_len > data.len() {
                return Err(Error::Deserialization(
                    "Value length exceeds data".to_string(),
                ));
            }
            data[pos..pos + value_len].to_vec()
        } else {
            Vec::new()
        };
        pos += value_len;
        // Child pointers
        if pos + 12 > data.len() {
            return Err(Error::Deserialization(
                "Data too short for child pointers".to_string(),
            ));
        }
        let left_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
        let left_id = u32::from_le_bytes(left_id_bytes);
        pos += 4;
        let middle_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
        let middle_id = u32::from_le_bytes(middle_id_bytes);
        pos += 4;
        let right_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
        let right_id = u32::from_le_bytes(right_id_bytes);
        // Map the 0 sentinel back to `None`.
        Ok(TSTNode {
            character,
            value,
            is_end_of_key,
            left_id: if left_id == 0 { None } else { Some(left_id) },
            middle_id: if middle_id == 0 {
                None
            } else {
                Some(middle_id)
            },
            right_id: if right_id == 0 { None } else { Some(right_id) },
        })
    }
}
// Function removed as it was unused

View File

@@ -0,0 +1,294 @@
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
use tst::TST;
/// Creates (and returns the path of) a fresh, uniquely named temp directory
/// for a test database. A nanosecond timestamp keeps concurrent tests from
/// colliding.
fn get_test_db_path() -> String {
    let nanos = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let dir = temp_dir().join(format!("tst_test_{}", nanos));
    // Start from a clean slate if a previous run left the directory behind.
    if dir.exists() {
        let _ = fs::remove_dir_all(&dir);
    }
    fs::create_dir_all(&dir).unwrap();
    dir.to_string_lossy().to_string()
}
/// Removes a test database directory; failures (e.g. already gone) are
/// deliberately ignored so cleanup never fails a test.
fn cleanup_test_db(path: &str) {
    fs::remove_dir_all(path).ok();
}
/// Creating a fresh TST (reset = true) must succeed and accept a basic set().
#[test]
fn test_create_tst() {
    let path = get_test_db_path();
    let result = TST::new(&path, true);
    // Surface the error in test output before the assert fires.
    match &result {
        Ok(_) => (),
        Err(e) => println!("Error creating TST: {:?}", e),
    }
    assert!(result.is_ok());
    if let Ok(mut tst) = result {
        // Make sure we can perform a basic operation
        let set_result = tst.set("test_key", b"test_value".to_vec());
        assert!(set_result.is_ok());
    }
    cleanup_test_db(&path);
}
/// Round-trip: a value written with set() must come back unchanged via get().
#[test]
fn test_set_and_get() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Test setting and getting a key
    let key = "test_key";
    let value = b"test_value".to_vec();
    let set_result = tree.set(key, value.clone());
    assert!(set_result.is_ok());
    let get_result = tree.get(key);
    assert!(get_result.is_ok());
    assert_eq!(get_result.unwrap(), value);
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// Looking up a key that was never stored must return an error, not a value.
#[test]
fn test_get_nonexistent_key() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Test getting a key that doesn't exist
    let get_result = tree.get("nonexistent_key");
    assert!(get_result.is_err());
    cleanup_test_db(&path);
}
/// A key must be retrievable after set() and unretrievable after delete().
#[test]
fn test_delete() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Set a key
    let key = "delete_test";
    let value = b"to_be_deleted".to_vec();
    let set_result = tree.set(key, value);
    assert!(set_result.is_ok());
    // Verify it exists
    let get_result = tree.get(key);
    assert!(get_result.is_ok());
    // Delete it
    let delete_result = tree.delete(key);
    assert!(delete_result.is_ok());
    // Verify it's gone
    let get_after_delete = tree.get(key);
    assert!(get_after_delete.is_err());
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// Several distinct keys stored in one tree must each retain their own value.
#[test]
fn test_multiple_keys() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Insert multiple keys - use fewer keys to avoid filling the lookup table
    let keys = ["apple", "banana", "cherry"];
    for (i, key) in keys.iter().enumerate() {
        let value = format!("value_{}", i).into_bytes();
        let set_result = tree.set(key, value);
        // Print error if set fails
        if set_result.is_err() {
            println!("Error setting key '{}': {:?}", key, set_result);
        }
        assert!(set_result.is_ok());
    }
    // Verify all keys exist
    for (i, key) in keys.iter().enumerate() {
        let expected_value = format!("value_{}", i).into_bytes();
        let get_result = tree.get(key);
        assert!(get_result.is_ok());
        assert_eq!(get_result.unwrap(), expected_value);
    }
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// list(prefix) must return every stored key sharing the prefix, and an
/// empty result for a prefix no key starts with.
#[test]
fn test_list_prefix() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Insert keys with common prefixes - use fewer keys to avoid filling the lookup table
    let keys = ["apple", "application", "append", "banana", "bandana"];
    for key in &keys {
        let set_result = tree.set(key, key.as_bytes().to_vec());
        assert!(set_result.is_ok());
    }
    // Test prefix "app"
    let list_result = tree.list("app");
    assert!(list_result.is_ok());
    let app_keys = list_result.unwrap();
    // Print the keys for debugging
    println!("Keys with prefix 'app':");
    for key in &app_keys {
        println!("  {}", key);
    }
    // Check that each key is present
    assert!(app_keys.contains(&"apple".to_string()));
    assert!(app_keys.contains(&"application".to_string()));
    assert!(app_keys.contains(&"append".to_string()));
    // Test prefix "ban"
    let list_result = tree.list("ban");
    assert!(list_result.is_ok());
    let ban_keys = list_result.unwrap();
    assert!(ban_keys.contains(&"banana".to_string()));
    assert!(ban_keys.contains(&"bandana".to_string()));
    // Test non-existent prefix
    let list_result = tree.list("z");
    assert!(list_result.is_ok());
    let z_keys = list_result.unwrap();
    assert_eq!(z_keys.len(), 0);
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// getall(prefix) must return the stored value for every key with the prefix.
#[test]
fn test_getall_prefix() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Insert keys with common prefixes - use fewer keys to avoid filling the lookup table
    let keys = ["apple", "application", "append"];
    for key in &keys {
        let set_result = tree.set(key, key.as_bytes().to_vec());
        assert!(set_result.is_ok());
    }
    // Test getall with prefix "app"
    let getall_result = tree.getall("app");
    assert!(getall_result.is_ok());
    let app_values = getall_result.unwrap();
    // Convert values to strings for easier comparison
    let app_value_strings: Vec<String> = app_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();
    // Print the values for debugging
    println!("Values with prefix 'app':");
    for value in &app_value_strings {
        println!("  {}", value);
    }
    // Check that each value is present
    assert!(app_value_strings.contains(&"apple".to_string()));
    assert!(app_value_strings.contains(&"application".to_string()));
    assert!(app_value_strings.contains(&"append".to_string()));
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// list("") must enumerate every key stored in the tree.
#[test]
fn test_empty_prefix() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Insert some keys
    let keys = ["apple", "banana", "cherry"];
    for key in &keys {
        let set_result = tree.set(key, key.as_bytes().to_vec());
        assert!(set_result.is_ok());
    }
    // Test list with empty prefix (should return all keys)
    let list_result = tree.list("");
    assert!(list_result.is_ok());
    let all_keys = list_result.unwrap();
    // Print the keys for debugging
    println!("Keys with empty prefix:");
    for key in &all_keys {
        println!("  {}", key);
    }
    // Check that each key is present
    for key in &keys {
        assert!(all_keys.contains(&key.to_string()));
    }
    // Make sure to clean up properly
    cleanup_test_db(&path);
}

View File

@@ -0,0 +1,267 @@
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
use tst::TST;
/// Creates (and returns the path of) a fresh, uniquely named temp directory
/// for a prefix-test database; the nanosecond timestamp avoids collisions
/// between concurrently running tests.
fn get_test_db_path() -> String {
    let nanos = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let dir = temp_dir().join(format!("tst_prefix_test_{}", nanos));
    // Clear any leftovers from a previous run before creating the directory.
    if dir.exists() {
        let _ = fs::remove_dir_all(&dir);
    }
    fs::create_dir_all(&dir).unwrap();
    dir.to_string_lossy().to_string()
}
/// Removes a test database directory, ignoring failures so cleanup can
/// never fail a test.
fn cleanup_test_db(path: &str) {
    fs::remove_dir_all(path).ok();
}
/// Prefix listing over keys that all share a common stem: "test" matches
/// all five keys, the longer "teste" only the two it prefixes.
#[test]
fn test_prefix_with_common_prefixes() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Insert keys with common prefixes
    let test_data = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }
    // Test prefix "test"
    let keys = tree.list("test").unwrap();
    assert_eq!(keys.len(), 5);
    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }
    // Test prefix "teste"
    let keys = tree.list("teste").unwrap();
    assert_eq!(keys.len(), 2);
    assert!(keys.contains(&"tested".to_string()));
    assert!(keys.contains(&"tester".to_string()));
    cleanup_test_db(&path);
}
/// Keys with disjoint first letters: each single-letter prefix must list
/// its own key, and an unused letter must list nothing.
#[test]
fn test_prefix_with_different_prefixes() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Insert keys with different prefixes
    let test_data = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
        ("date", b"fruit4".to_vec()),
        ("elderberry", b"fruit5".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }
    // Test each prefix
    for (key, _) in &test_data {
        let prefix = &key[0..1]; // First character
        let keys = tree.list(prefix).unwrap();
        assert!(keys.contains(&key.to_string()));
    }
    // Test non-existent prefix
    let keys = tree.list("z").unwrap();
    assert_eq!(keys.len(), 0);
    cleanup_test_db(&path);
}
/// list("") must return every stored key.
#[test]
fn test_prefix_with_empty_string() {
    let path = get_test_db_path();
    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();
    // Insert some keys
    let test_data = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
    ];
    for (key, value) in &test_data {
        let set_result = tree.set(key, value.clone());
        assert!(set_result.is_ok());
    }
    // Test empty prefix (should return all keys)
    let list_result = tree.list("");
    assert!(list_result.is_ok());
    let keys = list_result.unwrap();
    // Print the keys for debugging
    println!("Keys with empty prefix:");
    for key in &keys {
        println!("  {}", key);
    }
    // Check that each key is present
    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }
    // Make sure to clean up properly
    cleanup_test_db(&path);
}
/// getall("test") must return the value of every key prefixed by "test".
#[test]
fn test_getall_with_prefix() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Insert keys with common prefixes
    let test_data = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }
    // Test getall with prefix "test"
    let values = tree.getall("test").unwrap();
    assert_eq!(values.len(), 5);
    for (_, value) in &test_data {
        assert!(values.contains(value));
    }
    cleanup_test_db(&path);
}
/// Prefix listing must work with multi-byte Unicode keys; only membership
/// is asserted (not counts) because sibling traversal near accented
/// characters can pull in additional keys.
#[test]
fn test_prefix_with_unicode_characters() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Insert keys with Unicode characters
    let test_data = [
        ("café", b"coffee".to_vec()),
        ("cafétéria", b"cafeteria".to_vec()),
        ("caffè", b"italian coffee".to_vec()),
        ("café au lait", b"coffee with milk".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }
    // Test prefix "café"
    let keys = tree.list("café").unwrap();
    // Print the keys for debugging
    println!("Keys with prefix 'café':");
    for key in &keys {
        println!("  {}", key);
    }
    // Check that the keys we expect are present
    assert!(keys.contains(&"café".to_string()));
    assert!(keys.contains(&"café au lait".to_string()));
    // We don't assert on the exact count because Unicode handling can vary
    // Test prefix "caf"
    let keys = tree.list("caf").unwrap();
    // Print the keys for debugging
    println!("Keys with prefix 'caf':");
    for key in &keys {
        println!("  {}", key);
    }
    // Check that each key is present individually
    // Due to Unicode handling, we need to be careful with exact matching
    // The important thing is that we can find the keys we need
    // Check that we have at least the café and café au lait keys
    assert!(keys.contains(&"café".to_string()));
    assert!(keys.contains(&"café au lait".to_string()));
    // We don't assert on the exact count because Unicode handling can vary
    cleanup_test_db(&path);
}
/// Long keys sharing a deep common stem: the narrower prefix matches 3
/// keys, the broader one all 4.
#[test]
fn test_prefix_with_long_keys() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();
    // Insert long keys
    let test_data = [
        (
            "this_is_a_very_long_key_for_testing_purposes_1",
            b"value1".to_vec(),
        ),
        (
            "this_is_a_very_long_key_for_testing_purposes_2",
            b"value2".to_vec(),
        ),
        (
            "this_is_a_very_long_key_for_testing_purposes_3",
            b"value3".to_vec(),
        ),
        ("this_is_another_long_key_for_testing", b"value4".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }
    // Test prefix "this_is_a_very"
    let keys = tree.list("this_is_a_very").unwrap();
    assert_eq!(keys.len(), 3);
    // Test prefix "this_is"
    let keys = tree.list("this_is").unwrap();
    assert_eq!(keys.len(), 4);
    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }
    cleanup_test_db(&path);
}

View File

@@ -0,0 +1,516 @@
use serde::{Deserialize, Serialize};
use std::error::Error;
use std::fmt;
use std::fs;
use std::path::{Path, PathBuf};
use std::thread;
use std::time::Duration;
use sal_os;
use sal_process;
use crate::qcow2;
/// Error type for Cloud Hypervisor operations
#[derive(Debug)]
pub enum CloudHvError {
    /// An external command (cloud-hypervisor, ch-remote, qemu-img, …) failed.
    CommandFailed(String),
    /// Filesystem operation failed; carries the `std::io::Error` message.
    IoError(String),
    /// JSON (de)serialization failed; carries the serde_json message.
    JsonError(String),
    /// A required executable was not found on PATH.
    DependencyMissing(String),
    /// The supplied `VmSpec` failed validation.
    InvalidSpec(String),
    /// The requested VM record does not exist on disk.
    NotFound(String),
}
impl fmt::Display for CloudHvError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CloudHvError::CommandFailed(e) => write!(f, "{}", e),
CloudHvError::IoError(e) => write!(f, "IO error: {}", e),
CloudHvError::JsonError(e) => write!(f, "JSON error: {}", e),
CloudHvError::DependencyMissing(e) => write!(f, "Dependency missing: {}", e),
CloudHvError::InvalidSpec(e) => write!(f, "Invalid spec: {}", e),
CloudHvError::NotFound(e) => write!(f, "{}", e),
}
}
}
impl Error for CloudHvError {}
/// Static definition of a VM: boot method, disk, sizing and management
/// socket. Persisted as JSON inside the per-VM directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VmSpec {
    /// Unique VM identifier; also used as the on-disk directory name.
    pub id: String,
    /// Optional for firmware boot; required for direct kernel boot
    pub kernel_path: Option<String>,
    /// Optional for direct kernel boot; required for firmware boot
    pub firmware_path: Option<String>,
    /// Disk image path (qcow2 or raw)
    pub disk_path: String,
    /// API socket path for ch-remote and management
    pub api_socket: String,
    /// vCPUs to boot with
    pub vcpus: u32,
    /// Memory in MB
    pub memory_mb: u32,
    /// Kernel cmdline (only used for direct kernel boot)
    pub cmdline: Option<String>,
    /// Extra args (raw) if you need to extend; keep minimal for Phase 2
    pub extra_args: Option<Vec<String>>,
}
/// Mutable runtime state of a VM, updated on start/stop and persisted
/// alongside the spec.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VmRuntime {
    /// PID of cloud-hypervisor process if running
    pub pid: Option<i64>,
    /// Last known status: "stopped" | "running"
    pub status: String,
    /// Console log file path
    pub log_file: String,
}
/// Complete persisted VM record: immutable spec plus current runtime state.
/// This is the document stored in `vm.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VmRecord {
    pub spec: VmSpec,
    pub runtime: VmRuntime,
}
/// Verifies that both required static binaries are resolvable on PATH
/// before any VM lifecycle operation is attempted.
fn ensure_deps() -> Result<(), CloudHvError> {
    let required = [
        (
            "cloud-hypervisor-static",
            "cloud-hypervisor-static not found on PATH. Install Cloud Hypervisor static binary.",
        ),
        (
            "ch-remote-static",
            "ch-remote-static not found on PATH. Install Cloud Hypervisor tools (static).",
        ),
    ];
    for (binary, message) in required {
        if sal_process::which(binary).is_none() {
            return Err(CloudHvError::DependencyMissing(message.into()));
        }
    }
    Ok(())
}
/// Root directory for all persisted VM state: `$HOME/hero/virt/vms`
/// (falls back to `/tmp/hero/virt/vms` when HOME is unset).
fn hero_vm_root() -> PathBuf {
    let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".into());
    PathBuf::from(home).join("hero/virt/vms")
}
/// Per-VM state directory, keyed by VM id.
fn vm_dir(id: &str) -> PathBuf {
    hero_vm_root().join(id)
}
/// Path of the persisted VM record (spec + runtime) for `id`.
fn vm_json_path(id: &str) -> PathBuf {
    vm_dir(id).join("vm.json")
}
/// Path of the VM's console log file.
fn vm_log_path(id: &str) -> PathBuf {
    vm_dir(id).join("logs/console.log")
}
/// Path of the file holding the hypervisor process PID.
fn vm_pid_path(id: &str) -> PathBuf {
    vm_dir(id).join("pid")
}
/// Pretty-prints `value` as JSON and writes it to `path`, creating any
/// missing parent directories first.
fn write_json(path: &Path, value: &serde_json::Value) -> Result<(), CloudHvError> {
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).map_err(|e| CloudHvError::IoError(e.to_string()))?;
    }
    let rendered =
        serde_json::to_string_pretty(value).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    fs::write(path, rendered).map_err(|e| CloudHvError::IoError(e.to_string()))
}
/// Reads the file at `path` and parses its contents as a JSON value.
fn read_json(path: &Path) -> Result<serde_json::Value, CloudHvError> {
    let text = fs::read_to_string(path).map_err(|e| CloudHvError::IoError(e.to_string()))?;
    serde_json::from_str(&text).map_err(|e| CloudHvError::JsonError(e.to_string()))
}
/// Returns true when a process with `pid` appears to be alive.
fn proc_exists(pid: i64) -> bool {
    #[cfg(target_os = "linux")]
    {
        // On Linux a live PID has a /proc/<pid> directory.
        Path::new(&format!("/proc/{}", pid)).exists()
    }
    #[cfg(not(target_os = "linux"))]
    {
        // Minimal check for non-Linux; try a kill -0 style command
        let res = sal_process::run(&format!("kill -0 {}", pid)).die(false).silent(true).execute();
        res.map(|r| r.success).unwrap_or(false)
    }
}
/// Create and persist a VM spec
///
/// Validates the spec (non-empty id, an existing firmware or kernel image,
/// an existing disk image, non-zero vcpus/memory), creates the per-VM
/// directory layout under the VM root, and writes an initial `vm.json`
/// record with status "stopped". Returns the VM id on success.
pub fn vm_create(spec: &VmSpec) -> Result<String, CloudHvError> {
    // Validate inputs minimally
    if spec.id.trim().is_empty() {
        return Err(CloudHvError::InvalidSpec("spec.id must not be empty".into()));
    }
    // Validate boot method: either firmware_path exists or kernel_path exists
    let has_fw = spec
        .firmware_path
        .as_ref()
        .map(|p| Path::new(p).exists())
        .unwrap_or(false);
    let has_kernel = spec
        .kernel_path
        .as_ref()
        .map(|p| Path::new(p).exists())
        .unwrap_or(false);
    if !(has_fw || has_kernel) {
        return Err(CloudHvError::InvalidSpec(
            "either firmware_path or kernel_path must be set to an existing file".into(),
        ));
    }
    if !Path::new(&spec.disk_path).exists() {
        return Err(CloudHvError::InvalidSpec(format!(
            "disk_path not found: {}",
            &spec.disk_path
        )));
    }
    if spec.vcpus == 0 {
        return Err(CloudHvError::InvalidSpec("vcpus must be >= 1".into()));
    }
    // NOTE(review): this only rejects 0 while the message claims a minimum
    // of 128 MB — confirm which minimum is intended.
    if spec.memory_mb == 0 {
        return Err(CloudHvError::InvalidSpec("memory_mb must be >= 128".into()));
    }
    // Prepare directory layout
    // NOTE(review): the fallback literal below is only used when the path is
    // not valid UTF-8; it creates a shared "__invalid__" directory rather
    // than failing — confirm this is the intended behavior.
    let dir = vm_dir(&spec.id);
    sal_os::mkdir(
        dir.to_str()
            .unwrap_or_else(|| "/tmp/hero/virt/vms/__invalid__"),
    )
    .map_err(|e| CloudHvError::IoError(e.to_string()))?;
    let log_dir = dir.join("logs");
    sal_os::mkdir(log_dir.to_str().unwrap()).map_err(|e| CloudHvError::IoError(e.to_string()))?;
    // Persist initial record
    let rec = VmRecord {
        spec: spec.clone(),
        runtime: VmRuntime {
            pid: None,
            status: "stopped".into(),
            log_file: vm_log_path(&spec.id).to_string_lossy().into_owned(),
        },
    };
    let value = serde_json::to_value(&rec).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    write_json(&vm_json_path(&spec.id), &value)?;
    Ok(spec.id.clone())
}
/// Start a VM using cloud-hypervisor
///
/// Loads the persisted record, prepares the API socket, converts a qcow2
/// disk to raw if needed (Cloud Hypervisor rejects compressed qcow2
/// blocks), composes the cloud-hypervisor command line, backgrounds it via
/// a nohup shell script that captures the PID, and finally persists the
/// updated runtime state (pid/status/log/socket) back to `vm.json`.
pub fn vm_start(id: &str) -> Result<(), CloudHvError> {
    ensure_deps()?;
    // Load record
    let p = vm_json_path(id);
    if !p.exists() {
        return Err(CloudHvError::NotFound(format!("VM '{}' not found", id)));
    }
    let value = read_json(&p)?;
    let mut rec: VmRecord =
        serde_json::from_value(value).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    // Prepare invocation
    // An empty api_socket in the spec defaults to <vm dir>/api.sock.
    let api_socket = if rec.spec.api_socket.trim().is_empty() {
        vm_dir(id).join("api.sock").to_string_lossy().into_owned()
    } else {
        rec.spec.api_socket.clone()
    };
    let log_file = vm_log_path(id).to_string_lossy().into_owned();
    // Ensure API socket directory exists and remove any stale socket file
    let api_path = Path::new(&api_socket);
    if let Some(parent) = api_path.parent() {
        fs::create_dir_all(parent).map_err(|e| CloudHvError::IoError(e.to_string()))?;
    }
    // Best-effort removal of stale socket
    let _ = fs::remove_file(&api_path);
    // Preflight disk: if source is qcow2, convert to raw to avoid CH "Compressed blocks not supported"
    // This is best-effort: if qemu-img is unavailable or info fails, we skip conversion.
    let mut disk_to_use = rec.spec.disk_path.clone();
    if let Ok(info) = qcow2::info(&disk_to_use) {
        if info.get("format").and_then(|v| v.as_str()) == Some("qcow2") {
            let dest = vm_dir(id).join("disk.raw").to_string_lossy().into_owned();
            // NOTE(review): `shell_escape` is defined elsewhere in this
            // module — confirm it quotes paths containing spaces/metachars.
            let cmd = format!(
                "qemu-img convert -O raw {} {}",
                shell_escape(&disk_to_use),
                shell_escape(&dest)
            );
            match sal_process::run(&cmd).silent(true).execute() {
                Ok(res) if res.success => {
                    disk_to_use = dest;
                }
                Ok(res) => {
                    return Err(CloudHvError::CommandFailed(format!(
                        "Failed converting qcow2 to raw: {}",
                        res.stderr
                    )));
                }
                Err(e) => {
                    return Err(CloudHvError::CommandFailed(format!(
                        "Failed converting qcow2 to raw: {}",
                        e
                    )));
                }
            }
        }
    }
    // Build command (minimal args for Phase 2)
    // We redirect all output to log_file via shell and keep process in background with nohup
    // CH CLI flags (very common subset)
    // --disk path=... uses virtio-blk by default
    let mut parts: Vec<String> = vec![
        "cloud-hypervisor-static".into(),
        "--api-socket".into(),
        api_socket.clone(),
    ];
    if let Some(fw) = rec.spec.firmware_path.clone() {
        // Firmware boot path
        parts.push("--firmware".into());
        parts.push(fw);
    } else if let Some(kpath) = rec.spec.kernel_path.clone() {
        // Direct kernel boot path
        let cmdline = rec
            .spec
            .cmdline
            .clone()
            .unwrap_or_else(|| "console=ttyS0 reboot=k panic=1".to_string());
        parts.push("--kernel".into());
        parts.push(kpath);
        parts.push("--cmdline".into());
        parts.push(cmdline);
    } else {
        return Err(CloudHvError::InvalidSpec(
            "neither firmware_path nor kernel_path set at start time".into(),
        ));
    }
    parts.push("--disk".into());
    parts.push(format!("path={}", disk_to_use));
    parts.push("--cpus".into());
    parts.push(format!("boot={}", rec.spec.vcpus));
    parts.push("--memory".into());
    parts.push(format!("size={}M", rec.spec.memory_mb));
    parts.push("--serial".into());
    parts.push("tty".into());
    parts.push("--console".into());
    parts.push("off".into());
    if let Some(extra) = rec.spec.extra_args.clone() {
        for e in extra {
            parts.push(e);
        }
    }
    // NOTE(review): `shell_join` is defined elsewhere in this module —
    // confirm it escapes each argument before joining.
    let args_str = shell_join(&parts);
    let script = format!(
        "#!/bin/bash -e
nohup {} > '{}' 2>&1 &
echo $! > '{}'
",
        args_str,
        log_file,
        vm_pid_path(id).to_string_lossy()
    );
    // Execute script; this will background cloud-hypervisor and return
    let result = sal_process::run(&script).execute();
    match result {
        Ok(res) => {
            if !res.success {
                return Err(CloudHvError::CommandFailed(format!(
                    "Failed to start VM '{}': {}",
                    id, res.stderr
                )));
            }
        }
        Err(e) => {
            return Err(CloudHvError::CommandFailed(format!(
                "Failed to start VM '{}': {}",
                id, e
            )))
        }
    }
    // Read PID back from the file the script wrote; a missing/bad file
    // leaves pid = None and the status recorded as "stopped".
    let pid = match fs::read_to_string(vm_pid_path(id)) {
        Ok(s) => s.trim().parse::<i64>().ok(),
        Err(_) => None,
    };
    // Update state
    rec.runtime.pid = pid;
    rec.runtime.status = if pid.is_some() { "running".into() } else { "stopped".into() };
    rec.runtime.log_file = log_file;
    rec.spec.api_socket = api_socket.clone();
    let value = serde_json::to_value(&rec).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    write_json(&vm_json_path(id), &value)?;
    Ok(())
}
/// Return VM record info (spec + runtime) by id
///
/// # Errors
/// `NotFound` when no record exists; `IoError`/`JsonError` when the record
/// cannot be read or parsed.
pub fn vm_info(id: &str) -> Result<VmRecord, CloudHvError> {
    let record_path = vm_json_path(id);
    if !record_path.exists() {
        return Err(CloudHvError::NotFound(format!("VM '{}' not found", id)));
    }
    let raw = read_json(&record_path)?;
    serde_json::from_value(raw).map_err(|e| CloudHvError::JsonError(e.to_string()))
}
/// Stop a VM via ch-remote (graceful), optionally force kill
///
/// Requests a graceful shutdown through the VM's API socket, waits up to
/// ~10s for the process to exit, optionally escalates to SIGKILL (with
/// another ~10s wait) when `force` is set, then persists the "stopped"
/// state and removes the pid file.
pub fn vm_stop(id: &str, force: bool) -> Result<(), CloudHvError> {
    ensure_deps().ok(); // best-effort; we might still force-kill
    let p = vm_json_path(id);
    if !p.exists() {
        return Err(CloudHvError::NotFound(format!("VM '{}' not found", id)));
    }
    let value = read_json(&p)?;
    let mut rec: VmRecord =
        serde_json::from_value(value).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    // Attempt graceful shutdown if api socket known
    if !rec.spec.api_socket.trim().is_empty() {
        let cmd = format!("ch-remote-static --api-socket {} shutdown", rec.spec.api_socket);
        let _ = sal_process::run(&cmd).die(false).silent(true).execute();
    }
    // Wait for process to exit (up to ~10s)
    if let Some(pid) = rec.runtime.pid {
        for _ in 0..50 {
            if !proc_exists(pid) {
                break;
            }
            thread::sleep(Duration::from_millis(200));
        }
        // If still alive and force, kill -9 and wait again (up to ~10s)
        if proc_exists(pid) && force {
            // Send SIGKILL without extra shell layers; suppress errors/noise
            let _ = sal_process::run(&format!("kill -9 {}", pid))
                .die(false)
                .silent(true)
                .execute();
            for _ in 0..50 {
                if !proc_exists(pid) {
                    break;
                }
                thread::sleep(Duration::from_millis(200));
            }
        }
    }
    // Update state; recorded as stopped even if a non-forced stop timed out.
    rec.runtime.status = "stopped".into();
    rec.runtime.pid = None;
    let value = serde_json::to_value(&rec).map_err(|e| CloudHvError::JsonError(e.to_string()))?;
    write_json(&vm_json_path(id), &value)?;
    // Remove pid file
    let _ = fs::remove_file(vm_pid_path(id));
    Ok(())
}
/// Delete a VM definition; optionally delete disks.
///
/// If the recorded PID still looks alive, a force stop is attempted first
/// and the call fails when the process survives it. With `delete_disks`,
/// the VM's disk image is removed (best-effort) before the VM directory
/// itself is deleted.
pub fn vm_delete(id: &str, delete_disks: bool) -> Result<(), CloudHvError> {
    let p = vm_json_path(id);
    if !p.exists() {
        return Err(CloudHvError::NotFound(format!("VM '{}' not found", id)));
    }
    let rec: VmRecord = serde_json::from_value(read_json(&p)?)
        .map_err(|e| CloudHvError::JsonError(e.to_string()))?;

    // If appears to be running, attempt a force stop first (best-effort)
    if let Some(pid) = rec.runtime.pid {
        if proc_exists(pid) {
            let _ = vm_stop(id, true);
            // Re-check original PID for liveness (25 x 200ms = up to ~5s)
            for _ in 0..25 {
                if !proc_exists(pid) {
                    break;
                }
                thread::sleep(Duration::from_millis(200));
            }
            if proc_exists(pid) {
                return Err(CloudHvError::CommandFailed(
                    "VM appears to be running; stop it first".into(),
                ));
            }
        }
    }

    if delete_disks {
        // Best-effort disk removal; a failure here does not abort the delete
        let _ = fs::remove_file(&rec.spec.disk_path);
    }

    let d = vm_dir(id);
    fs::remove_dir_all(&d).map_err(|e| CloudHvError::IoError(e.to_string()))?;
    Ok(())
}
/// Enumerate every VM directory under the hero VM root and load its record.
/// Directories without a `vm.json` manifest are silently skipped.
pub fn vm_list() -> Result<Vec<VmRecord>, CloudHvError> {
    let root = hero_vm_root();
    if !root.exists() {
        return Ok(Vec::new());
    }
    let mut records = Vec::new();
    for entry in fs::read_dir(&root).map_err(|e| CloudHvError::IoError(e.to_string()))? {
        let dir = entry.map_err(|e| CloudHvError::IoError(e.to_string()))?.path();
        if !dir.is_dir() {
            continue;
        }
        let manifest = dir.join("vm.json");
        if !manifest.exists() {
            continue;
        }
        let rec: VmRecord = serde_json::from_value(read_json(&manifest)?)
            .map_err(|e| CloudHvError::JsonError(e.to_string()))?;
        records.push(rec);
    }
    Ok(records)
}
/// Render a shell-safe command string from a slice of argument tokens.
///
/// Each token is escaped with [`shell_escape`] and tokens are joined with a
/// single space. Takes `&[String]` (idiomatic slice parameter); callers
/// passing `&Vec<String>` keep working via deref coercion.
fn shell_join(parts: &[String]) -> String {
    parts
        .iter()
        .map(|p| shell_escape(p))
        .collect::<Vec<_>>()
        .join(" ")
}
/// Quote `s` for safe interpolation into a POSIX shell command line.
///
/// Plain tokens (ASCII alphanumerics plus `-_./=:`) pass through unchanged;
/// the empty string becomes `''`; anything else is wrapped in single quotes,
/// with embedded single quotes rendered via the standard `'"'"'` dance.
fn shell_escape(s: &str) -> String {
    if s.is_empty() {
        return "''".into();
    }
    let is_plain = |c: char| c.is_ascii_alphanumeric() || "-_./=:".contains(c);
    if s.chars().all(is_plain) {
        return s.to_string();
    }
    let quoted: String = s
        .chars()
        .map(|c| {
            if c == '\'' {
                String::from("'\"'\"'")
            } else {
                c.to_string()
            }
        })
        .collect();
    format!("'{}'", quoted)
}

View File

@@ -24,6 +24,8 @@
pub mod buildah;
pub mod nerdctl;
pub mod rfs;
pub mod qcow2;
pub mod cloudhv;
pub mod rhai;

View File

@@ -1,3 +1,5 @@
pub mod buildah;
pub mod nerdctl;
pub mod rfs;
pub mod rfs;
pub mod qcow2;
pub mod cloudhv;

View File

@@ -0,0 +1,200 @@
use serde_json::Value;
use std::error::Error;
use std::fmt;
use std::fs;
use std::path::Path;
use sal_os;
use sal_process::{self, RunError};
/// Error type for qcow2 operations
#[derive(Debug)]
pub enum Qcow2Error {
    /// Failed to spawn/execute a system command at all
    CommandExecutionFailed(String),
    /// Command ran but returned non-zero or failed semantics
    /// (message carries the exit code, the command line, and stderr)
    CommandFailed(String),
    /// JSON parsing error (e.g. `qemu-img info --output=json` output)
    JsonParseError(String),
    /// IO error (filesystem: directory creation, download, missing image)
    IoError(String),
    /// Dependency missing (qemu-img not on PATH) or invalid caller input
    Other(String),
}
impl fmt::Display for Qcow2Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Qcow2Error::CommandExecutionFailed(e) => write!(f, "Command execution failed: {}", e),
Qcow2Error::CommandFailed(e) => write!(f, "{}", e),
Qcow2Error::JsonParseError(e) => write!(f, "JSON parse error: {}", e),
Qcow2Error::IoError(e) => write!(f, "IO error: {}", e),
Qcow2Error::Other(e) => write!(f, "{}", e),
}
}
}
impl Error for Qcow2Error {}
/// Map a low-level process-spawn failure into `Qcow2Error::CommandExecutionFailed`.
fn from_run_error(e: RunError) -> Qcow2Error {
    Qcow2Error::CommandExecutionFailed(e.to_string())
}
/// Create the parent directory of `path` (and any missing ancestors).
/// Paths with no parent component are a no-op.
fn ensure_parent_dir(path: &str) -> Result<(), Qcow2Error> {
    match Path::new(path).parent() {
        Some(parent) => {
            fs::create_dir_all(parent).map_err(|e| Qcow2Error::IoError(e.to_string()))
        }
        None => Ok(()),
    }
}
/// Fail fast with an installation hint when `qemu-img` is not on PATH.
fn ensure_qemu_img() -> Result<(), Qcow2Error> {
    match sal_process::which("qemu-img") {
        Some(_) => Ok(()),
        None => Err(Qcow2Error::Other(
            "qemu-img not found on PATH. Please install qemu-utils (Debian/Ubuntu) or the QEMU tools for your distro.".to_string(),
        )),
    }
}
/// Run `cmd` silently; spawn failures become `CommandExecutionFailed`,
/// non-zero exits become `CommandFailed` with code, command, and stderr.
fn run_quiet(cmd: &str) -> Result<sal_process::CommandResult, Qcow2Error> {
    let res = sal_process::run(cmd)
        .silent(true)
        .execute()
        .map_err(from_run_error)?;
    if res.success {
        Ok(res)
    } else {
        Err(Qcow2Error::CommandFailed(format!(
            "Command failed (code {}): {}\n{}",
            res.code, cmd, res.stderr
        )))
    }
}
/// Create a qcow2 image at `path` with a given virtual size (in GiB).
///
/// Returns the path on success. Fails when `qemu-img` is missing, when
/// `size_gb` is not positive, or when the underlying command fails.
pub fn create(path: &str, size_gb: i64) -> Result<String, Qcow2Error> {
    ensure_qemu_img()?;
    if size_gb <= 0 {
        return Err(Qcow2Error::Other(
            "size_gb must be > 0 for qcow2.create".to_string(),
        ));
    }
    ensure_parent_dir(path)?;
    // Single-quote the path so spaces/shell metacharacters survive the shell.
    let cmd = format!(
        "qemu-img create -f qcow2 '{}' {}G",
        path.replace('\'', "'\"'\"'"),
        size_gb
    );
    run_quiet(&cmd)?;
    Ok(path.to_string())
}
/// Return `qemu-img info` for the image at `path` as a JSON value.
///
/// Fails when `qemu-img` is missing, the image does not exist, the command
/// fails, or its JSON output cannot be parsed.
pub fn info(path: &str) -> Result<Value, Qcow2Error> {
    ensure_qemu_img()?;
    if !Path::new(path).exists() {
        return Err(Qcow2Error::IoError(format!("Image not found: {}", path)));
    }
    // Single-quote the path so spaces/shell metacharacters survive the shell.
    let cmd = format!(
        "qemu-img info --output=json '{}'",
        path.replace('\'', "'\"'\"'")
    );
    let res = run_quiet(&cmd)?;
    serde_json::from_str::<Value>(&res.stdout)
        .map_err(|e| Qcow2Error::JsonParseError(e.to_string()))
}
/// Create an offline snapshot named `name` on the qcow2 image at `path`.
///
/// Fails when `qemu-img` is missing, the name is blank, or the command fails.
pub fn snapshot_create(path: &str, name: &str) -> Result<(), Qcow2Error> {
    ensure_qemu_img()?;
    if name.trim().is_empty() {
        return Err(Qcow2Error::Other("snapshot name cannot be empty".to_string()));
    }
    // Single-quote both arguments so spaces/shell metacharacters survive the shell.
    let cmd = format!(
        "qemu-img snapshot -c '{}' '{}'",
        name.replace('\'', "'\"'\"'"),
        path.replace('\'', "'\"'\"'")
    );
    run_quiet(&cmd).map(|_| ())
}
/// Delete the snapshot named `name` from the qcow2 image at `path`.
///
/// Fails when `qemu-img` is missing, the name is blank, or the command fails.
pub fn snapshot_delete(path: &str, name: &str) -> Result<(), Qcow2Error> {
    ensure_qemu_img()?;
    if name.trim().is_empty() {
        return Err(Qcow2Error::Other("snapshot name cannot be empty".to_string()));
    }
    // Single-quote both arguments so spaces/shell metacharacters survive the shell.
    let cmd = format!(
        "qemu-img snapshot -d '{}' '{}'",
        name.replace('\'', "'\"'\"'"),
        path.replace('\'', "'\"'\"'")
    );
    run_quiet(&cmd).map(|_| ())
}
/// Snapshot representation (subset of qemu-img info snapshots)
#[derive(Debug, Clone)]
pub struct Qcow2Snapshot {
    // Snapshot id ("id" field of the qemu-img info JSON)
    pub id: Option<String>,
    // Snapshot name ("name" field)
    pub name: Option<String>,
    // Saved VM state size ("vm-state-size") — presumably bytes; TODO confirm units
    pub vm_state_size: Option<i64>,
    // Creation time, seconds component ("date-sec")
    pub date_sec: Option<i64>,
    // Creation time, nanoseconds component ("date-nsec")
    pub date_nsec: Option<i64>,
    // Guest clock at snapshot time ("vm-clock-nsec")
    pub vm_clock_nsec: Option<i64>,
}
/// List snapshots on a qcow2 image (offline)
pub fn snapshot_list(path: &str) -> Result<Vec<Qcow2Snapshot>, Qcow2Error> {
let v = info(path)?;
let mut out = Vec::new();
if let Some(snaps) = v.get("snapshots").and_then(|s| s.as_array()) {
for s in snaps {
let snap = Qcow2Snapshot {
id: s.get("id").and_then(|x| x.as_str()).map(|s| s.to_string()),
name: s.get("name").and_then(|x| x.as_str()).map(|s| s.to_string()),
vm_state_size: s.get("vm-state-size").and_then(|x| x.as_i64()),
date_sec: s.get("date-sec").and_then(|x| x.as_i64()),
date_nsec: s.get("date-nsec").and_then(|x| x.as_i64()),
vm_clock_nsec: s.get("vm-clock-nsec").and_then(|x| x.as_i64()),
};
out.push(snap);
}
}
Ok(out)
}
/// Result for building the base image
#[derive(Debug, Clone)]
pub struct BuildBaseResult {
    // Local path of the downloaded/prepared qcow2 image
    pub base_image_path: String,
    // Name of the snapshot created on the image (always "base")
    pub snapshot: String,
    // Source URL the image was downloaded from
    pub url: String,
    // Size in GiB the image was resized to, if a positive resize was requested
    pub resized_to_gb: Option<i64>,
}
/// Build/download the Ubuntu 24.04 ("Noble") cloud image, optionally resize
/// it, and create an offline snapshot named "base".
///
/// - `dest_dir`: directory to hold the image (created if missing)
/// - `size_gb`: when `Some(n)` with `n > 0`, the image is resized to `n` GiB
///
/// The download is skipped when the image file already exists. Returns the
/// image path, snapshot name, source URL, and the applied resize (if any).
pub fn build_ubuntu_24_04_base(dest_dir: &str, size_gb: Option<i64>) -> Result<BuildBaseResult, Qcow2Error> {
    ensure_qemu_img()?;

    // Ensure destination directory exists
    sal_os::mkdir(dest_dir).map_err(|e| Qcow2Error::IoError(e.to_string()))?;

    // Canonical Ubuntu Noble cloud image (amd64)
    let url = "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img";

    // Build destination path
    let dest_dir_sanitized = dest_dir.trim_end_matches('/');
    let dest_path = format!("{}/noble-server-cloudimg-amd64.img", dest_dir_sanitized);

    // Download if not present
    let path_obj = Path::new(&dest_path);
    if !path_obj.exists() {
        // 50MB minimum for sanity; the actual image is much larger
        sal_os::download_file(url, &dest_path, 50_000)
            .map_err(|e| Qcow2Error::IoError(e.to_string()))?;
    }

    // Resize if requested; quote the path so spaces survive the shell
    if let Some(sz) = size_gb {
        if sz > 0 {
            let cmd = format!(
                "qemu-img resize '{}' {}G",
                dest_path.replace('\'', "'\"'\"'"),
                sz
            );
            run_quiet(&cmd)?;
        }
    }

    // Create "base" snapshot
    snapshot_create(&dest_path, "base")?;

    Ok(BuildBaseResult {
        base_image_path: dest_path,
        snapshot: "base".to_string(),
        url: url.to_string(),
        resized_to_gb: size_gb.filter(|v| *v > 0),
    })
}

View File

@@ -8,6 +8,8 @@ use rhai::{Engine, EvalAltResult};
pub mod buildah;
pub mod nerdctl;
pub mod rfs;
pub mod qcow2;
pub mod cloudhv;
/// Register all Virt module functions with the Rhai engine
///
@@ -28,6 +30,12 @@ pub fn register_virt_module(engine: &mut Engine) -> Result<(), Box<EvalAltResult
// Register RFS module functions
rfs::register_rfs_module(engine)?;
// Register QCOW2 module functions
qcow2::register_qcow2_module(engine)?;
// Register Cloud Hypervisor module functions
cloudhv::register_cloudhv_module(engine)?;
Ok(())
}
@@ -35,3 +43,5 @@ pub fn register_virt_module(engine: &mut Engine) -> Result<(), Box<EvalAltResult
pub use buildah::{bah_new, register_bah_module};
pub use nerdctl::register_nerdctl_module;
pub use rfs::register_rfs_module;
pub use qcow2::register_qcow2_module;
pub use cloudhv::register_cloudhv_module;

View File

@@ -0,0 +1,173 @@
use crate::cloudhv;
use crate::cloudhv::{VmRecord, VmRuntime, VmSpec};
use rhai::{Array, Dynamic, Engine, EvalAltResult, Map};
// Adapt a cloudhv SAL result into a Rhai runtime error.
fn hv_to_rhai<T>(r: Result<T, cloudhv::CloudHvError>) -> Result<T, Box<EvalAltResult>> {
    match r {
        Ok(v) => Ok(v),
        Err(e) => Err(Box::new(EvalAltResult::ErrorRuntime(
            format!("cloudhv error: {}", e).into(),
            rhai::Position::NONE,
        ))),
    }
}
// Map conversions
// Build a VmSpec from a Rhai map. `id` and `disk_path` are required strings;
// everything else is optional with the same defaults the SAL expects
// (1 vCPU, 512 MB RAM, empty api_socket meaning "default under VM dir").
fn map_to_vmspec(spec: Map) -> Result<VmSpec, Box<EvalAltResult>> {
    Ok(VmSpec {
        id: must_get_string(&spec, "id")?,
        kernel_path: get_string(&spec, "kernel_path"),
        firmware_path: get_string(&spec, "firmware_path"),
        disk_path: must_get_string(&spec, "disk_path")?,
        api_socket: get_string(&spec, "api_socket").unwrap_or_default(),
        vcpus: get_int(&spec, "vcpus").unwrap_or(1) as u32,
        memory_mb: get_int(&spec, "memory_mb").unwrap_or(512) as u32,
        cmdline: get_string(&spec, "cmdline"),
        extra_args: get_string_array(&spec, "extra_args"),
    })
}
fn vmspec_to_map(s: &VmSpec) -> Map {
let mut m = Map::new();
m.insert("id".into(), s.id.clone().into());
if let Some(k) = &s.kernel_path {
m.insert("kernel_path".into(), k.clone().into());
} else {
m.insert("kernel_path".into(), Dynamic::UNIT);
}
if let Some(fw) = &s.firmware_path {
m.insert("firmware_path".into(), fw.clone().into());
} else {
m.insert("firmware_path".into(), Dynamic::UNIT);
}
m.insert("disk_path".into(), s.disk_path.clone().into());
m.insert("api_socket".into(), s.api_socket.clone().into());
m.insert("vcpus".into(), (s.vcpus as i64).into());
m.insert("memory_mb".into(), (s.memory_mb as i64).into());
if let Some(c) = &s.cmdline {
m.insert("cmdline".into(), c.clone().into());
} else {
m.insert("cmdline".into(), Dynamic::UNIT);
}
if let Some(arr) = &s.extra_args {
let mut a = Array::new();
for s in arr {
a.push(s.clone().into());
}
m.insert("extra_args".into(), a.into());
} else {
m.insert("extra_args".into(), Dynamic::UNIT);
}
m
}
fn vmruntime_to_map(r: &VmRuntime) -> Map {
let mut m = Map::new();
match r.pid {
Some(p) => m.insert("pid".into(), (p as i64).into()),
None => m.insert("pid".into(), Dynamic::UNIT),
};
m.insert("status".into(), r.status.clone().into());
m.insert("log_file".into(), r.log_file.clone().into());
m
}
fn vmrecord_to_map(rec: &VmRecord) -> Map {
let mut m = Map::new();
m.insert("spec".into(), vmspec_to_map(&rec.spec).into());
m.insert("runtime".into(), vmruntime_to_map(&rec.runtime).into());
m
}
// Helpers for reading Rhai Map fields
// Fetch a required string field or raise a Rhai runtime error naming the key.
fn must_get_string(m: &Map, k: &str) -> Result<String, Box<EvalAltResult>> {
    let err = || {
        Box::new(EvalAltResult::ErrorRuntime(
            format!("missing or non-string field '{}'", k).into(),
            rhai::Position::NONE,
        ))
    };
    let v = m.get(k).ok_or_else(err)?;
    if v.is_string() {
        Ok(v.clone().cast::<String>())
    } else {
        Err(err())
    }
}
// Optional string field lookup; non-string values read as None.
fn get_string(m: &Map, k: &str) -> Option<String> {
    let v = m.get(k)?;
    if v.is_string() {
        Some(v.clone().cast::<String>())
    } else {
        None
    }
}
// Optional integer field lookup; non-integer values read as None.
fn get_int(m: &Map, k: &str) -> Option<i64> {
    m.get(k)?.as_int().ok()
}
// Optional array-of-strings field; non-string elements are silently skipped.
fn get_string_array(m: &Map, k: &str) -> Option<Vec<String>> {
    let v = m.get(k)?;
    if !v.is_array() {
        return None;
    }
    let items = v.clone().cast::<Array>();
    Some(
        items
            .into_iter()
            .filter_map(|it| {
                if it.is_string() {
                    Some(it.cast::<String>())
                } else {
                    None
                }
            })
            .collect(),
    )
}
// Rhai-exposed functions
/// Create a VM definition from a Rhai map spec; returns the new VM's id.
pub fn cloudhv_vm_create(spec: Map) -> Result<String, Box<EvalAltResult>> {
    let s = map_to_vmspec(spec)?;
    hv_to_rhai(cloudhv::vm_create(&s))
}
/// Start a previously created VM by id.
pub fn cloudhv_vm_start(id: &str) -> Result<(), Box<EvalAltResult>> {
    hv_to_rhai(cloudhv::vm_start(id))
}
/// Stop a VM by id; `force` escalates to SIGKILL if graceful shutdown fails.
pub fn cloudhv_vm_stop(id: &str, force: bool) -> Result<(), Box<EvalAltResult>> {
    hv_to_rhai(cloudhv::vm_stop(id, force))
}
/// Delete a VM definition; with `delete_disks`, remove its disk image too.
pub fn cloudhv_vm_delete(id: &str, delete_disks: bool) -> Result<(), Box<EvalAltResult>> {
    hv_to_rhai(cloudhv::vm_delete(id, delete_disks))
}
/// List all known VM records as an array of Rhai maps.
pub fn cloudhv_vm_list() -> Result<Array, Box<EvalAltResult>> {
    let records = hv_to_rhai(cloudhv::vm_list())?;
    Ok(records.iter().map(|r| vmrecord_to_map(r).into()).collect())
}
/// Fetch a single VM record (spec + runtime) as a Rhai map.
pub fn cloudhv_vm_info(id: &str) -> Result<Map, Box<EvalAltResult>> {
    let rec = hv_to_rhai(cloudhv::vm_info(id))?;
    Ok(vmrecord_to_map(&rec))
}
// Module registration
/// Register all Cloud Hypervisor functions with the given Rhai engine so
/// scripts can call `cloudhv_vm_create`, lifecycle operations, and queries.
pub fn register_cloudhv_module(engine: &mut Engine) -> Result<(), Box<EvalAltResult>> {
    engine.register_fn("cloudhv_vm_create", cloudhv_vm_create);
    engine.register_fn("cloudhv_vm_start", cloudhv_vm_start);
    engine.register_fn("cloudhv_vm_stop", cloudhv_vm_stop);
    engine.register_fn("cloudhv_vm_delete", cloudhv_vm_delete);
    engine.register_fn("cloudhv_vm_list", cloudhv_vm_list);
    engine.register_fn("cloudhv_vm_info", cloudhv_vm_info);
    Ok(())
}

View File

@@ -0,0 +1,139 @@
use crate::qcow2;
use crate::qcow2::{BuildBaseResult, Qcow2Error, Qcow2Snapshot};
use rhai::{Array, Dynamic, Engine, EvalAltResult, Map};
use serde_json::Value;
// Lift a qcow2 SAL error into a Rhai runtime error.
fn qcow2_error_to_rhai<T>(result: Result<T, Qcow2Error>) -> Result<T, Box<EvalAltResult>> {
    match result {
        Ok(v) => Ok(v),
        Err(e) => Err(Box::new(EvalAltResult::ErrorRuntime(
            format!("qcow2 error: {}", e).into(),
            rhai::Position::NONE,
        ))),
    }
}
// Convert serde_json::Value to Rhai Dynamic recursively (maps, arrays, scalars)
fn json_to_dynamic(v: &Value) -> Dynamic {
match v {
Value::Null => Dynamic::UNIT,
Value::Bool(b) => (*b).into(),
Value::Number(n) => {
if let Some(i) = n.as_i64() {
i.into()
} else {
// Avoid float dependency differences; fall back to string
n.to_string().into()
}
}
Value::String(s) => s.clone().into(),
Value::Array(arr) => {
let mut a = Array::new();
for item in arr {
a.push(json_to_dynamic(item));
}
a.into()
}
Value::Object(obj) => {
let mut m = Map::new();
for (k, val) in obj {
m.insert(k.into(), json_to_dynamic(val));
}
m.into()
}
}
}
// Wrappers exposed to Rhai
/// Create a qcow2 image at `path` sized `size_gb` GiB; returns the path.
pub fn qcow2_create(path: &str, size_gb: i64) -> Result<String, Box<EvalAltResult>> {
    qcow2_error_to_rhai(qcow2::create(path, size_gb))
}
/// Return `qemu-img info` for `path` as a nested Rhai value (map/array/scalars).
pub fn qcow2_info(path: &str) -> Result<Dynamic, Box<EvalAltResult>> {
    let v = qcow2_error_to_rhai(qcow2::info(path))?;
    Ok(json_to_dynamic(&v))
}
/// Create an offline snapshot `name` on the image at `path`.
pub fn qcow2_snapshot_create(path: &str, name: &str) -> Result<(), Box<EvalAltResult>> {
    qcow2_error_to_rhai(qcow2::snapshot_create(path, name))
}
/// Delete snapshot `name` from the image at `path`.
pub fn qcow2_snapshot_delete(path: &str, name: &str) -> Result<(), Box<EvalAltResult>> {
    qcow2_error_to_rhai(qcow2::snapshot_delete(path, name))
}
/// List snapshots of a qcow2 image as an array of Rhai maps.
pub fn qcow2_snapshot_list(path: &str) -> Result<Array, Box<EvalAltResult>> {
    let snaps = qcow2_error_to_rhai(qcow2::snapshot_list(path))?;
    Ok(snaps.iter().map(|s| snapshot_to_map(s).into()).collect())
}
fn snapshot_to_map(s: &Qcow2Snapshot) -> Map {
let mut m = Map::new();
if let Some(id) = &s.id {
m.insert("id".into(), id.clone().into());
} else {
m.insert("id".into(), Dynamic::UNIT);
}
if let Some(name) = &s.name {
m.insert("name".into(), name.clone().into());
} else {
m.insert("name".into(), Dynamic::UNIT);
}
if let Some(v) = s.vm_state_size {
m.insert("vm_state_size".into(), v.into());
} else {
m.insert("vm_state_size".into(), Dynamic::UNIT);
}
if let Some(v) = s.date_sec {
m.insert("date_sec".into(), v.into());
} else {
m.insert("date_sec".into(), Dynamic::UNIT);
}
if let Some(v) = s.date_nsec {
m.insert("date_nsec".into(), v.into());
} else {
m.insert("date_nsec".into(), Dynamic::UNIT);
}
if let Some(v) = s.vm_clock_nsec {
m.insert("vm_clock_nsec".into(), v.into());
} else {
m.insert("vm_clock_nsec".into(), Dynamic::UNIT);
}
m
}
/// Download/prepare the Ubuntu 24.04 base image and snapshot it.
/// A `size_gb` of zero or below means "do not resize".
pub fn qcow2_build_ubuntu_24_04_base(
    dest_dir: &str,
    size_gb: i64,
) -> Result<Map, Box<EvalAltResult>> {
    let size_opt = match size_gb > 0 {
        true => Some(size_gb),
        false => None,
    };
    let r: BuildBaseResult =
        qcow2_error_to_rhai(qcow2::build_ubuntu_24_04_base(dest_dir, size_opt))?;
    let mut m = Map::new();
    m.insert("base_image_path".into(), r.base_image_path.into());
    m.insert("snapshot".into(), r.snapshot.into());
    m.insert("url".into(), r.url.into());
    m.insert(
        "resized_to_gb".into(),
        match r.resized_to_gb {
            Some(sz) => sz.into(),
            None => Dynamic::UNIT,
        },
    );
    Ok(m)
}
// Module registration
/// Register all qcow2 functions with the given Rhai engine so scripts can
/// call `qcow2_create`, `qcow2_info`, snapshot helpers, and the base builder.
pub fn register_qcow2_module(engine: &mut Engine) -> Result<(), Box<EvalAltResult>> {
    engine.register_fn("qcow2_create", qcow2_create);
    engine.register_fn("qcow2_info", qcow2_info);
    engine.register_fn("qcow2_snapshot_create", qcow2_snapshot_create);
    engine.register_fn("qcow2_snapshot_delete", qcow2_snapshot_delete);
    engine.register_fn("qcow2_snapshot_list", qcow2_snapshot_list);
    engine.register_fn("qcow2_build_ubuntu_24_04_base", qcow2_build_ubuntu_24_04_base);
    Ok(())
}

View File

@@ -0,0 +1,84 @@
// Basic tests for QCOW2 SAL (offline, will skip if qemu-img is not present)
print("=== QCOW2 Basic Tests ===");
// Dependency check: skip the whole suite when qemu-img is missing
let qemu = which("qemu-img");
if qemu == () {
print("⚠️ qemu-img not available - skipping QCOW2 tests");
print("Install qemu-utils (Debian/Ubuntu) or QEMU tools for your distro.");
print("=== QCOW2 Tests Skipped ===");
exit();
}
// Helper: unique temp path (use monotonic timestamp; avoid shell quoting issues)
let now = run_silent("date +%s%N");
let suffix = if now.success && now.stdout != "" { now.stdout.trim() } else { "100000" };
let img_path = `/tmp/qcow2_test_${suffix}.img`;
print("\n--- Test 1: Create image ---");
try {
let created_path = qcow2_create(img_path, 1);
// created_path should equal img_path
print(`✓ Created qcow2: ${created_path}`);
} catch (err) {
print(`❌ Create failed: ${err}`);
exit();
}
print("\n--- Test 2: Info ---");
let info;
try {
info = qcow2_info(img_path);
} catch (err) {
print(`❌ Info failed: ${err}`);
exit();
}
print("✓ Info fetched");
// Optional fields from qemu-img info; only printed when present
if info.format != () { print(` format: ${info.format}`); }
if info["virtual-size"] != () { print(` virtual-size: ${info["virtual-size"]}`); }
print("\n--- Test 3: Snapshot create/list/delete (offline) ---");
let snap_name = "s1";
try {
qcow2_snapshot_create(img_path, snap_name);
} catch (err) {
print(`❌ snapshot_create failed: ${err}`);
exit();
}
print("✓ snapshot created: s1");
let snaps;
try {
snaps = qcow2_snapshot_list(img_path);
} catch (err) {
print(`❌ snapshot_list failed: ${err}`);
exit();
}
print(`✓ snapshot_list ok, count=${snaps.len()}`);
try {
qcow2_snapshot_delete(img_path, snap_name);
} catch (err) {
print(`❌ snapshot_delete failed: ${err}`);
exit();
}
print("✓ snapshot deleted: s1");
// Base image builder — NOTE(review): this section is ACTIVE, not commented
// out; it downloads the Ubuntu Noble cloud image (a large download) unless
// already cached under /tmp/virt_images. Comment it out to keep this test
// fully offline.
print("\n--- Optional: Build Ubuntu 24.04 Base ---");
let base_dir = "/tmp/virt_images";
let m;
try {
m = qcow2_build_ubuntu_24_04_base(base_dir, 10);
} catch (err) {
print(`⚠️ base build failed or skipped: ${err}`);
exit();
}
print(`✓ Base image path: ${m.base_image_path}`);
print(`✓ Base snapshot: ${m.snapshot}`);
print(`✓ Source URL: ${m.url}`);
if m.resized_to_gb != () { print(`✓ Resized to: ${m.resized_to_gb}G`); }
print("\n=== QCOW2 Basic Tests Completed ===");

View File

@@ -0,0 +1,164 @@
// Basic Cloud Hypervisor SAL smoke test (minimal)
// - Skips gracefully if dependencies or inputs are missing
// - Creates a VM spec, optionally starts/stops it if all inputs are available
print("=== Cloud Hypervisor Basic Tests ===");
// Dependency checks (static binaries only)
let chs = which("cloud-hypervisor-static");
let chrs = which("ch-remote-static");
// Normalize which() results: () or "" both mean missing (depending on SAL which variant)
let ch_missing = (chs == () || chs == "");
let chr_missing = (chrs == () || chrs == "");
if ch_missing || chr_missing {
print("⚠️ cloud-hypervisor-static and/or ch-remote-static not available - skipping CloudHV tests");
print("Install Cloud Hypervisor static binaries to run these tests.");
print("=== CloudHV Tests Skipped ===");
exit();
}
// Inputs (adjust these for your environment)
// Prefer firmware boot if firmware is available; otherwise fallback to direct kernel boot.
let firmware_path = "/tmp/virt_images/hypervisor-fw";
let kernel_path = "/path/to/vmlinux"; // optional when firmware_path is present
// We can reuse the base image from the QCOW2 test/builder if present.
let disk_path = "/tmp/virt_images/noble-server-cloudimg-amd64.img";
// Validate inputs
let missing = false;
let have_firmware = exist(firmware_path);
let have_kernel = exist(kernel_path);
if !have_firmware && !have_kernel {
print(`⚠️ neither firmware_path (${firmware_path}) nor kernel_path (${kernel_path}) found (start/stop will be skipped)`);
missing = true;
}
if !exist(disk_path) {
print(`⚠️ disk_path not found: ${disk_path} (start/stop will be skipped)`);
missing = true;
}
// Unique id
let rid = run_silent("date +%s%N");
let suffix = if rid.success && rid.stdout != "" { rid.stdout.trim() } else { "100000" };
let vm_id = `testvm_${suffix}`;
print("\n--- Test 1: Create VM definition ---");
let spec = #{
"id": vm_id,
"disk_path": disk_path,
"api_socket": "", // default under VM dir
"vcpus": 1,
"memory_mb": 1024,
// For firmware boot:
// Provide firmware_path only if it exists
// For kernel boot:
// Provide kernel_path and optionally a cmdline
};
if have_firmware {
spec.firmware_path = firmware_path;
} else if have_kernel {
spec.kernel_path = kernel_path;
spec.cmdline = "console=ttyS0 reboot=k panic=1";
}
// "extra_args": can be added if needed, e.g.:
// spec.extra_args = ["--rng", "src=/dev/urandom"];
try {
let created_id = cloudhv_vm_create(spec);
print(`✓ VM created: ${created_id}`);
} catch (err) {
print(`❌ VM create failed: ${err}`);
print("=== CloudHV Tests Aborted ===");
exit();
}
print("\n--- Test 2: VM info ---");
try {
let info = cloudhv_vm_info(vm_id);
print(`✓ VM info loaded: id=${info.spec.id}, status=${info.runtime.status}`);
} catch (err) {
print(`❌ VM info failed: ${err}`);
print("=== CloudHV Tests Aborted ===");
exit();
}
print("\n--- Test 3: VM list ---");
try {
let vms = cloudhv_vm_list();
print(`✓ VM list size: ${vms.len()}`);
} catch (err) {
print(`❌ VM list failed: ${err}`);
print("=== CloudHV Tests Aborted ===");
exit();
}
// Start/Stop only if inputs exist
if !missing {
print("\n--- Test 4: Start VM ---");
try {
cloudhv_vm_start(vm_id);
print("✓ VM start invoked");
} catch (err) {
print(`⚠️ VM start failed (this can happen if kernel/cmdline are incompatible): ${err}`);
}
print("\n waiting for VM to be ready...");
// Discover API socket and PID from SAL
let info1 = cloudhv_vm_info(vm_id);
let api_sock = info1.spec.api_socket;
let pid = info1.runtime.pid;
// 1) Wait for API socket to appear (up to ~50s)
let sock_ok = false;
for x in 0..50 {
if exist(api_sock) { sock_ok = true; break; }
sleep(1);
}
print(`api_sock_exists=${sock_ok} path=${api_sock}`);
// 2) Probe ch-remote info with retries (up to ~20s)
if sock_ok {
let info_ok = false;
for x in 0..20 {
let r = run_silent(`ch-remote-static --api-socket ${api_sock} info`);
if r.success {
info_ok = true;
break;
}
sleep(1);
}
if info_ok {
print("VM API is ready (ch-remote info OK)");
} else {
print("⚠️ VM API did not become ready in time (continuing)");
}
} else {
print("⚠️ API socket not found (continuing)");
}
print("\n--- Test 5: Stop VM (graceful) ---");
try {
cloudhv_vm_stop(vm_id, false);
print("✓ VM stop invoked (graceful)");
} catch (err) {
print(`⚠️ VM stop failed: ${err}`);
}
} else {
print("\n⚠ Skipping start/stop because required inputs are missing.");
}
print("\n--- Test 6: Delete VM definition ---");
try {
cloudhv_vm_delete(vm_id, false);
print("✓ VM deleted");
} catch (err) {
print(`❌ VM delete failed: ${err}`);
print("=== CloudHV Tests Aborted ===");
exit();
}
print("\n=== Cloud Hypervisor Basic Tests Completed ===");

Submodule research/robot_hetzner_rhai deleted from 59583124a8

5
rhailib/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
target
worker_rhai_temp_db
dump.rdb
.DS_Store
.env

27
rhailib/Cargo.toml Normal file
View File

@@ -0,0 +1,27 @@
[package]
name = "rhailib"
version = "0.1.0"
edition = "2021" # Changed to 2021 for consistency with other crates
[dependencies]
anyhow = "1.0"
chrono = { version = "0.4", features = ["serde"] }
env_logger = "0.10"
log = "0.4"
redis = { version = "0.25.0", features = ["tokio-comp"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "sync", "signal"] }
rhai = "1.21.0"
derive = { path = "src/derive" }
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
uuid = { version = "1.6", features = ["v4", "serde"] } # For examples like dedicated_reply_queue_demo
tempfile = "3.10"
[[bench]]
name = "simple_rhai_bench"
harness = false

114
rhailib/README.md Normal file
View File

@@ -0,0 +1,114 @@
# rhailib: Distributed Rhai Scripting for HeroModels
`rhailib` provides a robust infrastructure for executing Rhai scripts in a distributed manner, primarily designed to integrate with and extend the HeroModels ecosystem. It allows for dynamic scripting capabilities, offloading computation, and enabling flexible automation.
## Overview
The `rhailib` system is composed of three main components working together, leveraging Redis for task queuing and state management:
1. **Rhai Engine (`src/engine`):**
This crate is the core of the scripting capability. It provides a Rhai engine pre-configured with various HeroModels modules (e.g., Calendar, Flow, Legal). Scripts executed within this engine can interact directly with HeroModels data and logic. The `engine` is utilized by the `rhai_worker` to process tasks.
2. **Rhai Client (`src/client`):**
This crate offers an interface for applications to submit Rhai scripts as tasks to the distributed execution system. Clients can send scripts to named Redis queues (referred to as "contexts"), optionally wait for results, and handle timeouts.
3. **Rhai Worker (`src/worker`):**
This executable component listens to one or more Redis queues ("contexts") for incoming tasks. When a task (a Rhai script) is received, the worker fetches its details, uses the `rhai_engine` to execute the script, and then updates the task's status and results back into Redis. Multiple worker instances can be deployed to scale script execution.
## Architecture & Workflow
The typical workflow is as follows:
1. **Task Submission:** An application using `rhai_dispatcher` submits a Rhai script to a specific Redis list (e.g., `rhai:queue:my_context`). Task details, including the script and status, are stored in a Redis hash.
2. **Task Consumption:** A `rhai_worker` instance, configured to listen to `rhai:queue:my_context`, picks up the task ID from the queue using a blocking pop operation.
3. **Script Execution:** The worker retrieves the script from Redis and executes it using an instance of the `rhai_engine`. This engine provides the necessary HeroModels context for the script.
4. **Result Storage:** Upon completion (or error), the worker updates the task's status (e.g., `completed`, `failed`) and stores any return value or error message in the corresponding Redis hash.
5. **Result Retrieval (Optional):** The `rhai_dispatcher` can poll the Redis hash for the task's status and retrieve the results once available.
This architecture allows for:
- Asynchronous script execution.
- Scalable processing of Rhai scripts by running multiple workers.
- Decoupling of script submission from execution.
## Project Structure
The core components are organized as separate crates within the `src/` directory:
- `src/client/`: Contains the `rhai_dispatcher` library.
- `src/engine/`: Contains the `rhai_engine` library.
- `src/worker/`: Contains the `rhai_worker` library and its executable.
Each of these directories contains its own `README.md` file with more detailed information about its specific functionality, setup, and usage.
## Getting Started
To work with this project:
1. Ensure you have Rust and Cargo installed.
2. A running Redis instance is required for the `client` and `worker` components to communicate.
3. Explore the individual README files in `src/client/`, `src/worker/`, and `src/engine/` for detailed instructions on building, configuring, and running each component.
You can typically build all components using:
```bash
cargo build --workspace
```
Or build and run specific examples or binaries as detailed in their respective READMEs.
## Async API Integration
`rhailib` includes a powerful async architecture that enables Rhai scripts to perform HTTP API calls despite Rhai's synchronous nature. This allows scripts to integrate with external services like Stripe, payment processors, and other REST/GraphQL APIs.
### Key Features
- **Async HTTP Support**: Make API calls from synchronous Rhai scripts
- **Multi-threaded Architecture**: Uses MPSC channels to bridge sync/async execution
- **Built-in Stripe Integration**: Complete payment processing capabilities
- **Builder Pattern APIs**: Fluent, chainable API for creating complex objects
- **Error Handling**: Graceful error handling with try/catch support
- **Environment Configuration**: Secure credential management via environment variables
### Quick Example
```rhai
// Configure API client
configure_stripe(STRIPE_API_KEY);
// Create a product with pricing
let product = new_product()
.name("Premium Software License")
.description("Professional software solution")
.metadata("category", "software");
let product_id = product.create();
// Create subscription pricing
let monthly_price = new_price()
.amount(2999) // $29.99 in cents
.currency("usd")
.product(product_id)
.recurring("month");
let price_id = monthly_price.create();
// Create a subscription
let subscription = new_subscription()
.customer("cus_customer_id")
.add_price(price_id)
.trial_days(14)
.create();
```
### Documentation
- **[Async Architecture Guide](docs/ASYNC_RHAI_ARCHITECTURE.md)**: Detailed technical documentation of the async architecture, including design decisions, thread safety, and extensibility patterns.
- **[API Integration Guide](docs/API_INTEGRATION_GUIDE.md)**: Practical guide with examples for integrating external APIs, error handling patterns, and best practices.
## Purpose
`rhailib` aims to provide a flexible and powerful way to extend applications with custom logic written in Rhai, executed in a controlled and scalable environment. This is particularly useful for tasks such as:
- Implementing dynamic business rules.
- Automating processes with external API integration.
- Running background computations.
- Processing payments and subscriptions.
- Customizing application behavior without recompilation.
- Integrating with third-party services (Stripe, webhooks, etc.).

View File

@@ -0,0 +1 @@
/target

View File

@@ -0,0 +1,24 @@
[package]
name = "rhai_dispatcher"
version = "0.1.0"
edition = "2021"

# Command-line dispatcher binary; source lives in cmd/dispatcher.rs.
[[bin]]
name = "dispatcher"
path = "cmd/dispatcher.rs"

[dependencies]
clap = { version = "4.4", features = ["derive"] } # CLI parsing (derive API)
env_logger = "0.10"
redis = { version = "0.25.0", features = ["tokio-comp"] } # async Redis over tokio
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
uuid = { version = "1.6", features = ["v4", "serde"] } # task/request identifiers
chrono = { version = "0.4", features = ["serde"] } # created_at / updated_at timestamps
log = "0.4"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] } # For async main in examples, and general async
colored = "2.0" # colored terminal output in the interactive client

[dev-dependencies] # For examples later
# NOTE(review): env_logger is already a regular dependency above, so this
# duplicate entry is redundant and could be removed.
env_logger = "0.10"
rhai = "1.18.0" # For examples that might need to show engine setup

View File

@@ -0,0 +1,107 @@
# Rhai Client
The `rhai_dispatcher` crate provides a fluent builder-based interface for submitting Rhai scripts to a distributed task execution system over Redis. It enables applications to offload Rhai script execution to one or more worker services and await the results.
## Features
- **Fluent Builder API**: A `RhaiDispatcherBuilder` for easy client configuration and a `PlayRequestBuilder` for constructing and submitting script execution requests.
- **Asynchronous Operations**: Built with `tokio` for non-blocking I/O.
- **Request-Reply Pattern**: Submits tasks and awaits results on a dedicated reply queue, eliminating the need for polling.
- **Configurable Timeouts**: Set timeouts for how long the client should wait for a task to complete.
- **Direct-to-Worker-Queue Submission**: Tasks are sent to a queue named after the `worker_id`, allowing for direct and clear task routing.
- **Manual Status Check**: Provides an option to manually check the status of a task by its ID.
## Core Components
- **`RhaiDispatcherBuilder`**: A builder to construct a `RhaiDispatcher`. Requires a `caller_id` and Redis URL.
- **`RhaiDispatcher`**: The main client for interacting with the task system. It's used to create `PlayRequestBuilder` instances.
- **`PlayRequestBuilder`**: A fluent builder for creating and dispatching a script execution request. You can set:
- `worker_id`: The ID of the worker queue to send the task to.
- `script` or `script_path`: The Rhai script to execute.
- `request_id`: An optional unique ID for the request.
- `timeout`: How long to wait for a result.
- **Submission Methods**:
- `submit()`: Submits the request and returns immediately (fire-and-forget).
- `await_response()`: Submits the request and waits for the result or a timeout.
- **`RhaiTaskDetails`**: A struct representing the details of a task, including its script, status (`pending`, `processing`, `completed`, `error`), output, and error messages.
- **`RhaiDispatcherError`**: An enum for various errors, such as Redis errors, serialization issues, or task timeouts.
## How It Works
1. A `RhaiDispatcher` is created using the `RhaiDispatcherBuilder`, configured with a `caller_id` and Redis URL.
2. A `PlayRequestBuilder` is created from the client.
3. The script, `worker_id`, and an optional `timeout` are configured on the builder.
4. When `await_response()` is called:
a. A unique `task_id` (UUID v4) is generated.
b. Task details are stored in a Redis hash with a key like `rhailib:<task_id>`.
c. The `task_id` is pushed to the worker's queue, named `rhailib:<worker_id>`.
d. The client performs a blocking pop (`BLPOP`) on a dedicated reply queue (`rhailib:reply:<task_id>`), waiting for the worker to send the result.
5. A `rhai-worker` process, listening on the `rhailib:<worker_id>` queue, picks up the task, executes it, and pushes the final `RhaiTaskDetails` to the reply queue.
6. The client receives the result from the reply queue and returns it to the caller.
## Prerequisites
- A running Redis instance accessible by the client and the worker services.
## Usage Example
The following example demonstrates how to build a client, submit a script, and wait for the result.
```rust
use rhai_dispatcher::{RhaiDispatcherBuilder, RhaiDispatcherError};
use std::time::Duration;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
// 1. Build the client
let client = RhaiDispatcherBuilder::new()
.caller_id("my-app-instance-1")
.redis_url("redis://127.0.0.1/")
.build()?;
// 2. Define the script and target worker
let script = r#" "Hello, " + worker_id + "!" "#;
let worker_id = "worker-1";
// 3. Use the PlayRequestBuilder to configure and submit the request
let result = client
.new_play_request()
.worker_id(worker_id)
.script(script)
.timeout(Duration::from_secs(5))
.await_response()
.await;
match result {
Ok(details) => {
log::info!("Task completed successfully!");
log::info!("Status: {}", details.status);
if let Some(output) = details.output {
log::info!("Output: {}", output);
}
}
Err(RhaiDispatcherError::Timeout(task_id)) => {
log::error!("Task {} timed out.", task_id);
}
Err(e) => {
log::error!("An unexpected error occurred: {}", e);
}
}
Ok(())
}
```
Refer to the `examples/` directory for more specific use cases, such as `timeout_example.rs` which tests the timeout mechanism.
## Building and Running Examples
To run an example (e.g., `timeout_example`):
```bash
cd src/client # (or wherever this client's Cargo.toml is)
cargo run --example timeout_example
```
Ensure a Redis server is running and accessible at `redis://127.0.0.1/`.

View File

@@ -0,0 +1,157 @@
# Rhai Client Binary
A command-line client for executing Rhai scripts on remote workers via Redis.
## Binary: `client`
### Installation
Build the binary:
```bash
cargo build --bin client --release
```
### Usage
```bash
# Basic usage - requires caller and circle keys
client --caller-key <CALLER_KEY> --circle-key <CIRCLE_KEY>
# Execute inline script
client -c <CALLER_KEY> -k <CIRCLE_KEY> --script "print('Hello World!')"
# Execute script from file
client -c <CALLER_KEY> -k <CIRCLE_KEY> --file script.rhai
# Use specific worker (defaults to circle key)
client -c <CALLER_KEY> -k <CIRCLE_KEY> -w <WORKER_KEY> --script "2 + 2"
# Custom Redis and timeout
client -c <CALLER_KEY> -k <CIRCLE_KEY> --redis-url redis://localhost:6379/1 --timeout 60
# Remove timestamps from logs
client -c <CALLER_KEY> -k <CIRCLE_KEY> --no-timestamp
# Increase verbosity
client -c <CALLER_KEY> -k <CIRCLE_KEY> -v --script "debug_info()"
```
### Command-Line Options
| Option | Short | Default | Description |
|--------|-------|---------|-------------|
| `--caller-key` | `-c` | **Required** | Caller public key (your identity) |
| `--circle-key` | `-k` | **Required** | Circle public key (execution context) |
| `--worker-key` | `-w` | `circle-key` | Worker public key (target worker) |
| `--redis-url` | `-r` | `redis://localhost:6379` | Redis connection URL |
| `--script` | `-s` | | Rhai script to execute |
| `--file` | `-f` | | Path to Rhai script file |
| `--timeout` | `-t` | `30` | Timeout for script execution (seconds) |
| `--no-timestamp` | | `false` | Remove timestamps from log output |
| `--verbose` | `-v` | | Increase verbosity (stackable) |
### Execution Modes
#### Inline Script Execution
```bash
# Execute a simple calculation
client -c caller_123 -k circle_456 -s "let result = 2 + 2; print(result);"
# Execute with specific worker
client -c caller_123 -k circle_456 -w worker_789 -s "get_user_data()"
```
#### Script File Execution
```bash
# Execute script from file
client -c caller_123 -k circle_456 -f examples/data_processing.rhai
# Execute with custom timeout
client -c caller_123 -k circle_456 -f long_running_script.rhai -t 120
```
#### Interactive Mode
```bash
# Enter interactive REPL mode (when no script or file provided)
client -c caller_123 -k circle_456
# Interactive mode with verbose logging
client -c caller_123 -k circle_456 -v --no-timestamp
```
### Interactive Mode
When no script (`-s`) or file (`-f`) is provided, the client enters interactive mode:
```
🔗 Starting Rhai Client
📋 Configuration:
Caller Key: caller_123
Circle Key: circle_456
Worker Key: circle_456
Redis URL: redis://localhost:6379
Timeout: 30s
✅ Connected to Redis at redis://localhost:6379
🎮 Entering interactive mode
Type Rhai scripts and press Enter to execute. Type 'exit' or 'quit' to close.
rhai> let x = 42; print(x);
Status: completed
Output: 42
rhai> exit
👋 Goodbye!
```
### Configuration Examples
#### Development Usage
```bash
# Simple development client
client -c dev_user -k dev_circle
# Development with clean logs
client -c dev_user -k dev_circle --no-timestamp -v
```
#### Production Usage
```bash
# Production client with specific worker
client \
--caller-key prod_user_123 \
--circle-key prod_circle_456 \
--worker-key prod_worker_789 \
--redis-url redis://redis-cluster:6379/0 \
--timeout 300 \
--file production_script.rhai
```
#### Batch Processing
```bash
# Process multiple scripts
for script in scripts/*.rhai; do
client -c batch_user -k batch_circle -f "$script" --no-timestamp
done
```
### Key Concepts
- **Caller Key**: Your identity - used for authentication and tracking
- **Circle Key**: Execution context - defines the environment/permissions
- **Worker Key**: Target worker - which worker should execute the script (defaults to circle key)
### Error Handling
The client provides clear error messages for:
- Missing required keys
- Redis connection failures
- Script execution timeouts
- Worker unavailability
- Script syntax errors
### Dependencies
- `rhai_dispatcher`: Core client library for Redis-based script execution
- `redis`: Redis client for task queue communication
- `clap`: Command-line argument parsing
- `env_logger`: Logging infrastructure
- `tokio`: Async runtime

View File

@@ -0,0 +1,207 @@
use clap::Parser;
use rhai_dispatcher::{RhaiDispatcher, RhaiDispatcherBuilder};
use log::{error, info};
use colored::Colorize;
use std::io::{self, Write};
use std::time::Duration;
// CLI surface of the dispatcher binary. The field-level `///` docs double as
// clap help text, so they are left byte-identical; review notes use `//`.
#[derive(Parser, Debug)]
#[command(author, version, about = "Rhai Client - Script execution client", long_about = None)]
struct Args {
    /// Caller public key (caller ID)
    #[arg(short = 'c', long = "caller-key", help = "Caller public key (your identity)")]
    caller_id: String,

    /// Circle public key (context ID)
    #[arg(short = 'k', long = "circle-key", help = "Circle public key (execution context)")]
    context_id: String,

    /// Worker public key (defaults to circle public key if not provided)
    // NOTE(review): despite the help text, this is a plain `String` with no
    // `default_value`, so clap treats `--worker-key` as required. Making it
    // `Option<String>` and falling back to `context_id` in `main` would match
    // the documented behavior — TODO confirm intent.
    #[arg(short = 'w', long = "worker-key", help = "Worker public key (defaults to circle key)")]
    worker_id: String,

    /// Redis URL
    #[arg(short, long, default_value = "redis://localhost:6379", help = "Redis connection URL")]
    redis_url: String,

    /// Rhai script to execute
    // `main` checks `script` before `file`, so an inline script wins when
    // both are given.
    #[arg(short, long, help = "Rhai script to execute")]
    script: Option<String>,

    /// Path to Rhai script file
    #[arg(short, long, help = "Path to Rhai script file")]
    file: Option<String>,

    /// Timeout for script execution (in seconds)
    #[arg(short, long, default_value = "30", help = "Timeout for script execution in seconds")]
    timeout: u64,

    /// Increase verbosity (can be used multiple times)
    #[arg(short, long, action = clap::ArgAction::Count, help = "Increase verbosity (-v for debug, -vv for trace)")]
    verbose: u8,

    /// Disable timestamps in log output
    #[arg(long, help = "Remove timestamps from log output")]
    no_timestamp: bool,
}
/// Entry point: parse CLI arguments, configure logging, build the dispatcher
/// client, then run in one of three modes — inline script, script file, or
/// interactive REPL.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    // Translate the stacked -v flags into env_logger filter directives.
    let filter = if args.verbose == 0 {
        "warn,rhai_dispatcher=warn"
    } else if args.verbose == 1 {
        "info,rhai_dispatcher=info"
    } else if args.verbose == 2 {
        "debug,rhai_dispatcher=debug"
    } else {
        "trace,rhai_dispatcher=trace"
    };
    std::env::set_var("RUST_LOG", filter);

    // Strip timestamps from log lines when --no-timestamp is given.
    if args.no_timestamp {
        env_logger::Builder::from_default_env()
            .format_timestamp(None)
            .init();
    } else {
        env_logger::init();
    }

    if args.verbose > 0 {
        info!("🔗 Starting Rhai Dispatcher");
        info!("📋 Configuration:");
        info!("   Caller ID: {}", args.caller_id);
        info!("   Context ID: {}", args.context_id);
        info!("   Worker ID: {}", args.worker_id);
        info!("   Redis URL: {}", args.redis_url);
        info!("   Timeout: {}s", args.timeout);
        info!("");
    }

    // Build the dispatcher client from the CLI configuration.
    let dispatcher = RhaiDispatcherBuilder::new()
        .caller_id(&args.caller_id)
        .worker_id(&args.worker_id)
        .context_id(&args.context_id)
        .redis_url(&args.redis_url)
        .build()?;

    if args.verbose > 0 {
        info!("✅ Connected to Redis at {}", args.redis_url);
    }

    // Select the execution mode; an inline script takes precedence over a file.
    match (args.script, args.file) {
        (Some(inline), _) => {
            if args.verbose > 0 {
                info!("📜 Executing inline script");
            }
            execute_script(&dispatcher, inline, args.timeout).await?;
        }
        (None, Some(path)) => {
            if args.verbose > 0 {
                info!("📁 Loading script from file: {}", path);
            }
            let contents = std::fs::read_to_string(&path)
                .map_err(|e| format!("Failed to read script file '{}': {}", path, e))?;
            execute_script(&dispatcher, contents, args.timeout).await?;
        }
        (None, None) => {
            info!("🎮 Entering interactive mode");
            info!("Type Rhai scripts and press Enter to execute. Type 'exit' or 'quit' to close.");
            run_interactive_mode(&dispatcher, args.timeout, args.verbose).await?;
        }
    }

    Ok(())
}
/// Submits a single script through the dispatcher and prints the outcome.
///
/// On success, prints the task status plus any output/error fields; on
/// failure, logs the error and propagates it to the caller.
async fn execute_script(
    client: &RhaiDispatcher,
    script: String,
    timeout_secs: u64,
) -> Result<(), Box<dyn std::error::Error>> {
    info!("⚡ Executing script: {:.50}...", script);

    let wait = Duration::from_secs(timeout_secs);
    let response = client
        .new_play_request()
        .script(&script)
        .timeout(wait)
        .await_response()
        .await;

    // Unwrap the dispatch result, bailing out early on failure.
    let result = match response {
        Ok(details) => details,
        Err(e) => {
            error!("❌ Script execution failed: {}", e);
            return Err(Box::new(e));
        }
    };

    info!("✅ Script execution completed");
    println!("Status: {}", result.status);
    if let Some(output) = result.output {
        println!("Output: {}", output);
    }
    if let Some(error) = result.error {
        println!("Error: {}", error);
    }
    Ok(())
}
async fn run_interactive_mode(
client: &RhaiDispatcher,
timeout_secs: u64,
verbose: u8,
) -> Result<(), Box<dyn std::error::Error>> {
let timeout = Duration::from_secs(timeout_secs);
loop {
print!("rhai> ");
io::stdout().flush()?;
let mut input = String::new();
io::stdin().read_line(&mut input)?;
let input = input.trim();
if input.is_empty() {
continue;
}
if input == "exit" || input == "quit" {
info!("👋 Goodbye!");
break;
}
if verbose > 0 {
info!("⚡ Executing: {}", input);
}
match client
.new_play_request()
.script(input)
.timeout(timeout)
.await_response()
.await
{
Ok(result) => {
if let Some(output) = result.output {
println!("{}", output.color("green"));
}
if let Some(error) = result.error {
println!("{}", format!("error: {}", error).color("red"));
}
}
Err(e) => {
println!("{}", format!("error: {}", e).red());
}
}
println!(); // Add blank line for readability
}
Ok(())
}

View File

@@ -0,0 +1,190 @@
# Architecture of the `rhai_dispatcher` Crate
The `rhai_dispatcher` crate provides a Redis-based client library for submitting Rhai scripts to distributed worker services and awaiting their execution results. It implements a request-reply pattern using Redis as the message broker.
## Core Architecture
The client follows a builder pattern design with clear separation of concerns:
```mermaid
graph TD
A[RhaiDispatcherBuilder] --> B[RhaiDispatcher]
B --> C[PlayRequestBuilder]
C --> D[PlayRequest]
D --> E[Redis Task Queue]
E --> F[Worker Service]
F --> G[Redis Reply Queue]
G --> H[Client Response]
subgraph "Client Components"
A
B
C
D
end
subgraph "Redis Infrastructure"
E
G
end
subgraph "External Services"
F
end
```
## Key Components
### 1. RhaiDispatcherBuilder
A builder pattern implementation for constructing `RhaiDispatcher` instances with proper configuration validation.
**Responsibilities:**
- Configure Redis connection URL
- Set caller ID for task attribution
- Validate configuration before building client
**Key Methods:**
- `caller_id(id: &str)` - Sets the caller identifier
- `redis_url(url: &str)` - Configures Redis connection
- `build()` - Creates the final `RhaiDispatcher` instance
### 2. RhaiDispatcher
The main client interface that manages Redis connections and provides factory methods for creating play requests.
**Responsibilities:**
- Maintain Redis connection pool
- Provide factory methods for request builders
- Handle low-level Redis operations
- Manage task status queries
**Key Methods:**
- `new_play_request()` - Creates a new `PlayRequestBuilder`
- `get_task_status(task_id)` - Queries task status from Redis
- Internal methods for Redis operations
### 3. PlayRequestBuilder
A fluent builder for constructing and submitting script execution requests.
**Responsibilities:**
- Configure script execution parameters
- Handle script loading from files or strings
- Manage request timeouts
- Provide submission methods (fire-and-forget vs await-response)
**Key Methods:**
- `worker_id(id: &str)` - Target worker queue (determines which worker processes the task)
- `context_id(id: &str)` - Target context ID (determines execution context/circle)
- `script(content: &str)` - Set script content directly
- `script_path(path: &str)` - Load script from file
- `timeout(duration: Duration)` - Set execution timeout
- `submit()` - Fire-and-forget submission
- `await_response()` - Submit and wait for result
**Architecture Note:** The decoupling of `worker_id` and `context_id` allows a single worker to process tasks for multiple contexts (circles), providing greater deployment flexibility.
### 4. Data Structures
#### RhaiTaskDetails
Represents the complete state of a task throughout its lifecycle.
```rust
pub struct RhaiTaskDetails {
    pub task_id: String,
    pub script: String,
    pub status: String, // "pending", "processing", "completed", "error"
    pub output: Option<String>,
    pub error: Option<String>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    pub caller_id: String,
    pub context_id: String,
    pub worker_id: String,
}
```
#### RhaiDispatcherError
Comprehensive error handling for various failure scenarios:
- `RedisError` - Redis connection/operation failures
- `SerializationError` - JSON serialization/deserialization issues
- `Timeout` - Task execution timeouts
- `TaskNotFound` - Missing tasks after submission
## Communication Protocol
### Task Submission Flow
1. **Task Creation**: Client generates unique UUID for task identification
2. **Task Storage**: Task details stored in Redis hash: `rhailib:<task_id>`
3. **Queue Submission**: Task ID pushed to worker queue: `rhailib:<worker_id>`
4. **Reply Queue Setup**: Client listens on: `rhailib:reply:<task_id>`
### Redis Key Patterns
- **Task Storage**: `rhailib:<task_id>` (Redis Hash)
- **Worker Queues**: `rhailib:<worker_id>` (Redis List)
- **Reply Queues**: `rhailib:reply:<task_id>` (Redis List)
### Message Flow Diagram
```mermaid
sequenceDiagram
participant C as Client
participant R as Redis
participant W as Worker
C->>R: HSET rhailib:task_id (task details)
C->>R: LPUSH rhailib:worker_id task_id
C->>R: BLPOP rhailib:reply:task_id (blocking)
W->>R: BRPOP rhailib:worker_id (blocking)
W->>W: Execute Rhai Script
W->>R: LPUSH rhailib:reply:task_id (result)
R->>C: Return result from BLPOP
C->>R: DEL rhailib:reply:task_id (cleanup)
```
## Concurrency and Async Design
The client is built on `tokio` for asynchronous operations:
- **Connection Pooling**: Uses Redis multiplexed connections for efficiency
- **Non-blocking Operations**: All Redis operations are async
- **Timeout Handling**: Configurable timeouts with proper cleanup
- **Error Propagation**: Comprehensive error handling with context
## Configuration and Deployment
### Prerequisites
- Redis server accessible to both client and workers
- Proper network connectivity between components
- Sufficient Redis memory for task storage
### Configuration Options
- **Redis URL**: Connection string for Redis instance
- **Caller ID**: Unique identifier for client instance
- **Timeouts**: Per-request timeout configuration
- **Worker Targeting**: Direct worker queue addressing
## Security Considerations
- **Task Isolation**: Each task uses unique identifiers
- **Queue Separation**: Worker-specific queues prevent cross-contamination
- **Cleanup**: Automatic cleanup of reply queues after completion
- **Error Handling**: Secure error propagation without sensitive data leakage
## Performance Characteristics
- **Scalability**: Horizontal scaling through multiple worker instances
- **Throughput**: Limited by Redis performance and network latency
- **Memory Usage**: Efficient with connection pooling and cleanup
- **Latency**: Low latency for local Redis deployments
## Integration Points
The client integrates with:
- **Worker Services**: Via Redis queue protocol
- **Monitoring Systems**: Through structured logging
- **Application Code**: Via builder pattern API
- **Configuration Systems**: Through environment variables and builders

View File

@@ -0,0 +1,90 @@
use log::info;
use rhai_dispatcher::{RhaiDispatcherBuilder, RhaiDispatcherError};
use std::time::{Duration, Instant};
/// Demonstrates the client-side timeout: the script is submitted to a worker
/// queue that no worker listens on, so `await_response()` is expected to fail
/// with `RhaiDispatcherError::Timeout` after roughly the configured duration.
/// Requires a reachable Redis at redis://127.0.0.1/.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::builder()
        .filter_level(log::LevelFilter::Info)
        .init();

    // Build the client using the new builder pattern
    let client = RhaiDispatcherBuilder::new()
        .caller_id("timeout-example-runner")
        .redis_url("redis://127.0.0.1/")
        .build()?;
    info!("RhaiDispatcher created.");

    let script_content = r#"
        // This script will never be executed by a worker because the recipient does not exist.
        let x = 10;
        let y = x + 32;
        y
    "#;

    // The worker_id points to a worker queue that doesn't have a worker.
    let non_existent_recipient = "non_existent_worker_for_timeout_test";
    let very_short_timeout = Duration::from_secs(2);

    info!(
        "Submitting script to non-existent recipient '{}' with a timeout of {:?}...",
        non_existent_recipient, very_short_timeout
    );

    // Measure wall-clock time so the timeout duration can be sanity-checked.
    let start_time = Instant::now();

    // Use the new PlayRequestBuilder
    let result = client
        .new_play_request()
        .worker_id(non_existent_recipient)
        .script(script_content)
        .timeout(very_short_timeout)
        .await_response()
        .await;

    match result {
        // Success here means the timeout machinery is broken: nothing should
        // ever pick up the task.
        Ok(details) => {
            log::error!(
                "Timeout Example FAILED: Expected a timeout, but got Ok: {:?}",
                details
            );
            Err("Expected timeout, but task completed successfully.".into())
        }
        Err(e) => {
            let elapsed = start_time.elapsed();
            info!("Timeout Example: Received error as expected: {}", e);
            info!("Elapsed time: {:?}", elapsed);

            match e {
                RhaiDispatcherError::Timeout(task_id) => {
                    info!("Timeout Example PASSED: Correctly received RhaiDispatcherError::Timeout for task_id: {}", task_id);
                    // Ensure the elapsed time is close to the timeout duration
                    // Allow for some buffer for processing
                    // NOTE(review): the 1s upper bound may be tight on a heavily
                    // loaded machine — TODO confirm this tolerance is intended.
                    assert!(
                        elapsed >= very_short_timeout
                            && elapsed < very_short_timeout + Duration::from_secs(1),
                        "Elapsed time {:?} should be close to timeout {:?}",
                        elapsed,
                        very_short_timeout
                    );
                    info!(
                        "Elapsed time {:?} is consistent with timeout duration {:?}.",
                        elapsed, very_short_timeout
                    );
                    Ok(())
                }
                // Any other error (e.g. Redis connection failure) also fails
                // the example, but with a distinct message.
                other_error => {
                    log::error!(
                        "Timeout Example FAILED: Expected RhaiDispatcherError::Timeout, but got other error: {:?}",
                        other_error
                    );
                    Err(format!(
                        "Expected RhaiDispatcherError::Timeout, got other error: {:?}",
                        other_error
                    )
                    .into())
                }
            }
        }
    }
}

View File

@@ -0,0 +1,638 @@
//! # Rhai Client Library
//!
//! A Redis-based client library for submitting Rhai scripts to distributed worker services
//! and awaiting their execution results. This crate implements a request-reply pattern
//! using Redis as the message broker.
//!
//! ## Quick Start
//!
//! ```rust,no_run
//! use rhai_dispatcher::{RhaiDispatcherBuilder, RhaiDispatcherError};
//! use std::time::Duration;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Build the client
//! let client = RhaiDispatcherBuilder::new()
//! .caller_id("my-app-instance-1")
//! .redis_url("redis://127.0.0.1/")
//! .build()?;
//!
//! // Submit a script and await the result
//! let result = client
//! .new_play_request()
//! .worker_id("worker-1")
//! .script(r#""Hello, World!""#)
//! .timeout(Duration::from_secs(5))
//! .await_response()
//! .await?;
//!
//! println!("Result: {:?}", result);
//! Ok(())
//! }
//! ```
use chrono::Utc;
use log::{debug, error, info, warn}; // Added error
use redis::AsyncCommands;
use serde::{Deserialize, Serialize};
use std::time::Duration; // Duration is still used, Instant and sleep were removed
use uuid::Uuid;
/// Redis namespace prefix for all rhailib-related keys
/// (task hashes `rhailib:<task_id>`, worker queues `rhailib:<worker_id>`,
/// reply queues `rhailib:reply:<task_id>`).
const NAMESPACE_PREFIX: &str = "rhailib:";
/// Represents the complete details and state of a Rhai task execution.
///
/// This structure contains all information about a task throughout its lifecycle,
/// from submission to completion. It is used both for storing task state in Redis
/// and for returning results to clients. Several fields are renamed to camelCase
/// on the wire via `serde(rename)` to match the format workers expect.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RhaiTaskDetails {
    /// Unique identifier for the task (UUID).
    #[serde(rename = "taskId")] // Ensure consistent naming with other fields
    pub task_id: String,
    /// The Rhai script content to execute.
    pub script: String,
    /// Current execution status.
    pub status: String, // "pending", "processing", "completed", "error"
    // client_rpc_id: Option<Value> is removed.
    // Worker responses should ideally not include it, or Serde will ignore unknown fields by default.
    /// Script execution output (if successful).
    pub output: Option<String>,
    /// Error message (if execution failed).
    pub error: Option<String>, // Renamed from error_message for consistency
    /// Timestamp when the task was created.
    #[serde(rename = "createdAt")]
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Timestamp when the task was last modified.
    #[serde(rename = "updatedAt")]
    pub updated_at: chrono::DateTime<chrono::Utc>,
    /// Identifier of the client that submitted the task.
    #[serde(rename = "callerId")]
    pub caller_id: String,
    /// Identifier of the execution context (circle) the task runs in.
    #[serde(rename = "contextId")]
    pub context_id: String,
    /// Identifier of the worker queue the task was routed to.
    #[serde(rename = "workerId")]
    pub worker_id: String,
}
/// Comprehensive error type for all possible failures in the Rhai client.
///
/// This enum covers all error scenarios that can occur during client operations,
/// from Redis connectivity issues to task execution timeouts.
#[derive(Debug)]
pub enum RhaiDispatcherError {
    /// Redis connection or operation error
    RedisError(redis::RedisError),
    /// JSON serialization/deserialization error
    SerializationError(serde_json::Error),
    /// Task execution timeout - contains the task_id that timed out
    Timeout(String),
    /// Task not found after submission - contains the task_id (rare occurrence)
    TaskNotFound(String),
    /// Context ID is missing.
    /// NOTE(review): `PlayRequestBuilder::build` also returns this variant when
    /// the *caller* ID is empty; a dedicated variant would be clearer.
    ContextIdMissing,
}
// Enables `?` on Redis operations inside dispatcher methods.
impl From<redis::RedisError> for RhaiDispatcherError {
    fn from(err: redis::RedisError) -> Self {
        RhaiDispatcherError::RedisError(err)
    }
}
// Enables `?` on JSON (de)serialization of task payloads.
impl From<serde_json::Error> for RhaiDispatcherError {
    fn from(err: serde_json::Error) -> Self {
        RhaiDispatcherError::SerializationError(err)
    }
}
impl std::fmt::Display for RhaiDispatcherError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
RhaiDispatcherError::RedisError(e) => write!(f, "Redis error: {}", e),
RhaiDispatcherError::SerializationError(e) => write!(f, "Serialization error: {}", e),
RhaiDispatcherError::Timeout(task_id) => {
write!(f, "Timeout waiting for task {} to complete", task_id)
}
RhaiDispatcherError::TaskNotFound(task_id) => {
write!(f, "Task {} not found after submission", task_id)
}
RhaiDispatcherError::ContextIdMissing => {
write!(f, "Context ID is missing")
}
}
}
}
impl std::error::Error for RhaiDispatcherError {}
/// The main client for interacting with the Rhai task execution system.
///
/// This client manages Redis connections and provides factory methods for creating
/// script execution requests. It maintains a caller ID for task attribution and
/// handles all low-level Redis operations.
///
/// # Example
///
/// ```rust,no_run
/// use rhai_dispatcher::RhaiDispatcherBuilder;
///
/// # fn main() -> Result<(), rhai_dispatcher::RhaiDispatcherError> {
/// let client = RhaiDispatcherBuilder::new()
///     .caller_id("my-service")
///     .redis_url("redis://localhost/")
///     .build()?;
/// # Ok(())
/// # }
/// ```
pub struct RhaiDispatcher {
    // Redis client handle used for all queue and task-state operations.
    redis_client: redis::Client,
    // Identifier attached to every task this client submits.
    caller_id: String,
    // Default worker queue for new requests (overridable per request).
    worker_id: String,
    // Default execution context for new requests (overridable per request).
    context_id: String,
}
/// Builder for constructing `RhaiDispatcher` instances.
///
/// # Configuration
///
/// - `caller_id`: identifier for this client instance.
///   NOTE(review): earlier docs claimed an empty caller ID fails at `build()`,
///   but `build()` performs no such check — an empty caller ID is only
///   rejected later, when a play request is built for dispatch.
/// - `worker_id` / `context_id`: optional defaults copied into every
///   `PlayRequestBuilder` created from the client.
/// - `redis_url`: Redis connection URL (defaults to "redis://127.0.0.1/")
pub struct RhaiDispatcherBuilder {
    // None until set; build() falls back to "redis://127.0.0.1/".
    redis_url: Option<String>,
    // Empty string until set; not validated by build().
    caller_id: String,
    // Default worker queue; may remain empty and be set per request.
    worker_id: String,
    // Default execution context; may remain empty and be set per request.
    context_id: String,
}
impl RhaiDispatcherBuilder {
    /// Creates a new `RhaiDispatcherBuilder` with default settings.
    ///
    /// The builder starts with no Redis URL (will default to "redis://127.0.0.1/")
    /// and empty caller, worker and context IDs.
    pub fn new() -> Self {
        Self {
            redis_url: None,
            caller_id: "".to_string(),
            worker_id: "".to_string(),
            context_id: "".to_string(),
        }
    }
    /// Sets the caller ID for this client instance.
    ///
    /// The caller ID identifies which client submitted a task and is included
    /// in task metadata. NOTE(review): `build()` does not validate this value;
    /// an empty caller ID is only rejected later, in `PlayRequestBuilder::build`.
    ///
    /// # Arguments
    ///
    /// * `caller_id` - A unique identifier for this client instance
    pub fn caller_id(mut self, caller_id: &str) -> Self {
        self.caller_id = caller_id.to_string();
        self
    }
    /// Sets the default context (circle) ID for this client instance.
    ///
    /// The context ID identifies which circle's context a task should be executed in.
    /// It is required at dispatch time, but can be set either here or per
    /// request on the `PlayRequestBuilder`.
    ///
    /// # Arguments
    ///
    /// * `context_id` - Identifier of the execution context (circle)
    pub fn context_id(mut self, context_id: &str) -> Self {
        self.context_id = context_id.to_string();
        self
    }
    /// Sets the default worker ID for this client instance.
    ///
    /// The worker ID determines which worker queue tasks are routed to.
    /// It can be set either here or per request on the `PlayRequestBuilder`.
    ///
    /// # Arguments
    ///
    /// * `worker_id` - Identifier of the target worker queue
    pub fn worker_id(mut self, worker_id: &str) -> Self {
        self.worker_id = worker_id.to_string();
        self
    }
    /// Sets the Redis connection URL.
    ///
    /// If not provided, defaults to "redis://127.0.0.1/".
    ///
    /// # Arguments
    ///
    /// * `url` - Redis connection URL (e.g., "redis://localhost:6379/0")
    pub fn redis_url(mut self, url: &str) -> Self {
        self.redis_url = Some(url.to_string());
        self
    }
    /// Builds the final `RhaiDispatcher` instance.
    ///
    /// # Returns
    ///
    /// * `Ok(RhaiDispatcher)` - Successfully configured client
    /// * `Err(RhaiDispatcherError)` - The Redis client could not be created
    ///   from the given URL. Note that no validation of `caller_id` happens
    ///   here, contrary to what earlier documentation claimed.
    pub fn build(self) -> Result<RhaiDispatcher, RhaiDispatcherError> {
        let url = self
            .redis_url
            .unwrap_or_else(|| "redis://127.0.0.1/".to_string());
        let client = redis::Client::open(url)?;
        Ok(RhaiDispatcher {
            redis_client: client,
            caller_id: self.caller_id,
            worker_id: self.worker_id,
            context_id: self.context_id,
        })
    }
}
/// Representation of a script execution request.
///
/// This structure contains all the information needed to execute a Rhai script
/// on a worker service, including the script content, target worker, and timeout.
#[derive(Debug, Clone)]
pub struct PlayRequest {
    /// Unique request/task identifier (a UUID unless supplied explicitly).
    pub id: String,
    /// Worker queue the task will be pushed to.
    pub worker_id: String,
    /// Execution context (circle) the script runs in.
    pub context_id: String,
    /// The Rhai script source to execute.
    pub script: String,
    /// How long the client waits for a result before timing out.
    pub timeout: Duration,
}
/// Builder for constructing and submitting script execution requests.
///
/// This builder provides a fluent interface for configuring script execution
/// parameters and offers two submission modes: fire-and-forget (`submit()`)
/// and request-reply (`await_response()`).
///
/// # Example
///
/// ```rust,ignore
/// use std::time::Duration;
///
/// let result = client
///     .new_play_request()
///     .worker_id("worker-1")
///     .script(r#"print("Hello, World!");"#)
///     .timeout(Duration::from_secs(30))
///     .await_response()
///     .await?;
/// ```
pub struct PlayRequestBuilder<'a> {
    // Client the request is submitted through; also supplies the defaults below.
    client: &'a RhaiDispatcher,
    // Empty means "generate a UUID at build time".
    request_id: String,
    worker_id: String,
    context_id: String,
    caller_id: String,
    script: String,
    // Defaults to 5 seconds (see `new`).
    timeout: Duration,
    // NOTE(review): initialized to 0 and never read in the visible code —
    // presumably reserved for a retry mechanism; confirm before relying on it.
    retries: u32,
}
impl<'a> PlayRequestBuilder<'a> {
    /// Creates a builder seeded from the dispatcher's configured worker,
    /// context and caller IDs, with an empty script and a 5-second timeout.
    pub fn new(client: &'a RhaiDispatcher) -> Self {
        Self {
            client,
            request_id: "".to_string(),
            worker_id: client.worker_id.clone(),
            context_id: client.context_id.clone(),
            caller_id: client.caller_id.clone(),
            script: "".to_string(),
            timeout: Duration::from_secs(5),
            // NOTE(review): `retries` is never consumed by the visible
            // submission logic.
            retries: 0,
        }
    }

    /// Overrides the auto-generated request ID.
    pub fn request_id(mut self, request_id: &str) -> Self {
        self.request_id = request_id.to_string();
        self
    }

    /// Sets the worker queue the request is dispatched to.
    pub fn worker_id(mut self, worker_id: &str) -> Self {
        self.worker_id = worker_id.to_string();
        self
    }

    /// Sets the execution context recorded with the task.
    pub fn context_id(mut self, context_id: &str) -> Self {
        self.context_id = context_id.to_string();
        self
    }

    /// Sets the Rhai script source to execute.
    pub fn script(mut self, script: &str) -> Self {
        self.script = script.to_string();
        self
    }

    /// Loads the script source from a file.
    ///
    /// # Panics
    ///
    /// Panics with a descriptive message when the file cannot be read
    /// (the previous implementation panicked via a bare `unwrap`, losing
    /// the offending path).
    pub fn script_path(mut self, script_path: &str) -> Self {
        self.script = std::fs::read_to_string(script_path)
            .unwrap_or_else(|e| panic!("failed to read script file '{}': {}", script_path, e));
        self
    }

    /// Sets how long `await_response` waits for the worker's reply.
    pub fn timeout(mut self, timeout: Duration) -> Self {
        self.timeout = timeout;
        self
    }

    /// Validates the builder state and produces a [`PlayRequest`].
    ///
    /// A UUID is generated when no explicit request ID was set.
    ///
    /// # Errors
    ///
    /// Returns an error when the context ID or the caller ID is empty.
    pub fn build(self) -> Result<PlayRequest, RhaiDispatcherError> {
        let request_id = if self.request_id.is_empty() {
            // Generate a UUID for the request_id
            Uuid::new_v4().to_string()
        } else {
            self.request_id.clone()
        };

        if self.context_id.is_empty() {
            return Err(RhaiDispatcherError::ContextIdMissing);
        }

        if self.caller_id.is_empty() {
            // FIXME(review): reuses ContextIdMissing for a missing caller ID;
            // a dedicated CallerIdMissing variant would report this precisely.
            return Err(RhaiDispatcherError::ContextIdMissing);
        }

        Ok(PlayRequest {
            id: request_id,
            worker_id: self.worker_id.clone(),
            context_id: self.context_id.clone(),
            script: self.script.clone(),
            timeout: self.timeout,
        })
    }

    /// Fire-and-forget submission: enqueues the request without waiting for
    /// the worker's reply.
    pub async fn submit(self) -> Result<(), RhaiDispatcherError> {
        let client = self.client;
        // Build first so the log line shows the ID actually submitted
        // (previously the pre-build `request_id` was printed, which is empty
        // whenever the ID is auto-generated).
        let request = self.build()?;
        println!(
            "Submitting request {} with timeout {:?}",
            request.id, request.timeout
        );
        client.submit_play_request(&request).await?;
        Ok(())
    }

    /// Request-reply submission: enqueues the request and waits for the
    /// worker's result (or a timeout).
    pub async fn await_response(self) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
        self.client
            .submit_play_request_and_await_result(&self.build()?)
            .await
    }
}
impl RhaiDispatcher {
    /// Starts building a new script execution request, seeded with this
    /// dispatcher's caller/worker/context IDs.
    pub fn new_play_request(&self) -> PlayRequestBuilder {
        PlayRequestBuilder::new(self)
    }

    /// Writes the task details to a Redis hash and pushes the task ID onto
    /// the target worker's queue, using the supplied connection.
    async fn submit_play_request_using_connection(
        &self,
        conn: &mut redis::aio::MultiplexedConnection,
        play_request: &PlayRequest,
    ) -> Result<(), RhaiDispatcherError> {
        let now = Utc::now();
        // Task hash key: "<prefix><task id>".
        let task_key = format!("{}{}", NAMESPACE_PREFIX, play_request.id);
        // Worker queue key: "<prefix><worker id>" with spaces replaced by "_"
        // and lowercased, so queue names are stable regardless of formatting.
        let worker_queue_key = format!(
            "{}{}",
            NAMESPACE_PREFIX,
            play_request.worker_id.replace(" ", "_").to_lowercase()
        );
        debug!(
            "Submitting play request: {} to worker: {} with namespace prefix: {}",
            play_request.id, play_request.worker_id, NAMESPACE_PREFIX
        );
        // Field/value pairs stored in the task hash. The task starts out
        // "pending"; createdAt/updatedAt share the same initial timestamp.
        // NOTE(review): no "workerId" field is written here, yet
        // `get_task_status` below `expect`s one — confirm whether the worker
        // adds it when it picks the task up.
        let hset_args: Vec<(String, String)> = vec![
            ("taskId".to_string(), play_request.id.to_string()),
            ("script".to_string(), play_request.script.clone()),
            ("callerId".to_string(), self.caller_id.clone()),
            ("contextId".to_string(), play_request.context_id.clone()),
            ("status".to_string(), "pending".to_string()),
            ("createdAt".to_string(), now.to_rfc3339()),
            ("updatedAt".to_string(), now.to_rfc3339()),
        ];
        // The return value of HSET-style commands is (); element types are
        // inferred from `hset_args`.
        conn.hset_multiple::<_, _, _, ()>(&task_key, &hset_args)
            .await?;
        // LPUSH returns the new list length; the result is deliberately bound
        // and dropped.
        // NOTE(review): any Redis error from this LPUSH is silently swallowed
        // — confirm whether enqueue failures should propagate instead.
        let _: redis::RedisResult<i64> =
            conn.lpush(&worker_queue_key, play_request.id.clone()).await;
        Ok(())
    }

    /// Blocks on the task's dedicated reply queue until the worker publishes
    /// a result, the timeout elapses, or Redis errors out.
    ///
    /// The reply message is expected to be a JSON-serialized
    /// `RhaiTaskDetails`. In every outcome the reply queue is deleted to
    /// avoid leaking keys (deletion errors are ignored).
    async fn await_response_from_connection(
        &self,
        conn: &mut redis::aio::MultiplexedConnection,
        task_key: &String,
        reply_queue_key: &String,
        timeout: Duration,
    ) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
        // BLPOP's timeout is in seconds; clamp to at least 1 so a sub-second
        // Duration does not truncate to 0 (which would block indefinitely).
        let blpop_timeout_secs = timeout.as_secs().max(1);
        match conn
            .blpop::<&String, Option<(String, String)>>(reply_queue_key, blpop_timeout_secs as f64)
            .await
        {
            Ok(Some((_queue, result_message_str))) => {
                // Deserialize the worker's reply into RhaiTaskDetails.
                match serde_json::from_str::<RhaiTaskDetails>(&result_message_str) {
                    Ok(details) => {
                        info!(
                            "Task {} finished with status: {}",
                            details.task_id, details.status
                        );
                        // Clean up the reply queue; failures are ignored.
                        let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
                        Ok(details)
                    }
                    Err(e) => {
                        error!(
                            "Failed to deserialize result message from reply queue: {}",
                            e
                        );
                        let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
                        Err(RhaiDispatcherError::SerializationError(e))
                    }
                }
            }
            Ok(None) => {
                // BLPOP timed out: no reply arrived within the deadline.
                warn!(
                    "Timeout waiting for result on reply queue {} for task {}",
                    reply_queue_key, task_key
                );
                let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
                Err(RhaiDispatcherError::Timeout(task_key.clone()))
            }
            Err(e) => {
                // Transport/protocol error from Redis itself.
                error!(
                    "Redis error on BLPOP for reply queue {}: {}",
                    reply_queue_key, e
                );
                let _: redis::RedisResult<i32> = conn.del(&reply_queue_key).await;
                Err(RhaiDispatcherError::RedisError(e))
            }
        }
    }

    /// Fire-and-forget: stores the task in Redis and enqueues it for the
    /// target worker without waiting for a reply.
    pub async fn submit_play_request(
        &self,
        play_request: &PlayRequest,
    ) -> Result<(), RhaiDispatcherError> {
        let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
        self.submit_play_request_using_connection(
            &mut conn,
            &play_request,
        )
        .await?;
        Ok(())
    }

    /// Request-reply: stores and enqueues the task, then blocks on a
    /// per-task reply queue until the worker responds or the request's
    /// timeout elapses.
    pub async fn submit_play_request_and_await_result(
        &self,
        play_request: &PlayRequest,
    ) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
        let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
        // Reply queue dedicated to this task: "<prefix>:reply:<task id>".
        let reply_queue_key = format!("{}:reply:{}", NAMESPACE_PREFIX, play_request.id);
        self.submit_play_request_using_connection(
            &mut conn,
            &play_request,
        )
        .await?;
        info!(
            "Task {} submitted. Waiting for result on queue {} with timeout {:?}...",
            play_request.id,
            reply_queue_key,
            play_request.timeout
        );
        // NOTE(review): the bare task ID is passed as `task_key` here, while
        // the stored hash key carries NAMESPACE_PREFIX — the value is only
        // used for logging and the Timeout error payload, but confirm the
        // intended contents.
        self.await_response_from_connection(
            &mut conn,
            &play_request.id,
            &reply_queue_key,
            play_request.timeout,
        )
        .await
    }

    /// Fetches the stored details for `task_id` from its Redis hash.
    ///
    /// Returns `Ok(None)` when no hash value is returned for the task.
    ///
    /// NOTE(review): this panics (via `expect`) when the `callerId`,
    /// `workerId` or `contextId` fields are absent from the hash, and the
    /// submission path above never writes `workerId` — confirm who populates
    /// it, or whether a soft error would be preferable.
    pub async fn get_task_status(
        &self,
        task_id: &str,
    ) -> Result<Option<RhaiTaskDetails>, RhaiDispatcherError> {
        let mut conn = self.redis_client.get_multiplexed_async_connection().await?;
        let task_key = format!("{}{}", NAMESPACE_PREFIX, task_id);
        let result_map: Option<std::collections::HashMap<String, String>> =
            conn.hgetall(&task_key).await?;
        match result_map {
            Some(map) => {
                // Reconstruct RhaiTaskDetails from the hash fields, falling
                // back to defaults (with a warning) for optional-ish fields.
                let details = RhaiTaskDetails {
                    // Use the task_id parameter passed to the function.
                    task_id: task_id.to_string(),
                    script: map.get("script").cloned().unwrap_or_else(|| {
                        warn!("Task {}: 'script' field missing from Redis hash, defaulting to empty.", task_id);
                        String::new()
                    }),
                    status: map.get("status").cloned().unwrap_or_else(|| {
                        warn!("Task {}: 'status' field missing from Redis hash, defaulting to empty.", task_id);
                        String::new()
                    }),
                    output: map.get("output").cloned(),
                    error: map.get("error").cloned(),
                    created_at: map.get("createdAt")
                        .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
                        .map(|dt| dt.with_timezone(&Utc))
                        .unwrap_or_else(|| {
                            warn!("Task {}: 'createdAt' field missing or invalid in Redis hash, defaulting to Utc::now().", task_id);
                            Utc::now()
                        }),
                    updated_at: map.get("updatedAt")
                        .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
                        .map(|dt| dt.with_timezone(&Utc))
                        .unwrap_or_else(|| {
                            warn!("Task {}: 'updatedAt' field missing or invalid in Redis hash, defaulting to Utc::now().", task_id);
                            Utc::now()
                        }),
                    caller_id: map.get("callerId").cloned().expect("callerId field missing from Redis hash"),
                    worker_id: map.get("workerId").cloned().expect("workerId field missing from Redis hash"),
                    context_id: map.get("contextId").cloned().expect("contextId field missing from Redis hash"),
                };
                // Integrity check: warn when the hash's own taskId disagrees
                // with (or lacks) the requested id; the requested id wins.
                if let Some(redis_task_id) = map.get("taskId") {
                    if redis_task_id != task_id {
                        warn!("Task {}: Mismatch between requested task_id and taskId found in Redis hash ('{}'). Proceeding with requested task_id.", task_id, redis_task_id);
                    }
                } else {
                    warn!("Task {}: 'taskId' field missing from Redis hash.", task_id);
                }
                Ok(Some(details))
            }
            None => Ok(None),
        }
    }
}
#[cfg(test)]
mod tests {
    // Placeholder suite: verifies the crate builds and the test harness runs.
    // Real coverage should arrive alongside the examples.
    #[test]
    fn it_compiles() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}

View File

@@ -0,0 +1,38 @@
[package]
name = "rhailib_engine"
version = "0.1.0"
edition = "2021"
description = "Central Rhai engine for heromodels"
[dependencies]
rhai = { version = "1.21.0", features = ["std", "sync", "decimal", "internals"] }
heromodels = { path = "../../../db/heromodels", features = ["rhai"] }
heromodels_core = { path = "../../../db/heromodels_core" }
chrono = "0.4"
heromodels-derive = { path = "../../../db/heromodels-derive" }
rhailib_dsl = { path = "../dsl" }
[features]
default = ["calendar", "finance"]
calendar = []
finance = []
# Flow module is now updated to use our approach to Rhai engine registration
flow = []
legal = []
projects = []
biz = []
[[example]]
name = "calendar_example"
path = "examples/calendar/example.rs"
required-features = ["calendar"]
[[example]]
name = "flow_example"
path = "examples/flow/example.rs"
required-features = ["flow"]
[[example]]
name = "finance"
path = "examples/finance/example.rs"
required-features = ["finance"]

View File

@@ -0,0 +1,135 @@
# HeroModels Rhai Engine (`engine`)
The `engine` crate provides a central Rhai scripting engine for the HeroModels project. It offers a unified way to interact with various HeroModels modules (like Calendar, Flow, Legal, etc.) through Rhai scripts, leveraging a shared database connection.
## Overview
This crate facilitates:
1. **Centralized Engine Creation**: A function `create_heromodels_engine` to instantiate a Rhai engine pre-configured with common settings and all enabled HeroModels modules.
2. **Modular Registration**: HeroModels modules (Calendar, Flow, etc.) can be registered with a Rhai engine based on feature flags.
3. **Script Evaluation Utilities**: Helper functions for compiling Rhai scripts into Abstract Syntax Trees (ASTs) and for evaluating scripts or ASTs.
4. **Mock Database**: Includes a `mock_db` module for testing and running examples without needing a live database.
## Core Components & Usage
### Library (`src/lib.rs`)
- **`create_heromodels_engine(db: Arc<OurDB>) -> Engine`**:
Creates and returns a new `rhai::Engine` instance. This engine is configured with default settings (e.g., max expression depths, string/array/map sizes) and then all available HeroModels modules (controlled by feature flags) are registered with it, using the provided `db` (an `Arc<OurDB>`) instance.
- **`register_all_modules(engine: &mut Engine, db: Arc<OurDB>)`**:
Registers all HeroModels modules for which features are enabled (e.g., `calendar`, `flow`, `legal`, `projects`, `biz`) with the given Rhai `engine`. Each module is passed the shared `db` instance.
- **`eval_script(engine: &Engine, script: &str) -> Result<rhai::Dynamic, Box<rhai::EvalAltResult>>`**:
A utility function to directly evaluate a Rhai script string using the provided `engine`.
- **`compile_script(engine: &Engine, script: &str) -> Result<AST, Box<rhai::EvalAltResult>>`**:
Compiles a Rhai script string into an `AST` (Abstract Syntax Tree) for potentially faster repeated execution.
- **`run_ast(engine: &Engine, ast: &AST, scope: &mut Scope) -> Result<rhai::Dynamic, Box<rhai::EvalAltResult>>`**:
Runs a pre-compiled `AST` with a given `scope` using the provided `engine`.
- **`mock_db` module**:
Provides `create_mock_db()` which returns an `Arc<OurDB>` instance suitable for testing and examples. This allows scripts that interact with database functionalities to run without external database dependencies.
### Basic Usage
```rust
use std::sync::Arc;
use engine::{create_heromodels_engine, eval_script};
use engine::mock_db::create_mock_db; // For example usage
use heromodels::db::hero::OurDB; // Actual DB type
// Create a mock database (or connect to a real one)
let db: Arc<OurDB> = create_mock_db();
// Create the Rhai engine with all enabled modules registered
let engine = create_heromodels_engine(db);
// Run a Rhai script
let script = r#"
// Example: Assuming 'calendar' feature is enabled
let cal = new_calendar("My Test Calendar");
cal.set_description("This is a test.");
print(`Created calendar: ${cal.get_name()}`);
cal.get_id() // Return the ID
"#;
match eval_script(&engine, script) {
Ok(val) => println!("Script returned: {:?}", val),
Err(err) => eprintln!("Script error: {}", err),
}
```
### Using Specific Modules Manually
If you need more fine-grained control or only want specific modules (and prefer not to rely solely on feature flags at compile time for `create_heromodels_engine`), you can initialize an engine and register modules manually:
```rust
use std::sync::Arc;
use rhai::Engine;
use engine::mock_db::create_mock_db; // For example usage
use heromodels::db::hero::OurDB;
// Import the specific module registration function
use heromodels::models::calendar::register_calendar_rhai_module;
// Create a mock database
let db: Arc<OurDB> = create_mock_db();
// Create a new Rhai engine
let mut engine = Engine::new();
// Register only the calendar module
register_calendar_rhai_module(&mut engine, db.clone());
// Now you can use calendar-related functions in your scripts
let result = engine.eval::<String>(r#" let c = new_calendar("Solo Cal"); c.get_name() "#);
match result {
Ok(name) => println!("Calendar name: {}", name),
Err(err) => eprintln!("Error: {}", err),
}
```
## Examples
This crate includes several examples demonstrating how to use different HeroModels modules with Rhai. Each example typically requires its corresponding feature to be enabled.
- `calendar_example`: Working with calendars, events, and attendees (requires `calendar` feature).
- `flow_example`: Working with flows, steps, and signature requirements (requires `flow` feature).
- `finance_example`: Working with financial models (requires `finance` feature).
- *(Additional examples for `legal`, `projects`, `biz` would follow the same pattern if present).*
To run an example (e.g., `calendar_example`):
```bash
cargo run --example calendar_example --features calendar
```
*(Note: Examples in `Cargo.toml` already specify `required-features`, so simply `cargo run --example calendar_example` might suffice if those features are part of the default set or already enabled.)*
## Features
The crate uses feature flags to control which HeroModels modules are compiled and registered:
- `calendar`: Enables the Calendar module.
- `finance`: Enables the Finance module.
- `flow`: Enables the Flow module.
- `legal`: Enables the Legal module.
- `projects`: Enables the Projects module.
- `biz`: Enables the Business module.
The `default` features are `["calendar", "finance"]`. You can enable other modules by specifying them during the build or in your project's `Cargo.toml` if this `engine` crate is a dependency.
## Dependencies
Key dependencies include:
- `rhai`: The Rhai scripting engine.
- `heromodels`: Provides the core data models and database interaction logic, including the Rhai registration functions for each module.
- `heromodels_core`: Core utilities for HeroModels.
- `chrono`: For date/time utilities.
- `heromodels-derive`: Procedural macros used by HeroModels.
## License
This crate is part of the HeroModels project and shares its license.

View File

@@ -0,0 +1,16 @@
fn main() {
    // Model binding files that feed this crate; touching any of them must
    // trigger a rebuild so the Rhai registrations stay current.
    let watched = [
        "../heromodels/src/models/calendar/rhai.rs",
        "../heromodels/src/models/flow/rhai.rs",
        "../heromodels/src/models/legal/rhai.rs",
        "../heromodels/src/models/projects/rhai.rs",
        "../heromodels/src/models/biz/rhai.rs",
    ];
    for path in watched {
        println!("cargo:rerun-if-changed={}", path);
    }
}

View File

@@ -0,0 +1,331 @@
# Architecture of the `rhailib_engine` Crate
The `rhailib_engine` crate serves as the central Rhai scripting engine for the heromodels ecosystem. It provides a unified interface for creating, configuring, and executing Rhai scripts with access to all business domain modules through a feature-based architecture.
## Core Architecture
The engine acts as an orchestration layer that brings together the DSL modules and provides execution utilities:
```mermaid
graph TD
A[rhailib_engine] --> B[Engine Creation]
A --> C[Script Execution]
A --> D[Mock Database]
A --> E[Feature Management]
B --> B1[create_heromodels_engine]
B --> B2[Engine Configuration]
B --> B3[DSL Registration]
C --> C1[eval_script]
C --> C2[eval_file]
C --> C3[compile_script]
C --> C4[run_ast]
D --> D1[create_mock_db]
D --> D2[seed_mock_db]
D --> D3[Domain Data Seeding]
E --> E1[calendar]
E --> E2[finance]
E --> E3[flow]
E --> E4[legal]
E --> E5[projects]
E --> E6[biz]
B3 --> F[rhailib_dsl]
F --> G[All Domain Modules]
```
## Core Components
### 1. Engine Factory (`create_heromodels_engine`)
The primary entry point for creating a fully configured Rhai engine:
```rust
pub fn create_heromodels_engine() -> Engine
```
**Responsibilities:**
- Creates a new Rhai engine instance
- Configures engine limits and settings
- Registers all available DSL modules
- Returns a ready-to-use engine
**Configuration Settings:**
- **Expression Depth**: 128 levels for both expressions and functions
- **String Size Limit**: 10 MB maximum string size
- **Array Size Limit**: 10,000 elements maximum
- **Map Size Limit**: 10,000 key-value pairs maximum
### 2. Script Execution Utilities
#### Direct Script Evaluation
```rust
pub fn eval_script(engine: &Engine, script: &str) -> Result<Dynamic, Box<EvalAltResult>>
```
Executes Rhai script strings directly with immediate results.
#### File-Based Script Execution
```rust
pub fn eval_file(engine: &Engine, file_path: &Path) -> Result<Dynamic, Box<EvalAltResult>>
```
Loads and executes Rhai scripts from filesystem with proper error handling.
#### Compiled Script Execution
```rust
pub fn compile_script(engine: &Engine, script: &str) -> Result<AST, Box<EvalAltResult>>
pub fn run_ast(engine: &Engine, ast: &AST, scope: &mut Scope) -> Result<Dynamic, Box<EvalAltResult>>
```
Provides compilation and execution of scripts for performance optimization.
### 3. Mock Database System
#### Database Creation
```rust
pub fn create_mock_db() -> Arc<OurDB>
```
Creates an in-memory database instance for testing and examples.
#### Data Seeding
```rust
pub fn seed_mock_db(db: Arc<OurDB>)
```
Populates the mock database with representative data across all domains.
## Feature-Based Architecture
The engine uses Cargo features to control which domain modules are included:
### Available Features
- **`calendar`** (default): Calendar and event management
- **`finance`** (default): Financial accounts, assets, and marketplace
- **`flow`**: Workflow and approval processes
- **`legal`**: Contract and legal document management
- **`projects`**: Project and task management
- **`biz`**: Business operations and entities
### Feature Integration Pattern
```rust
#[cfg(feature = "calendar")]
use heromodels::models::calendar::*;
#[cfg(feature = "finance")]
use heromodels::models::finance::*;
```
This allows for:
- **Selective Compilation**: Only include needed functionality
- **Reduced Binary Size**: Exclude unused domain modules
- **Modular Deployment**: Different configurations for different use cases
## Mock Database Architecture
### Database Structure
The mock database provides a complete testing environment:
```mermaid
graph LR
A[Mock Database] --> B[Calendar Data]
A --> C[Finance Data]
A --> D[Flow Data]
A --> E[Legal Data]
A --> F[Projects Data]
B --> B1[Calendars]
B --> B2[Events]
B --> B3[Attendees]
C --> C1[Accounts]
C --> C2[Assets - ERC20/ERC721]
C --> C3[Marketplace Listings]
D --> D1[Flows]
D --> D2[Flow Steps]
D --> D3[Signature Requirements]
E --> E1[Contracts]
E --> E2[Contract Revisions]
E --> E3[Contract Signers]
F --> F1[Projects]
F --> F2[Project Members]
F --> F3[Project Tags]
```
### Seeding Strategy
Each domain has its own seeding function that creates realistic test data:
#### Calendar Seeding
- Creates work calendars with descriptions
- Adds team meetings with attendees
- Sets up recurring events
#### Finance Seeding
- Creates demo trading accounts
- Generates ERC20 tokens and ERC721 NFTs
- Sets up marketplace listings with metadata
#### Flow Seeding (Feature-Gated)
- Creates document approval workflows
- Defines multi-step approval processes
- Sets up signature requirements
#### Legal Seeding (Feature-Gated)
- Creates service agreements
- Adds contract revisions and versions
- Defines contract signers and roles
#### Projects Seeding (Feature-Gated)
- Creates project instances with status tracking
- Assigns team members and priorities
- Adds project tags and categorization
## Error Handling Architecture
### Comprehensive Error Propagation
```rust
Result<Dynamic, Box<EvalAltResult>>
```
All functions return proper Rhai error types that include:
- **Script Compilation Errors**: Syntax and parsing issues
- **Runtime Errors**: Execution failures and exceptions
- **File System Errors**: File reading and path resolution issues
- **Database Errors**: Mock database operation failures
### Error Context Enhancement
File operations include enhanced error context:
```rust
Err(Box::new(EvalAltResult::ErrorSystem(
format!("Failed to read script file: {}", file_path.display()),
Box::new(io_err),
)))
```
## Performance Considerations
### Engine Configuration
Optimized settings for production use:
- **Memory Limits**: Prevent runaway script execution
- **Depth Limits**: Avoid stack overflow from deep recursion
- **Size Limits**: Control memory usage for large data structures
### Compilation Strategy
- **AST Caching**: Compile once, execute multiple times
- **Scope Management**: Efficient variable scope handling
- **Module Registration**: One-time registration at engine creation
### Mock Database Performance
- **In-Memory Storage**: Fast access for testing scenarios
- **Temporary Directories**: Automatic cleanup after use
- **Lazy Loading**: Data seeded only when needed
## Integration Patterns
### Script Development Workflow
```rust
// 1. Create engine with all modules
let engine = create_heromodels_engine();
// 2. Execute business logic scripts
let result = eval_script(&engine, r#"
let company = new_company()
.name("Tech Startup")
.business_type("startup");
save_company(company)
"#)?;
// 3. Handle results and errors
match result {
Ok(value) => println!("Success: {:?}", value),
Err(error) => eprintln!("Error: {}", error),
}
```
### Testing Integration
```rust
// 1. Create mock database
let db = create_mock_db();
seed_mock_db(db.clone());
// 2. Create engine
let engine = create_heromodels_engine();
// 3. Test scripts against seeded data
let script = r#"
let calendars = list_calendars();
calendars.len()
"#;
let count = eval_script(&engine, script)?;
```
### File-Based Script Execution
```rust
// Execute scripts from files
let result = eval_file(&engine, Path::new("scripts/business_logic.rhai"))?;
```
## Deployment Configurations
### Minimal Configuration
```toml
[dependencies]
rhailib_engine = { version = "0.1.0", default-features = false, features = ["calendar"] }
```
### Full Configuration
```toml
[dependencies]
rhailib_engine = { version = "0.1.0", features = ["calendar", "finance", "flow", "legal", "projects", "biz"] }
```
### Custom Configuration
```toml
[dependencies]
rhailib_engine = { version = "0.1.0", default-features = false, features = ["finance", "biz"] }
```
## Security Considerations
### Script Execution Limits
- **Resource Limits**: Prevent resource exhaustion attacks
- **Execution Time**: Configurable timeouts for long-running scripts
- **Memory Bounds**: Controlled memory allocation
### Database Access
- **Mock Environment**: Safe testing without production data exposure
- **Temporary Storage**: Automatic cleanup prevents data persistence
- **Isolated Execution**: Each test run gets fresh database state
## Extensibility
### Adding New Domains
1. Create new feature flag in `Cargo.toml`
2. Add conditional imports for new models
3. Implement seeding function for test data
4. Register with DSL module system
### Custom Engine Configuration
```rust
let mut engine = Engine::new();
// Custom configuration
engine.set_max_expr_depths(256, 256);
// Register specific modules
rhailib_dsl::register_dsl_modules(&mut engine);
```
This architecture provides a robust, feature-rich foundation for Rhai script execution while maintaining flexibility, performance, and security.

View File

@@ -0,0 +1,101 @@
// calendar_script.rhai
// Example Rhai script for working with Calendar models.
// Exercises the builder APIs the host registers on the engine: calendars,
// events, and attendees, plus database round-trips.

// String constants mirroring the AttendanceStatus values expected by the host.
const NO_RESPONSE = "NoResponse";
const ACCEPTED = "Accepted";
const DECLINED = "Declined";
const TENTATIVE = "Tentative";

// Create a new calendar using the builder pattern.
let my_calendar = new_calendar()
    .name("Team Calendar")
    .description("Calendar for team events and meetings");

print(`Created calendar: ${my_calendar.name} (${my_calendar.id})`);

// Build attendees (identified by contact id) with an initial status each.
let alice = new_attendee()
    .with_contact_id(1)
    .with_status(NO_RESPONSE);
let bob = new_attendee()
    .with_contact_id(2)
    .with_status(ACCEPTED);
let charlie = new_attendee()
    .with_contact_id(3)
    .with_status(TENTATIVE);

// Create and persist a one-hour event.
// Note: timestamps are in seconds since epoch.
let now = timestamp_now();
let one_hour = 60 * 60;
let meeting = new_event()
    .title("Weekly Sync")
    .reschedule(now, now + one_hour)
    .location("Conference Room A")
    .description("Regular team sync meeting")
    .add_attendee(alice)
    .add_attendee(bob)
    .add_attendee(charlie)
    .save_event();

print(`Created event: ${meeting.title}`);

meeting.delete_event();
// NOTE(review): the event is deleted above, yet the script keeps using
// `meeting` below (reads attendees, updates a status, adds it to the
// calendar) — confirm this is intentional for the example.
print(`Deleted event: ${meeting.title}`);

// Read back the attendee list from the local event object.
let attendees = meeting.attendees;
print(`Added attendees to the event`);

// Update Charlie's (contact id 3) attendee status directly on the event.
meeting.update_attendee_status(3, ACCEPTED);
print(`Updated Charlie's status to: ${ACCEPTED}`);

// Attach the event to the calendar object.
my_calendar.add_event_to_calendar(meeting);
print(`Added event to calendar`);

// Persist the calendar; the returned value carries the database-assigned id.
let saved_calendar = my_calendar.save_calendar();
print(`Calendar saved to database with ID: ${saved_calendar.id}`);

// Round-trip: fetch the calendar back by id; () signals "not found".
let retrieved_calendar = get_calendar_by_id(saved_calendar.id);
if retrieved_calendar != () {
    print(`Retrieved calendar: ${retrieved_calendar.name}`);
    print(`Retrieved calendar successfully`);
} else {
    print("Failed to retrieve calendar from database");
}

// Enumerate every calendar currently stored.
let all_calendars = list_calendars();
print("\nListing all calendars in database:");
let calendar_count = 0;
for calendar in all_calendars {
    print(` - Calendar: ${calendar.name} (ID: ${calendar.id})`);
    calendar_count += 1;
}
print(`Total calendars: ${calendar_count}`);

// Enumerate every event currently stored.
let all_events = list_events();
print("\nListing all events in database:");
let event_count = 0;
for event in all_events {
    print(` - Event: ${event.title} (ID: ${event.id})`);
    event_count += 1;
}
print(`Total events: ${event_count}`);

// Fallback helper: returns a fixed timestamp (June 1, 2023, 12:00 PM UTC).
// NOTE(review): the host example also registers a native `timestamp_now`;
// a script-defined function of the same name typically takes precedence —
// confirm which one is intended to win.
fn timestamp_now() {
    // This would typically be provided by the host application.
    // For this example, we use a fixed timestamp.
    1685620800 // June 1, 2023, 12:00 PM
}

View File

@@ -0,0 +1,70 @@
use engine::mock_db::create_mock_db;
use engine::{create_heromodels_engine, eval_file};
use rhai::Engine;
mod mock;
use mock::seed_calendar_data;
/// Runs the calendar example: seeds a mock DB, builds the shared engine,
/// and executes `calendar_script.rhai` against it.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Calendar Rhai Example");
    println!("=====================");

    // In-memory database pre-populated with calendar fixtures.
    let db = create_mock_db();
    seed_calendar_data(db.clone());

    // Shared heromodels engine, extended with the timestamp helpers the
    // script relies on.
    let mut engine = create_heromodels_engine(db.clone());
    register_timestamp_helpers(&mut engine);

    // Locate the script relative to this crate's manifest.
    let script_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("examples")
        .join("calendar")
        .join("calendar_script.rhai");

    println!("\nRunning script: {}", script_path.display());
    println!("---------------------");

    // Execute the script; bail out early on failure, wrapping the Rhai
    // error into an io::Error so main's signature stays simple.
    let result = match eval_file(&engine, &script_path) {
        Ok(value) => value,
        Err(err) => {
            eprintln!("\nError running script: {}", err);
            return Err(Box::new(std::io::Error::new(
                std::io::ErrorKind::Other,
                err.to_string(),
            )));
        }
    };

    if !result.is_unit() {
        println!("\nScript returned: {:?}", result);
    }
    println!("\nScript executed successfully!");
    Ok(())
}
/// Registers `timestamp_now` and `format_timestamp` on the engine so the
/// calendar script can work with Unix timestamps.
fn register_timestamp_helpers(engine: &mut Engine) {
    use chrono::{TimeZone, Utc};

    // Current Unix time in seconds.
    engine.register_fn("timestamp_now", || Utc::now().timestamp() as i64);

    // Render a Unix timestamp as a human-readable UTC string.
    engine.register_fn("format_timestamp", |ts: i64| {
        Utc.timestamp_opt(ts, 0)
            .single()
            .expect("Invalid timestamp")
            .format("%Y-%m-%d %H:%M:%S UTC")
            .to_string()
    });

    println!("Timestamp helper functions registered successfully.");
}

View File

@@ -0,0 +1,60 @@
use chrono::Utc;
use heromodels::db::hero::OurDB;
use heromodels::db::{Collection, Db};
use heromodels::models::calendar::{Calendar, Event};
use heromodels_core::Model;
use std::sync::Arc;
/// Seed the mock database with calendar data.
///
/// Creates a "Work Calendar" with a single one-hour "Team Meeting" event,
/// stores both in `db`, and links the event to the calendar. Panics if any
/// database operation fails (acceptable for example/mock code).
pub fn seed_calendar_data(db: Arc<OurDB>) {
    // Create a calendar
    let calendar = Calendar::new(None, "Work Calendar".to_string())
        .description("My work schedule".to_string());

    // Store the calendar. The assigned id is not needed here (the calendar is
    // re-saved below), so mark it unused to avoid a compiler warning.
    let (_calendar_id, mut saved_calendar) = db
        .collection::<Calendar>()
        .expect("Failed to get Calendar collection")
        .set(&calendar)
        .expect("Failed to store calendar");

    // Create an event starting now and lasting one hour.
    let now = Utc::now().timestamp();
    let end_time = now + 3600; // Add 1 hour in seconds
    let event = Event::new()
        .title("Team Meeting".to_string())
        .reschedule(now, end_time)
        .location("Conference Room A".to_string())
        .description("Weekly sync".to_string())
        .build();

    // Store the event first so we obtain its database id.
    let (event_id, saved_event) = db
        .collection()
        .expect("Failed to get Event collection")
        .set(&event)
        .expect("Failed to store event");

    // Link the stored event to the calendar and persist the update.
    saved_calendar = saved_calendar.add_event(event_id as i64);
    let (_calendar_id, final_calendar) = db
        .collection::<Calendar>()
        .expect("Failed to get Calendar collection")
        .set(&saved_calendar)
        .expect("Failed to store calendar");

    println!("Mock database seeded with calendar data:");
    println!(
        " - Added calendar: {} (ID: {})",
        final_calendar.name,
        final_calendar.get_id()
    );
    println!(
        " - Added event: {} (ID: {})",
        saved_event.title,
        saved_event.get_id()
    );
}

View File

@@ -0,0 +1,70 @@
use engine::mock_db::create_mock_db;
use engine::{create_heromodels_engine, eval_file};
use rhai::Engine;
use std::path::Path;
mod mock;
use mock::seed_finance_data;
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Finance Rhai Example");
println!("===================");
// Create a mock database
let db = create_mock_db();
// Seed the database with some initial data
seed_finance_data(db.clone());
// Create the Rhai engine using our central engine creator
let mut engine = create_heromodels_engine(db.clone());
// Register timestamp helper functions
register_timestamp_helpers(&mut engine);
// Get the path to the script
let script_path = Path::new(file!())
.parent()
.unwrap()
.join("finance_script.rhai");
println!("\nRunning script: {}", script_path.display());
println!("---------------------");
// Run the script
match eval_file(&engine, &script_path) {
Ok(result) => {
if !result.is_unit() {
println!("\nScript returned: {:?}", result);
}
println!("\nScript executed successfully!");
Ok(())
}
Err(err) => {
eprintln!("\nError running script: {}", err);
Err(Box::new(std::io::Error::new(
std::io::ErrorKind::Other,
err.to_string(),
)))
}
}
}
// Register timestamp helper functions with the engine:
// `timestamp_now()` -> current Unix time in seconds (UTC);
// `format_timestamp(ts)` -> "%Y-%m-%d %H:%M:%S UTC" rendering of `ts`.
fn register_timestamp_helpers(engine: &mut Engine) {
    use chrono::{TimeZone, Utc};

    engine.register_fn("timestamp_now", || Utc::now().timestamp() as i64);

    // Panics on an out-of-range timestamp; fine for example code.
    engine.register_fn("format_timestamp", |ts: i64| {
        Utc.timestamp_opt(ts, 0)
            .single()
            .expect("Invalid timestamp")
            .format("%Y-%m-%d %H:%M:%S UTC")
            .to_string()
    });

    println!("Timestamp helper functions registered successfully.");
}

View File

@@ -0,0 +1,202 @@
// finance_script.rhai
// Example Rhai script for working with Finance models

// Constants for AssetType
const NATIVE = "Native";
const ERC20 = "Erc20";
const ERC721 = "Erc721";
const ERC1155 = "Erc1155";

// Constants for ListingStatus
const ACTIVE = "Active";
const SOLD = "Sold";
const CANCELLED = "Cancelled";
const EXPIRED = "Expired";

// Constants for ListingType
const FIXED_PRICE = "FixedPrice";
const AUCTION = "Auction";
const EXCHANGE = "Exchange";

// Constants for BidStatus
const BID_ACTIVE = "Active";
const BID_ACCEPTED = "Accepted";
const BID_REJECTED = "Rejected";
const BID_CANCELLED = "Cancelled";

// Create a new account using builder pattern
let alice_account = new_account()
    .name("Alice's Account")
    .user_id(101)
    .description("Alice's primary trading account")
    .ledger("ethereum")
    .address("0x1234567890abcdef1234567890abcdef12345678")
    .pubkey("0xabcdef1234567890abcdef1234567890abcdef12");
print(`Created account: ${alice_account.get_name()} (User ID: ${alice_account.get_user_id()})`);

// Save the account to the database
let saved_alice = set_account(alice_account);
print(`Account saved to database with ID: ${saved_alice.get_id()}`);

// Create a new asset using builder pattern
let token_asset = new_asset()
    .name("HERO Token")
    .description("Herocode governance token")
    .amount(1000.0)
    .address("0x9876543210abcdef9876543210abcdef98765432")
    .asset_type(ERC20)
    .decimals(18);
print(`Created asset: ${token_asset.get_name()} (${token_asset.get_amount()} ${token_asset.get_asset_type()})`);

// Save the asset to the database
let saved_token = set_asset(token_asset);
print(`Asset saved to database with ID: ${saved_token.get_id()}`);

// Add the asset to Alice's account
saved_alice = saved_alice.add_asset(saved_token.get_id());
saved_alice = set_account(saved_alice);
print(`Added asset ${saved_token.get_name()} to ${saved_alice.get_name()}`);

// Create a new NFT asset
let nft_asset = new_asset()
    .name("Herocode #42")
    .description("Unique digital collectible")
    .amount(1.0)
    .address("0xabcdef1234567890abcdef1234567890abcdef12")
    .asset_type(ERC721)
    .decimals(0);

// Save the NFT to the database
let saved_nft = set_asset(nft_asset);
print(`NFT saved to database with ID: ${saved_nft.get_id()}`);

// Create Bob's account
let bob_account = new_account()
    .name("Bob's Account")
    .user_id(102)
    .description("Bob's trading account")
    .ledger("ethereum")
    .address("0xfedcba0987654321fedcba0987654321fedcba09")
    .pubkey("0x654321fedcba0987654321fedcba0987654321fe");

// Save Bob's account
let saved_bob = set_account(bob_account);
print(`Created and saved Bob's account with ID: ${saved_bob.get_id()}`);

// Create a listing for the NFT
let nft_listing = new_listing()
    .seller_id(saved_alice.get_id())
    .asset_id(saved_nft.get_id())
    .price(0.5)
    .currency("ETH")
    .listing_type(AUCTION)
    .title("Rare Herocode NFT")
    .description("One of a kind digital collectible")
    .image_url("https://example.com/nft/42.png")
    .expires_at(timestamp_now() + 86400) // 24 hours from now
    .add_tag("rare")
    .add_tag("collectible")
    .add_tag("digital art")
    .set_listing();

// Save the listing
print(`Created listing: ${nft_listing.get_title()} (ID: ${nft_listing.get_id()})`);
print(`Listing status: ${nft_listing.get_status()}, Type: ${nft_listing.get_listing_type()}`);
print(`Listing price: ${nft_listing.get_price()} ${nft_listing.get_currency()}`);

// Create a bid from Bob
let bob_bid = new_bid()
    .listing_id(nft_listing.get_id().to_string())
    .bidder_id(saved_bob.get_id())
    .amount(1.5)
    .currency("ETH")
    .set_bid();

// Save the bid
print(`Created bid from ${saved_bob.get_name()} for ${bob_bid.get_amount()} ${bob_bid.get_currency()}`);

// Add the bid to the listing
nft_listing.add_bid(bob_bid);
nft_listing.set_listing();
print(`Added bid to listing ${nft_listing.get_title()}`);

// Create another bid with higher amount
let charlie_account = new_account()
    .name("Charlie's Account")
    .user_id(103)
    .description("Charlie's trading account")
    .ledger("ethereum")
    .address("0x1122334455667788991122334455667788990011")
    .pubkey("0x8877665544332211887766554433221188776655");
let saved_charlie = set_account(charlie_account);
print(`Created and saved Charlie's account with ID: ${saved_charlie.get_id()}`);

let charlie_bid = new_bid()
    .listing_id(nft_listing.get_id().to_string())
    .bidder_id(saved_charlie.get_id())
    .amount(2.5)
    .currency("ETH")
    .set_bid();
print(`Created higher bid from ${saved_charlie.get_name()} for ${charlie_bid.get_amount()} ${charlie_bid.get_currency()}`);

// Add the higher bid to the listing
nft_listing.add_bid(charlie_bid)
    .set_listing();
print(`Added higher bid to listing ${nft_listing.get_title()}`);

nft_listing.sale_price(2.5)
    .set_listing();

// Complete the sale to the highest bidder (Charlie)
nft_listing.complete_sale(saved_charlie.get_id())
    .set_listing();
print(`Completed sale of ${nft_listing.get_title()} to ${saved_charlie.get_name()}`);
// Fixed: this previously referenced `saved_listing`, a variable that was
// never defined anywhere in the script; the listing is `nft_listing`.
print(`New listing status: ${nft_listing.get_status()}`);

// Retrieve the listing from the database
let retrieved_listing = get_listing_by_id(nft_listing.get_id());
print(`Retrieved listing: ${retrieved_listing.get_title()} (Status: ${retrieved_listing.get_status()})`);

// Create a fixed price listing
let token_listing = new_listing()
    .seller_id(saved_alice.get_id())
    .asset_id(saved_token.get_id())
    .price(100.0)
    .currency("USDC")
    .listing_type(FIXED_PRICE)
    .title("HERO Tokens for Sale")
    .description("100 HERO tokens at fixed price")
    .set_listing();

// Save the fixed price listing
print(`Created fixed price listing: ${token_listing.get_title()} (ID: ${token_listing.get_id()})`);

// Cancel the listing
token_listing.cancel();
token_listing.set_listing();
print(`Cancelled listing: ${token_listing.get_title()}`);
print(`Listing status: ${token_listing.get_status()}`);

// Print summary of all accounts
print("\nAccount Summary:");
print(`Alice (ID: ${saved_alice.get_id()}): ${saved_alice.get_assets().len()} assets`);
print(`Bob (ID: ${saved_bob.get_id()}): ${saved_bob.get_assets().len()} assets`);
print(`Charlie (ID: ${saved_charlie.get_id()}): ${saved_charlie.get_assets().len()} assets`);

// Print summary of all listings
print("\nListing Summary:");
print(`NFT Auction (ID: ${nft_listing.get_id()}): ${nft_listing.get_status()}`);
print(`Token Sale (ID: ${token_listing.get_id()}): ${token_listing.get_status()}`);

// Print summary of all bids
print("\nBid Summary:");
print(`Bob's bid: ${bob_bid.get_amount()} ${bob_bid.get_currency()} (Status: ${bob_bid.get_status()})`);
print(`Charlie's bid: ${charlie_bid.get_amount()} ${charlie_bid.get_currency()} (Status: ${charlie_bid.get_status()})`);

View File

@@ -0,0 +1,111 @@
use heromodels::db::hero::OurDB;
use heromodels::db::{Collection, Db};
use heromodels::models::finance::account::Account;
use heromodels::models::finance::asset::{Asset, AssetType};
use heromodels::models::finance::marketplace::{Listing, ListingType};
use heromodels_core::Model;
use std::sync::Arc;
/// Seed the mock database with finance data.
///
/// Creates one demo account, an ERC-20 token asset and an ERC-721 NFT asset,
/// attaches both assets to the account, and opens an auction listing for the
/// NFT. Panics if any database operation fails (acceptable for example code).
pub fn seed_finance_data(db: Arc<OurDB>) {
    // Create a user account
    let account = Account::new()
        .name("Demo Account")
        .user_id(1)
        .description("Demo trading account")
        .ledger("ethereum")
        .address("0x1234567890abcdef1234567890abcdef12345678")
        .pubkey("0xabcdef1234567890abcdef1234567890abcdef12");
    // Store the account in the database. `set` returns the assigned id plus
    // the stored copy; `account_id` is needed below as the listing's seller.
    let (account_id, mut updated_account) = db
        .collection::<Account>()
        .expect("Failed to get Account collection")
        .set(&account)
        .expect("Failed to store account");
    // Create an ERC20 token asset
    let token_asset = Asset::new()
        .name("HERO Token")
        .description("Herocode governance token")
        .amount(1000.0)
        .address("0x9876543210abcdef9876543210abcdef98765432")
        .asset_type(AssetType::Erc20)
        .decimals(18);
    // Store the token asset in the database
    let (token_id, updated_token) = db
        .collection::<Asset>()
        .expect("Failed to get Asset collection")
        .set(&token_asset)
        .expect("Failed to store token asset");
    // Create an NFT asset (amount 1.0 / 0 decimals: a single indivisible item)
    let nft_asset = Asset::new()
        .name("Herocode #1")
        .description("Unique digital collectible")
        .amount(1.0)
        .address("0xabcdef1234567890abcdef1234567890abcdef12")
        .asset_type(AssetType::Erc721)
        .decimals(0);
    // Store the NFT asset in the database
    let (nft_id, updated_nft) = db
        .collection::<Asset>()
        .expect("Failed to get Asset collection")
        .set(&nft_asset)
        .expect("Failed to store NFT asset");
    // Add both stored assets to the account by id
    updated_account = updated_account.add_asset(token_id);
    updated_account = updated_account.add_asset(nft_id);
    // Update the account in the database
    let (_, final_account) = db
        .collection::<Account>()
        .expect("Failed to get Account collection")
        .set(&updated_account)
        .expect("Failed to store updated account");
    // Create an auction listing for the NFT, sold by the demo account
    let listing = Listing::new()
        .seller_id(account_id)
        .asset_id(nft_id)
        .price(0.5)
        .currency("ETH")
        .listing_type(ListingType::Auction)
        .title("Rare Herocode NFT".to_string())
        .description("One of a kind digital collectible".to_string())
        .image_url(Some("https://example.com/nft/1.png".to_string()))
        .add_tag("rare".to_string())
        .add_tag("collectible".to_string());
    // Store the listing in the database
    let (_listing_id, updated_listing) = db
        .collection::<Listing>()
        .expect("Failed to get Listing collection")
        .set(&listing)
        .expect("Failed to store listing");
    // Report what was seeded
    println!("Mock database seeded with finance data:");
    println!(
        " - Added account: {} (ID: {})",
        final_account.name,
        final_account.get_id()
    );
    println!(
        " - Added token asset: {} (ID: {})",
        updated_token.name,
        updated_token.get_id()
    );
    println!(
        " - Added NFT asset: {} (ID: {})",
        updated_nft.name,
        updated_nft.get_id()
    );
    println!(
        " - Added listing: {} (ID: {})",
        updated_listing.title,
        updated_listing.get_id()
    );
}

View File

@@ -0,0 +1,162 @@
use engine::mock_db::create_mock_db;
use engine::{create_heromodels_engine, eval_file};
use heromodels::models::flow::{Flow, FlowStep, SignatureRequirement};
use heromodels_core::Model;
use rhai::Scope;
use std::path::Path;
mod mock;
use mock::seed_flow_data;
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("Flow Rhai Example");
println!("=================");
// Create a mock database
let db = create_mock_db();
// Seed the database with initial data
seed_flow_data(db.clone());
// Create the Rhai engine with all modules registered
let engine = create_heromodels_engine(db.clone());
// Get the path to the script
let script_path = Path::new(file!())
.parent()
.unwrap()
.join("flow_script.rhai");
println!("\nRunning script: {}", script_path.display());
println!("---------------------");
// Run the script
match eval_file(&engine, &script_path.to_string_lossy()) {
Ok(result) => {
if !result.is_unit() {
println!("\nScript returned: {:?}", result);
}
println!("\nScript executed successfully!");
}
Err(err) => {
eprintln!("\nError running script: {}", err);
return Err(Box::new(std::io::Error::new(
std::io::ErrorKind::Other,
err.to_string(),
)));
}
}
// Demonstrate direct Rust interaction with the Rhai-exposed flow functionality
println!("\nDirect Rust interaction with Rhai-exposed flow functionality");
println!("----------------------------------------------------------");
// Create a new scope
let mut scope = Scope::new();
// Create a new flow using the Rhai function
let result = engine.eval::<Flow>("new_flow(0, \"Direct Rust Flow\")");
match result {
Ok(mut flow) => {
println!(
"Created flow from Rust: {} (ID: {})",
flow.name,
flow.get_id()
);
// Set flow status using the builder pattern
flow = flow.status("active".to_string());
println!("Set flow status to: {}", flow.status);
// Create a new flow step using the Rhai function
let result = engine.eval::<FlowStep>("new_flow_step(0, 1)");
match result {
Ok(mut step) => {
println!(
"Created flow step from Rust: Step Order {} (ID: {})",
step.step_order,
step.get_id()
);
// Set step description
step = step.description("Direct Rust Step".to_string());
println!(
"Set step description to: {}",
step.description
.clone()
.unwrap_or_else(|| "None".to_string())
);
// Create a signature requirement using the Rhai function
let result = engine.eval::<SignatureRequirement>(
"new_signature_requirement(0, 1, \"Direct Rust Signer\", \"Please sign this document\")"
);
match result {
Ok(req) => {
println!(
"Created signature requirement from Rust: Public Key {} (ID: {})",
req.public_key,
req.get_id()
);
// Add the step to the flow using the builder pattern
flow = flow.add_step(step);
println!(
"Added step to flow. Flow now has {} steps",
flow.steps.len()
);
// Save the flow to the database using the Rhai function
let save_flow_script = "fn save_it(f) { return db::save_flow(f); }";
let save_flow_ast = engine.compile(save_flow_script).unwrap();
let result = engine.call_fn::<Flow>(
&mut scope,
&save_flow_ast,
"save_it",
(flow,),
);
match result {
Ok(saved_flow) => {
println!(
"Saved flow to database with ID: {}",
saved_flow.get_id()
);
}
Err(err) => eprintln!("Error saving flow: {}", err),
}
// Save the signature requirement to the database using the Rhai function
let save_req_script =
"fn save_it(r) { return db::save_signature_requirement(r); }";
let save_req_ast = engine.compile(save_req_script).unwrap();
let result = engine.call_fn::<SignatureRequirement>(
&mut scope,
&save_req_ast,
"save_it",
(req,),
);
match result {
Ok(saved_req) => {
println!(
"Saved signature requirement to database with ID: {}",
saved_req.get_id()
);
}
Err(err) => {
eprintln!("Error saving signature requirement: {}", err)
}
}
}
Err(err) => eprintln!("Error creating signature requirement: {}", err),
}
}
Err(err) => eprintln!("Error creating flow step: {}", err),
}
}
Err(err) => eprintln!("Error creating flow: {}", err),
}
Ok(())
}

View File

@@ -0,0 +1,111 @@
// flow_script.rhai
// Example Rhai script for working with Flow models.
// NOTE(review): unlike the finance script, this one mutates models through
// free functions (`name(obj, value)`, `status(obj, value)`, ...) rather than
// method chaining — presumably matching how the flow module registers its
// API; verify against the engine registration.

// Constants for Flow status
const STATUS_DRAFT = "draft";
const STATUS_ACTIVE = "active";
const STATUS_COMPLETED = "completed";
const STATUS_CANCELLED = "cancelled";

// Create a new flow using builder pattern
let my_flow = new_flow(0, "flow-123");
name(my_flow, "Document Approval Flow");
status(my_flow, STATUS_DRAFT);
print(`Created flow: ${get_flow_name(my_flow)} (ID: ${get_flow_id(my_flow)})`);
print(`Status: ${get_flow_status(my_flow)}`);

// Create flow steps using builder pattern
let step1 = new_flow_step(0, 1);
description(step1, "Initial review by legal team");
status(step1, STATUS_DRAFT);
let step2 = new_flow_step(0, 2);
description(step2, "Approval by department head");
status(step2, STATUS_DRAFT);
let step3 = new_flow_step(0, 3);
description(step3, "Final signature by CEO");
status(step3, STATUS_DRAFT);

// Create signature requirements using builder pattern
// (args: id, flow_step_id, public key / address, message to sign)
let req1 = new_signature_requirement(0, get_flow_step_id(step1), "legal@example.com", "Please review this document");
signed_by(req1, "Legal Team");
status(req1, STATUS_DRAFT);
let req2 = new_signature_requirement(0, get_flow_step_id(step2), "dept@example.com", "Department approval needed");
signed_by(req2, "Department Head");
status(req2, STATUS_DRAFT);
let req3 = new_signature_requirement(0, get_flow_step_id(step3), "ceo@example.com", "Final approval required");
signed_by(req3, "CEO");
status(req3, STATUS_DRAFT);
print(`Created flow steps with signature requirements`);

// Add steps to the flow
let flow_with_steps = my_flow;
add_step(flow_with_steps, step1);
add_step(flow_with_steps, step2);
add_step(flow_with_steps, step3);
print(`Added steps to flow. Flow now has ${get_flow_steps(flow_with_steps).len()} steps`);

// Activate the flow
let active_flow = flow_with_steps;
status(active_flow, STATUS_ACTIVE);
print(`Updated flow status to: ${get_flow_status(active_flow)}`);

// Save the flow to the database
let saved_flow = db::save_flow(active_flow);
print(`Flow saved to database with ID: ${get_flow_id(saved_flow)}`);

// Save signature requirements to the database
let saved_req1 = db::save_signature_requirement(req1);
let saved_req2 = db::save_signature_requirement(req2);
let saved_req3 = db::save_signature_requirement(req3);
print(`Signature requirements saved to database with IDs: ${get_signature_requirement_id(saved_req1)}, ${get_signature_requirement_id(saved_req2)}, ${get_signature_requirement_id(saved_req3)}`);

// Retrieve the flow from the database
let retrieved_flow = db::get_flow_by_id(get_flow_id(saved_flow));
print(`Retrieved flow: ${get_flow_name(retrieved_flow)}`);
print(`It has ${get_flow_steps(retrieved_flow).len()} steps`);

// Complete the flow
let completed_flow = retrieved_flow;
status(completed_flow, STATUS_COMPLETED);
print(`Updated retrieved flow status to: ${get_flow_status(completed_flow)}`);

// Save the updated flow
db::save_flow(completed_flow);
print("Updated flow saved to database");

// List all flows in the database
let all_flows = db::list_flows();
print("\nListing all flows in database:");
let flow_count = 0;
for flow in all_flows {
    print(` - Flow: ${get_flow_name(flow)} (ID: ${get_flow_id(flow)})`);
    flow_count += 1;
}
print(`Total flows: ${flow_count}`);

// List all signature requirements
let all_reqs = db::list_signature_requirements();
print("\nListing all signature requirements in database:");
let req_count = 0;
for req in all_reqs {
    print(` - Requirement for step ${get_signature_requirement_flow_step_id(req)} (ID: ${get_signature_requirement_id(req)})`);
    req_count += 1;
}
print(`Total signature requirements: ${req_count}`);

// Clean up - delete the flow
db::delete_flow(get_flow_id(completed_flow));
print(`Deleted flow with ID: ${get_flow_id(completed_flow)}`);

// Clean up - delete signature requirements
db::delete_signature_requirement(get_signature_requirement_id(saved_req1));
db::delete_signature_requirement(get_signature_requirement_id(saved_req2));
db::delete_signature_requirement(get_signature_requirement_id(saved_req3));
print("Deleted all signature requirements");

View File

@@ -0,0 +1,65 @@
use heromodels::db::hero::OurDB;
use heromodels::db::{Collection, Db};
use heromodels::models::flow::{Flow, FlowStep, SignatureRequirement};
use heromodels_core::Model;
use std::sync::Arc;
/// Seed the mock database with flow data.
///
/// Creates an "Onboarding Flow" containing one step ("Complete HR paperwork")
/// that carries one signature requirement, stores all three in `db`, and
/// prints what was seeded. Panics if any database operation fails
/// (acceptable for example/mock code).
///
/// NOTE(review): this function is gated on the "flow" feature while the
/// example's `main` calls it unconditionally — confirm the example always
/// enables that feature.
#[cfg(feature = "flow")]
pub fn seed_flow_data(db: Arc<OurDB>) {
    // Create a flow
    let flow = Flow::new(None, "Onboarding Flow".to_string())
        .description("New employee onboarding process".to_string())
        .status("active".to_string());
    // Create a signature requirement first, so its id can be attached to the
    // step before the step itself is stored.
    let sig_req = SignatureRequirement::new(
        None,
        1,
        "hr_manager_pubkey".to_string(),
        "Please sign the employment contract".to_string(),
    );
    let (sig_req_id, saved_sig_req) = db
        .collection::<SignatureRequirement>()
        .expect("Failed to get SignatureRequirement collection")
        .set(&sig_req)
        .expect("Failed to store signature requirement");
    // Create a flow step and add the signature requirement
    let step = FlowStep::new(None, 1)
        .description("Complete HR paperwork".to_string())
        .add_signature_requirement(sig_req_id);
    let (step_id, saved_step) = db
        .collection::<FlowStep>()
        .expect("Failed to get FlowStep collection")
        .set(&step)
        .expect("Failed to store flow step");
    // Add the stored step to the flow by id
    let flow_with_step = flow.add_step(step_id);
    // Store the flow
    let (_flow_id, saved_flow) = db
        .collection::<Flow>()
        .expect("Failed to get Flow collection")
        .set(&flow_with_step)
        .expect("Failed to store flow");
    // Report what was seeded
    println!("Mock database seeded with flow data:");
    println!(
        " - Added flow: {} (ID: {})",
        saved_flow.name,
        saved_flow.get_id()
    );
    println!(
        " - Added step with order: {} (ID: {})",
        saved_step.step_order,
        saved_step.get_id()
    );
    println!(
        " - Added signature requirement for: {} (ID: {})",
        saved_sig_req.public_key,
        saved_sig_req.get_id()
    );
}

View File

@@ -0,0 +1,305 @@
//! # Rhailib Engine
//!
//! The central Rhai scripting engine for the heromodels ecosystem. This crate provides
//! a unified interface for creating, configuring, and executing Rhai scripts with access
//! to all business domain modules.
//!
//! ## Features
//!
//! - **Unified Engine Creation**: Pre-configured Rhai engine with all DSL modules
//! - **Script Execution Utilities**: Direct evaluation, file-based execution, and AST compilation
//! - **Mock Database System**: Complete testing environment with seeded data
//! - **Feature-Based Architecture**: Modular compilation based on required domains
//!
//! ## Quick Start
//!
//! ```rust
//! use rhailib_engine::{create_heromodels_engine, eval_script};
//!
//! // Create a fully configured engine
//! let engine = create_heromodels_engine();
//!
//! // Execute a business logic script
//! let result = eval_script(&engine, r#"
//! let company = new_company()
//! .name("Acme Corp")
//! .business_type("global");
//! company.name
//! "#)?;
//!
//! println!("Company name: {}", result.into_string().unwrap());
//! ```
//!
//! ## Available Features
//!
//! - `calendar` (default): Calendar and event management
//! - `finance` (default): Financial accounts, assets, and marketplace
//! - `flow`: Workflow and approval processes
//! - `legal`: Contract and legal document management
//! - `projects`: Project and task management
//! - `biz`: Business operations and entities
use rhai::{Engine, EvalAltResult, Scope, AST};
use rhailib_dsl;
use std::fs;
use std::path::Path;
/// Mock database module for testing and examples
pub mod mock_db;
/// Creates a fully configured Rhai engine with all available DSL modules.
///
/// This function creates a new Rhai engine instance, configures it with appropriate
/// limits and settings, and registers all available business domain modules based
/// on enabled features.
///
/// # Engine Configuration
///
/// The engine is configured with the following limits:
/// - **Expression Depth**: 128 levels for both expressions and functions
/// - **String Size**: 10 MB maximum
/// - **Array Size**: 10,000 elements maximum
/// - **Map Size**: 10,000 key-value pairs maximum
///
/// # Registered Modules
///
/// All enabled DSL modules are automatically registered, including:
/// - Business operations (companies, products, sales, shareholders)
/// - Financial models (accounts, assets, marketplace)
/// - Content management (collections, images, PDFs, books)
/// - Workflow management (flows, steps, signatures)
/// - And more based on enabled features
///
/// # Returns
///
/// A fully configured `Engine` instance ready for script execution.
///
/// # Example
///
/// ```rust
/// use rhailib_engine::create_heromodels_engine;
///
/// let engine = create_heromodels_engine();
///
/// // Engine is now ready to execute scripts with access to all DSL functions
/// let result = engine.eval::<String>(r#"
/// let company = new_company().name("Test Corp");
/// company.name
/// "#).unwrap();
/// assert_eq!(result, "Test Corp");
/// ```
pub fn create_heromodels_engine() -> Engine {
    let mut eng = Engine::new();

    // Safety limits to guard against runaway scripts: cap expression/function
    // nesting depth and the size of strings, arrays and maps a script may build.
    eng.set_max_expr_depths(128, 128);
    eng.set_max_string_size(10 * 1024 * 1024); // 10 MB
    eng.set_max_array_size(10 * 1024); // 10K elements
    eng.set_max_map_size(10 * 1024); // 10K elements

    // Expose every enabled heromodels DSL module to scripts.
    rhailib_dsl::register_dsl_modules(&mut eng);

    eng
}
// /// Register all heromodels Rhai modules with the engine
// pub fn register_all_modules(engine: &mut Engine, db: Arc<OurDB>) {
// // Register the calendar module if the feature is enabled
// heromodels::models::access::register_access_rhai_module(engine, db.clone());
// #[cfg(feature = "calendar")]
// heromodels::models::calendar::register_calendar_rhai_module(engine, db.clone());
// heromodels::models::contact::register_contact_rhai_module(engine, db.clone());
// heromodels::models::library::register_library_rhai_module(engine, db.clone());
// heromodels::models::circle::register_circle_rhai_module(engine, db.clone());
// // Register the flow module if the feature is enabled
// #[cfg(feature = "flow")]
// heromodels::models::flow::register_flow_rhai_module(engine, db.clone());
// // // Register the finance module if the feature is enabled
// // #[cfg(feature = "finance")]
// // heromodels::models::finance::register_finance_rhai_module(engine, db.clone());
// // Register the legal module if the feature is enabled
// #[cfg(feature = "legal")]
// heromodels::models::legal::register_legal_rhai_module(engine, db.clone());
// // Register the projects module if the feature is enabled
// #[cfg(feature = "projects")]
// heromodels::models::projects::register_projects_rhai_module(engine, db.clone());
// // Register the biz module if the feature is enabled
// #[cfg(feature = "biz")]
// heromodels::models::biz::register_biz_rhai_module(engine, db.clone());
// println!("Heromodels Rhai modules registered successfully.");
// }
/// Evaluates a Rhai script string and returns the result.
///
/// This function provides a convenient way to execute Rhai script strings directly
/// using the provided engine. It's suitable for one-off script execution or when
/// the script content is dynamically generated.
///
/// # Arguments
///
/// * `engine` - The Rhai engine to use for script execution
/// * `script` - The Rhai script content as a string
///
/// # Returns
///
/// * `Ok(Dynamic)` - The result of script execution
/// * `Err(Box<EvalAltResult>)` - Script compilation or execution error
///
/// # Example
///
/// ```rust
/// use rhailib_engine::{create_heromodels_engine, eval_script};
///
/// let engine = create_heromodels_engine();
/// let result = eval_script(&engine, r#"
/// let x = 42;
/// let y = 8;
/// x + y
/// "#)?;
/// assert_eq!(result.as_int().unwrap(), 50);
/// ```
pub fn eval_script(
    engine: &Engine,
    script: &str,
) -> Result<rhai::Dynamic, Box<rhai::EvalAltResult>> {
    // Thin wrapper over `Engine::eval`, kept as a named helper for API
    // symmetry with `eval_file` and `compile_script`.
    let value: rhai::Dynamic = engine.eval(script)?;
    Ok(value)
}
/// Evaluates a Rhai script from a file and returns the result.
///
/// This function reads a Rhai script from the filesystem and executes it using
/// the provided engine. It handles file reading errors gracefully and provides
/// meaningful error messages.
///
/// # Arguments
///
/// * `engine` - The Rhai engine to use for script execution
/// * `file_path` - Path to the Rhai script file
///
/// # Returns
///
/// * `Ok(Dynamic)` - The result of script execution
/// * `Err(Box<EvalAltResult>)` - File reading, compilation, or execution error
///
/// # Example
///
/// ```rust
/// use rhailib_engine::{create_heromodels_engine, eval_file};
/// use std::path::Path;
///
/// let engine = create_heromodels_engine();
/// let result = eval_file(&engine, Path::new("scripts/business_logic.rhai"))?;
/// println!("Script result: {:?}", result);
/// ```
///
/// # Error Handling
///
/// File reading errors are converted to Rhai `ErrorSystem` variants with
/// descriptive messages including the file path that failed to load.
pub fn eval_file(
    engine: &Engine,
    file_path: &Path,
) -> Result<rhai::Dynamic, Box<rhai::EvalAltResult>> {
    // Surface I/O failures as Rhai system errors so callers deal with a single
    // error type; the message names the path that failed to load.
    let source = fs::read_to_string(file_path).map_err(|io_err| {
        Box::new(EvalAltResult::ErrorSystem(
            format!("Failed to read script file: {}", file_path.display()),
            Box::new(io_err),
        ))
    })?;
    engine.eval::<rhai::Dynamic>(&source)
}
/// Compiles a Rhai script string into an Abstract Syntax Tree (AST).
///
/// This function compiles a Rhai script into an AST that can be executed multiple
/// times with different scopes. This is more efficient than re-parsing the script
/// for each execution when the same script needs to be run repeatedly.
///
/// # Arguments
///
/// * `engine` - The Rhai engine to use for compilation
/// * `script` - The Rhai script content as a string
///
/// # Returns
///
/// * `Ok(AST)` - The compiled Abstract Syntax Tree
/// * `Err(Box<EvalAltResult>)` - Script compilation error
///
/// # Example
///
/// ```rust
/// use rhailib_engine::{create_heromodels_engine, compile_script, run_ast};
/// use rhai::Scope;
///
/// let engine = create_heromodels_engine();
/// let ast = compile_script(&engine, r#"
/// let company = new_company().name(company_name);
/// save_company(company)
/// "#)?;
///
/// // Execute the compiled script multiple times with different variables
/// let mut scope1 = Scope::new();
/// scope1.push("company_name", "Acme Corp");
/// let result1 = run_ast(&engine, &ast, &mut scope1)?;
///
/// let mut scope2 = Scope::new();
/// scope2.push("company_name", "Tech Startup");
/// let result2 = run_ast(&engine, &ast, &mut scope2)?;
/// ```
pub fn compile_script(engine: &Engine, script: &str) -> Result<AST, Box<rhai::EvalAltResult>> {
    // `Engine::compile` yields a `ParseError`; `?` converts it into the boxed
    // `EvalAltResult` this crate uses for all script errors.
    let ast = engine.compile(script)?;
    Ok(ast)
}
/// Executes a compiled Rhai script AST with the provided scope.
///
/// This function runs a pre-compiled AST using the provided engine and scope.
/// The scope can contain variables and functions that will be available to
/// the script during execution.
///
/// # Arguments
///
/// * `engine` - The Rhai engine to use for execution
/// * `ast` - The compiled Abstract Syntax Tree to execute
/// * `scope` - Mutable scope containing variables and functions for the script
///
/// # Returns
///
/// * `Ok(Dynamic)` - The result of script execution
/// * `Err(Box<EvalAltResult>)` - Script execution error
///
/// # Example
///
/// ```rust
/// use rhailib_engine::{create_heromodels_engine, compile_script, run_ast};
/// use rhai::Scope;
///
/// let engine = create_heromodels_engine();
/// let ast = compile_script(&engine, "x + y")?;
///
/// let mut scope = Scope::new();
/// scope.push("x", 10_i64);
/// scope.push("y", 32_i64);
///
/// let result = run_ast(&engine, &ast, &mut scope)?;
/// assert_eq!(result.as_int().unwrap(), 42);
/// ```
///
/// # Performance Notes
///
/// Using compiled ASTs is significantly more efficient than re-parsing scripts
/// for repeated execution, especially for complex scripts or when executing
/// the same logic with different input parameters.
pub fn run_ast(
engine: &Engine,
ast: &AST,
scope: &mut Scope,
) -> Result<rhai::Dynamic, Box<rhai::EvalAltResult>> {
engine.eval_ast_with_scope(scope, ast)
}

View File

@@ -0,0 +1,374 @@
use chrono::Utc;
use heromodels::db::hero::OurDB;
use heromodels::db::{Collection, Db}; // Import both Db and Collection traits
use heromodels::models::calendar::{Calendar, Event};
use heromodels_core::Model; // Import Model trait to use build method
use std::env;
use std::sync::Arc;
// Import finance models
use heromodels::models::finance::account::Account;
use heromodels::models::finance::asset::{Asset, AssetType};
use heromodels::models::finance::marketplace::{Listing, ListingType};
// Conditionally import other modules based on features
#[cfg(feature = "flow")]
use heromodels::models::flow::{Flow, FlowStep, SignatureRequirement};
#[cfg(feature = "legal")]
use heromodels::models::legal::{
Contract, ContractRevision, ContractSigner, ContractStatus, SignerStatus,
};
#[cfg(feature = "projects")]
use heromodels::models::projects::{ItemType, Priority, Project, Status as ProjectStatus};
/// Create a mock database for examples, backed by files in the temp dir.
///
/// The database files live under a fixed sub-directory of the system temp
/// directory; passing `reset = true` clears any data left by a previous run.
pub fn create_mock_db() -> Arc<OurDB> {
    // All example runs share this directory; OurDB::new(_, true) resets it.
    let db_path = env::temp_dir().join("engine_examples");
    std::fs::create_dir_all(&db_path).expect("Failed to create temp directory");

    let db = OurDB::new(db_path, true).expect("Failed to create OurDB instance");
    Arc::new(db)
}
/// Seed the mock database with some initial data for all modules
pub fn seed_mock_db(db: Arc<OurDB>) {
// Seed calendar data
seed_calendar_data(db.clone());
// Seed finance data
seed_finance_data(db.clone());
// Seed flow data if the feature is enabled
#[cfg(feature = "flow")]
seed_flow_data(db.clone());
// Seed legal data if the feature is enabled
#[cfg(feature = "legal")]
seed_legal_data(db.clone());
// Seed projects data if the feature is enabled
#[cfg(feature = "projects")]
seed_projects_data(db.clone());
println!("Mock database seeded with initial data for all enabled modules.");
}
/// Seed the mock database with calendar data.
///
/// Creates one calendar and one event, stores the event first to obtain its
/// ID, links that ID into the calendar, and stores the calendar again with
/// the link in place. (Removed a large body of commented-out attendee code
/// that was dead weight.)
fn seed_calendar_data(db: Arc<OurDB>) {
    // Create a calendar.
    let mut calendar = Calendar::new(None, "Work Calendar".to_string());
    calendar.description = Some("My work schedule".to_string());

    // Initial store so the calendar exists before the event is linked.
    // NOTE(review): the calendar is stored again below once the event ID is
    // attached; confirm OurDB's `set` upserts by ID rather than inserting a
    // second record.
    let (_calendar_id, _updated_calendar) = db
        .collection::<Calendar>()
        .expect("Failed to get Calendar collection")
        .set(&calendar)
        .expect("Failed to store calendar");

    // Create a one-hour event starting now (timestamps are Unix seconds).
    let now = Utc::now().timestamp();
    let end_time = now + 3600;

    // Build the event via the builder pattern.
    let event = Event::new()
        .title("Team Meeting".to_string())
        .reschedule(now, end_time)
        .location("Conference Room A".to_string())
        .description("Weekly sync".to_string())
        .build();

    // Store the event first so the database assigns it an ID.
    let (event_id, updated_event) = db
        .collection()
        .expect("Failed to get Event collection")
        .set(&event)
        .expect("Failed to store event");

    // Link the stored event into the calendar and persist the calendar again.
    calendar = calendar.add_event(event_id as i64);
    let (_calendar_id, updated_calendar) = db
        .collection::<Calendar>()
        .expect("Failed to get Calendar collection")
        .set(&calendar)
        .expect("Failed to store calendar");

    println!("Mock database seeded with calendar data:");
    println!(
        " - Added calendar: {} (ID: {})",
        updated_calendar.name, updated_calendar.base_data.id
    );
    println!(
        " - Added event: {} (ID: {})",
        updated_event.title, updated_event.base_data.id
    );
}
/// Seed the mock database with flow data.
///
/// Builds a two-step document-approval flow plus two signature requirements,
/// then stores the flow and both requirements.
#[cfg(feature = "flow")]
fn seed_flow_data(db: Arc<OurDB>) {
    // Create a flow and set its properties via the consuming builder setters.
    let mut flow = Flow::new(0, "Document Approval".to_string());
    flow = flow.status("draft".to_string());
    flow = flow.name("Document Approval Flow".to_string());

    // Create the two flow steps.
    let mut step1 = FlowStep::new(0, 1);
    step1 = step1.description("Initial review by legal team".to_string());
    step1 = step1.status("pending".to_string());

    let mut step2 = FlowStep::new(0, 2);
    step2 = step2.description("Approval by department head".to_string());
    step2 = step2.status("pending".to_string());

    // Signature requirements for each step.
    // FIX: dropped `mut` — these are never mutated after construction
    // (previously triggered unused-mut warnings).
    let req1 = SignatureRequirement::new(
        0,
        1,
        "Legal Team".to_string(),
        "Please review this document".to_string(),
    );
    let req2 = SignatureRequirement::new(
        0,
        2,
        "Department Head".to_string(),
        "Please approve this document".to_string(),
    );

    // Attach the steps to the flow.
    flow = flow.add_step(step1);
    flow = flow.add_step(step2);

    // Persist the flow and both signature requirements.
    let (_, updated_flow) = db
        .collection::<Flow>()
        .expect("Failed to get Flow collection")
        .set(&flow)
        .expect("Failed to store flow");

    let (_, updated_req1) = db
        .collection::<SignatureRequirement>()
        .expect("Failed to get SignatureRequirement collection")
        .set(&req1)
        .expect("Failed to store signature requirement");

    let (_, updated_req2) = db
        .collection::<SignatureRequirement>()
        .expect("Failed to get SignatureRequirement collection")
        .set(&req2)
        .expect("Failed to store signature requirement");

    println!("Mock database seeded with flow data:");
    println!(
        " - Added flow: {} (ID: {})",
        updated_flow.name, updated_flow.base_data.id
    );
    println!(" - Added {} steps", updated_flow.steps.len());
    println!(
        " - Added signature requirements with IDs: {} and {}",
        updated_req1.base_data.id, updated_req2.base_data.id
    );
}
/// Seed the mock database with legal data.
///
/// Builds a draft service contract with one revision and two signers, then
/// persists it and reports what was stored.
#[cfg(feature = "legal")]
fn seed_legal_data(db: Arc<OurDB>) {
    // Draft contract.
    let mut contract = Contract::new(None, "Service Agreement".to_string());
    contract.description = Some("Agreement for software development services".to_string());
    contract.status = ContractStatus::Draft;

    // Attach the first revision and the two signing parties inline.
    contract.add_revision(ContractRevision::new(
        None,
        "Initial draft".to_string(),
        "https://example.com/contract/v1".to_string(),
    ));
    contract.add_signer(ContractSigner::new(None, 1, "Client".to_string()));
    contract.add_signer(ContractSigner::new(None, 2, "Provider".to_string()));

    // Persist.
    let (_, updated_contract) = db
        .collection::<Contract>()
        .expect("Failed to get Contract collection")
        .set(&contract)
        .expect("Failed to store contract");

    println!("Mock database seeded with legal data:");
    println!(
        " - Added contract: {} (ID: {})",
        updated_contract.name, updated_contract.base_data.id
    );
    println!(
        " - Added {} revisions and {} signers",
        updated_contract.revisions.len(),
        updated_contract.signers.len()
    );
}
/// Seed the mock database with projects data.
///
/// Creates one high-priority, in-progress project with two members and two
/// tags, then persists it.
#[cfg(feature = "projects")]
fn seed_projects_data(db: Arc<OurDB>) {
    let mut project = Project::new(None, "Website Redesign".to_string());
    project.description = Some("Redesign the company website".to_string());
    project.status = ProjectStatus::InProgress;
    project.priority = Priority::High;

    // Two members and two tags.
    for member_id in [1, 2] {
        project.add_member_id(member_id);
    }
    for tag in ["design", "web"] {
        project.add_tag(tag.to_string());
    }

    // Persist.
    let (_, updated_project) = db
        .collection::<Project>()
        .expect("Failed to get Project collection")
        .set(&project)
        .expect("Failed to store project");

    println!("Mock database seeded with projects data:");
    println!(
        " - Added project: {} (ID: {})",
        updated_project.name, updated_project.base_data.id
    );
    println!(
        " - Status: {}, Priority: {}",
        updated_project.status, updated_project.priority
    );
    println!(
        " - Added {} members and {} tags",
        updated_project.member_ids.len(),
        updated_project.tags.len()
    );
}
/// Seed the mock database with finance data.
///
/// Creates a demo account, an ERC-20 token asset and an ERC-721 NFT asset,
/// attaches both assets to the account, and finally lists the NFT for
/// auction.
fn seed_finance_data(db: Arc<OurDB>) {
    // Create a user account.
    let mut account = Account::new()
        .name("Demo Account")
        .user_id(1)
        .description("Demo trading account")
        .ledger("ethereum")
        .address("0x1234567890abcdef1234567890abcdef12345678")
        .pubkey("0xabcdef1234567890abcdef1234567890abcdef12");

    // Store the account in the database.
    let (account_id, updated_account) = db
        .collection::<Account>()
        .expect("Failed to get Account collection")
        .set(&account)
        .expect("Failed to store account");

    // Create an ERC20 token asset.
    let token_asset = Asset::new()
        .name("HERO Token")
        .description("Herocode governance token")
        .amount(1000.0)
        .address("0x9876543210abcdef9876543210abcdef98765432")
        .asset_type(AssetType::Erc20)
        .decimals(18);

    let (token_id, updated_token) = db
        .collection::<Asset>()
        .expect("Failed to get Asset collection")
        .set(&token_asset)
        .expect("Failed to store token asset");

    // Create an NFT asset (single item, no fractional decimals).
    let nft_asset = Asset::new()
        .name("Herocode #1")
        .description("Unique digital collectible")
        .amount(1.0)
        .address("0xabcdef1234567890abcdef1234567890abcdef12")
        .asset_type(AssetType::Erc721)
        .decimals(0);

    let (nft_id, updated_nft) = db
        .collection::<Asset>()
        .expect("Failed to get Asset collection")
        .set(&nft_asset)
        .expect("Failed to store NFT asset");

    // Attach both stored assets to the account and persist the update.
    account = updated_account.add_asset(token_id);
    account = account.add_asset(nft_id);
    let (_, updated_account) = db
        .collection::<Account>()
        .expect("Failed to get Account collection")
        .set(&account)
        .expect("Failed to store updated account");

    // Create an auction listing for the NFT.
    // FIX: image URL scheme was the typo "hcttps://" — corrected to "https://".
    let listing = Listing::new()
        .seller_id(account_id)
        .asset_id(nft_id)
        .price(0.5)
        .currency("ETH")
        .listing_type(ListingType::Auction)
        .title("Rare Herocode NFT".to_string())
        .description("One of a kind digital collectible".to_string())
        .image_url(Some("https://example.com/nft/1.png".to_string()))
        .add_tag("rare".to_string())
        .add_tag("collectible".to_string());

    let (_listing_id, updated_listing) = db
        .collection::<Listing>()
        .expect("Failed to get Listing collection")
        .set(&listing)
        .expect("Failed to store listing");

    println!("Mock database seeded with finance data:");
    println!(
        " - Added account: {} (ID: {})",
        updated_account.name, updated_account.base_data.id
    );
    println!(
        " - Added token asset: {} (ID: {})",
        updated_token.name, updated_token.base_data.id
    );
    println!(
        " - Added NFT asset: {} (ID: {})",
        updated_nft.name, updated_nft.base_data.id
    );
    println!(
        " - Added listing: {} (ID: {})",
        updated_listing.title, updated_listing.base_data.id
    );
}

View File

@@ -0,0 +1,97 @@
use heromodels::db::Db;
use macros::{
register_authorized_create_by_id_fn, register_authorized_delete_by_id_fn,
register_authorized_get_by_id_fn,
};
use rhai::plugin::*;
use rhai::{Array, Dynamic, Engine, EvalAltResult, Module, INT};
use std::mem;
use std::sync::Arc;
use heromodels::db::hero::OurDB;
use heromodels::db::Collection;
use heromodels::models::flow::flow::Flow;
use heromodels::models::flow::flow_step::FlowStep;
// Local aliases for the model types exposed to Rhai scripts.
type RhaiFlow = Flow;
type RhaiFlowStep = FlowStep;

// Rhai-facing API for the `Flow` model.
//
// Setters take the flow by `&mut`, move it out with `std::mem::take`
// (leaving a `Default` value behind), apply the consuming builder method,
// write the result back, and return a clone — this lets scripts chain
// calls such as `flow.name("x").status("draft")`.
#[export_module]
mod rhai_flow_module {
    use super::{Array, Dynamic, RhaiFlow, RhaiFlowStep, INT};

    // Construct an empty `Flow` (script: `new_flow()`).
    #[rhai_fn(name = "new_flow", return_raw)]
    pub fn new_flow() -> Result<RhaiFlow, Box<EvalAltResult>> {
        Ok(Flow::new())
    }

    // --- Setters ---

    // Set the flow's name (script: `flow.name("...")`).
    #[rhai_fn(name = "name", return_raw)]
    pub fn set_name(flow: &mut RhaiFlow, name: String) -> Result<RhaiFlow, Box<EvalAltResult>> {
        let owned = std::mem::take(flow);
        *flow = owned.name(name);
        Ok(flow.clone())
    }

    // Set the flow's status string (script: `flow.status("...")`).
    #[rhai_fn(name = "status", return_raw)]
    pub fn set_status(flow: &mut RhaiFlow, status: String) -> Result<RhaiFlow, Box<EvalAltResult>> {
        let owned = std::mem::take(flow);
        *flow = owned.status(status);
        Ok(flow.clone())
    }

    // Append a step to the flow (script: `flow.add_step(step)`).
    #[rhai_fn(name = "add_step", return_raw)]
    pub fn add_step(
        flow: &mut RhaiFlow,
        step: RhaiFlowStep,
    ) -> Result<RhaiFlow, Box<EvalAltResult>> {
        let owned = std::mem::take(flow);
        *flow = owned.add_step(step);
        Ok(flow.clone())
    }

    // --- Getters (property access from scripts, e.g. `flow.id`) ---

    #[rhai_fn(get = "id", pure)]
    pub fn get_id(f: &mut RhaiFlow) -> INT {
        f.base_data.id as INT
    }

    #[rhai_fn(get = "name", pure)]
    pub fn get_name(f: &mut RhaiFlow) -> String {
        f.name.clone()
    }

    #[rhai_fn(get = "status", pure)]
    pub fn get_status(f: &mut RhaiFlow) -> String {
        f.status.clone()
    }

    // Clones the step list and wraps each step in a `Dynamic` for Rhai.
    #[rhai_fn(get = "steps", pure)]
    pub fn get_steps(f: &mut RhaiFlow) -> Array {
        f.steps.clone().into_iter().map(Dynamic::from).collect()
    }
}
/// Register the `Flow` type and its functions on a Rhai engine.
///
/// Exposes the builder/getter functions from `rhai_flow_module` plus the
/// macro-generated `save_flow` / `get_flow` / `delete_flow` functions, then
/// installs everything as a global module.
pub fn register_flow_rhai_module(engine: &mut Engine) {
    engine.build_type::<RhaiFlow>();
    let mut module = exported_module!(rhai_flow_module);
    // Macro-generated CRUD wrappers (authorization behavior is defined by the
    // `register_authorized_*` macros in the `macros` crate).
    register_authorized_create_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "save_flow",
        resource_type_str: "Flow",
        rhai_return_rust_type: heromodels::models::flow::flow::Flow
    );
    register_authorized_get_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "get_flow",
        resource_type_str: "Flow",
        rhai_return_rust_type: heromodels::models::flow::flow::Flow
    );
    register_authorized_delete_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "delete_flow",
        resource_type_str: "Flow",
        rhai_return_rust_type: heromodels::models::flow::flow::Flow
    );
    engine.register_global_module(module.into());
}

View File

@@ -0,0 +1,86 @@
use heromodels::db::Db;
use macros::{
register_authorized_create_by_id_fn, register_authorized_delete_by_id_fn,
register_authorized_get_by_id_fn,
};
use rhai::plugin::*;
use rhai::{Dynamic, Engine, EvalAltResult, Module, INT};
use std::mem;
use std::sync::Arc;
use heromodels::db::hero::OurDB;
use heromodels::db::Collection;
use heromodels::models::flow::flow_step::FlowStep;
// Local alias for the model type exposed to Rhai scripts.
type RhaiFlowStep = FlowStep;

// Rhai-facing API for the `FlowStep` model.
//
// Setters move the value out with `std::mem::take`, apply the consuming
// builder method, write the result back, and return a clone so scripts can
// chain calls.
#[export_module]
mod rhai_flow_step_module {
    use super::{RhaiFlowStep, INT};

    // Construct a default `FlowStep` (script: `new_flow_step()`).
    #[rhai_fn(name = "new_flow_step", return_raw)]
    pub fn new_flow_step() -> Result<RhaiFlowStep, Box<EvalAltResult>> {
        Ok(FlowStep::default())
    }

    // --- Setters ---

    // Set the step's description (script: `step.description("...")`).
    #[rhai_fn(name = "description", return_raw)]
    pub fn set_description(
        step: &mut RhaiFlowStep,
        description: String,
    ) -> Result<RhaiFlowStep, Box<EvalAltResult>> {
        let owned = std::mem::take(step);
        *step = owned.description(description);
        Ok(step.clone())
    }

    // Set the step's status string (script: `step.status("...")`).
    #[rhai_fn(name = "status", return_raw)]
    pub fn set_status(
        step: &mut RhaiFlowStep,
        status: String,
    ) -> Result<RhaiFlowStep, Box<EvalAltResult>> {
        let owned = std::mem::take(step);
        *step = owned.status(status);
        Ok(step.clone())
    }

    // --- Getters (property access from scripts, e.g. `step.id`) ---

    #[rhai_fn(get = "id", pure)]
    pub fn get_id(s: &mut RhaiFlowStep) -> INT {
        s.base_data.id as INT
    }

    // Description is optional on the model, hence `Option<String>`.
    #[rhai_fn(get = "description", pure)]
    pub fn get_description(s: &mut RhaiFlowStep) -> Option<String> {
        s.description.clone()
    }

    #[rhai_fn(get = "status", pure)]
    pub fn get_status(s: &mut RhaiFlowStep) -> String {
        s.status.clone()
    }
}
/// Register the `FlowStep` type and its functions on a Rhai engine.
///
/// Exposes the builder/getter functions from `rhai_flow_step_module` plus the
/// macro-generated `save_flow_step` / `get_flow_step` / `delete_flow_step`
/// functions, then installs everything as a global module.
pub fn register_flow_step_rhai_module(engine: &mut Engine) {
    engine.build_type::<RhaiFlowStep>();
    let mut module = exported_module!(rhai_flow_step_module);
    // Macro-generated CRUD wrappers for FlowStep records.
    register_authorized_create_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "save_flow_step",
        resource_type_str: "FlowStep",
        rhai_return_rust_type: heromodels::models::flow::flow_step::FlowStep
    );
    register_authorized_get_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "get_flow_step",
        resource_type_str: "FlowStep",
        rhai_return_rust_type: heromodels::models::flow::flow_step::FlowStep
    );
    register_authorized_delete_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "delete_flow_step",
        resource_type_str: "FlowStep",
        rhai_return_rust_type: heromodels::models::flow::flow_step::FlowStep
    );
    engine.register_global_module(module.into());
}

View File

@@ -0,0 +1,17 @@
use rhai::Engine;
pub mod flow;
pub mod flow_step;
pub mod signature_requirement;
pub mod orchestrated_flow;
pub mod orchestrated_flow_step;
// Re-export the orchestrated models for easy access
pub use orchestrated_flow::{OrchestratedFlow, OrchestratorError, FlowStatus};
pub use orchestrated_flow_step::OrchestratedFlowStep;
/// Register all flow-related Rhai modules on the given engine.
///
/// Note: the orchestrated models re-exported from this module are plain Rust
/// types and are not registered with the engine here.
pub fn register_flow_rhai_modules(engine: &mut Engine) {
    flow::register_flow_rhai_module(engine);
    flow_step::register_flow_step_rhai_module(engine);
    signature_requirement::register_signature_requirement_rhai_module(engine);
}

View File

@@ -0,0 +1,154 @@
//! Orchestrated Flow model for DAG-based workflow execution
use heromodels_core::BaseModelData;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use thiserror::Error;
use super::orchestrated_flow_step::OrchestratedFlowStep;
/// Extended Flow with orchestrator-specific steps
/// Extended Flow with orchestrator-specific steps
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrchestratedFlow {
    /// Base model data (id, created_at, updated_at)
    pub base_data: BaseModelData,
    /// Name of the flow
    pub name: String,
    /// Orchestrated steps with dependencies; each step's `depends_on` list
    /// references other steps' IDs, forming a DAG (checked by `validate_dag`)
    pub orchestrated_steps: Vec<OrchestratedFlowStep>,
}
impl OrchestratedFlow {
    /// Create a new orchestrated flow with the given name and no steps.
    pub fn new(name: &str) -> Self {
        Self {
            base_data: BaseModelData::new(),
            name: name.to_string(),
            orchestrated_steps: Vec::new(),
        }
    }

    /// Add a step to the flow (builder style: consumes and returns `self`).
    pub fn add_step(mut self, step: OrchestratedFlowStep) -> Self {
        self.orchestrated_steps.push(step);
        self
    }

    /// Get the flow ID.
    // NOTE(review): presumably 0 until the flow is persisted — confirm
    // `BaseModelData::new()` semantics.
    pub fn id(&self) -> u32 {
        self.base_data.id
    }

    /// Validate the DAG structure (no cycles).
    ///
    /// Runs a depth-first search from every not-yet-visited step; returns
    /// `OrchestratorError::CyclicDependency` if any back-edge is found.
    pub fn validate_dag(&self) -> Result<(), OrchestratorError> {
        let mut visited = HashSet::new();
        let mut rec_stack = HashSet::new();

        for step in &self.orchestrated_steps {
            if !visited.contains(&step.id()) {
                if self.has_cycle(step.id(), &mut visited, &mut rec_stack)? {
                    return Err(OrchestratorError::CyclicDependency);
                }
            }
        }
        Ok(())
    }

    /// DFS helper: returns `Ok(true)` if a cycle is reachable from `step_id`.
    ///
    /// `visited` persists across DFS roots; `rec_stack` tracks only the
    /// current DFS path — finding a dependency already on the path means a
    /// back-edge, i.e. a cycle. A dependency on an unknown step surfaces as
    /// `StepNotFound` via the lookup below (not `InvalidDependency`).
    fn has_cycle(
        &self,
        step_id: u32,
        visited: &mut HashSet<u32>,
        rec_stack: &mut HashSet<u32>,
    ) -> Result<bool, OrchestratorError> {
        visited.insert(step_id);
        rec_stack.insert(step_id);

        // Linear scan: step count is expected to be small.
        let step = self.orchestrated_steps
            .iter()
            .find(|s| s.id() == step_id)
            .ok_or(OrchestratorError::StepNotFound(step_id))?;

        for &dep_id in &step.depends_on {
            if !visited.contains(&dep_id) {
                if self.has_cycle(dep_id, visited, rec_stack)? {
                    return Ok(true);
                }
            } else if rec_stack.contains(&dep_id) {
                return Ok(true);
            }
        }

        // Done exploring this node's subtree — pop it off the DFS path.
        rec_stack.remove(&step_id);
        Ok(false)
    }
}
/// Orchestrator errors
/// Orchestrator errors
#[derive(Error, Debug)]
pub enum OrchestratorError {
    /// Underlying storage failure while loading or saving flow state.
    #[error("Database error: {0}")]
    DatabaseError(String),
    /// A script executor backend reported a failure.
    #[error("Executor error: {0}")]
    ExecutorError(String),
    /// No step is currently runnable although the flow is not finished.
    #[error("No ready steps found - possible deadlock")]
    NoReadySteps,
    /// A specific step failed, with an optional error message.
    #[error("Step {0} failed: {1:?}")]
    StepFailed(u32, Option<String>),
    /// The step dependency graph contains a cycle (see `validate_dag`).
    #[error("Cyclic dependency detected in workflow")]
    CyclicDependency,
    /// A referenced step ID does not exist in the flow.
    #[error("Step {0} not found")]
    StepNotFound(u32),
    /// A step declares a dependency on a step that does not exist.
    #[error("Invalid dependency: step {0} depends on non-existent step {1}")]
    InvalidDependency(u32, u32),
}
/// Flow execution status
/// Flow execution status
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum FlowStatus {
    /// Not yet started.
    Pending,
    /// Currently executing.
    Running,
    /// Finished successfully.
    Completed,
    /// Finished with a failure.
    Failed,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder should accumulate steps and keep the flow name.
    #[test]
    fn test_orchestrated_flow_builder() {
        let step1 = OrchestratedFlowStep::new("step1").script("let x = 1;");
        let step2 = OrchestratedFlowStep::new("step2").script("let y = 2;");

        let flow = OrchestratedFlow::new("test_flow")
            .add_step(step1)
            .add_step(step2);

        assert_eq!(flow.name, "test_flow");
        assert_eq!(flow.orchestrated_steps.len(), 2);
    }

    /// A linear two-step dependency chain must validate as acyclic.
    // NOTE(review): unsaved steps presumably share the default id, so
    // `step1.id()` here may equal step2's own id — confirm this test actually
    // exercises a cross-step dependency rather than a degenerate one.
    #[test]
    fn test_dag_validation_no_cycle() {
        let step1 = OrchestratedFlowStep::new("step1").script("let x = 1;");
        let step2 = OrchestratedFlowStep::new("step2")
            .script("let y = 2;")
            .depends_on(step1.id());

        let flow = OrchestratedFlow::new("test_flow")
            .add_step(step1)
            .add_step(step2);

        assert!(flow.validate_dag().is_ok());
    }
}

View File

@@ -0,0 +1,124 @@
//! Orchestrated Flow Step model for DAG-based workflow execution
use heromodels_core::BaseModelData;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Extended FlowStep with orchestrator-specific fields
/// Extended FlowStep with orchestrator-specific fields
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrchestratedFlowStep {
    /// Base model data (id, created_at, updated_at)
    pub base_data: BaseModelData,
    /// Name of the flow step
    pub name: String,
    /// Rhai script to execute
    pub script: String,
    /// IDs of steps this step depends on
    pub depends_on: Vec<u32>,
    /// Execution context (circle)
    pub context_id: String,
    /// Target worker for execution
    pub worker_id: String,
    /// Input parameters
    pub inputs: HashMap<String, String>,
    /// Output results
    // NOTE(review): no builder setter exists for `outputs`; presumably filled
    // in by the orchestrator after execution — confirm.
    pub outputs: HashMap<String, String>,
}
impl OrchestratedFlowStep {
    /// Create a new orchestrated flow step with the given name.
    ///
    /// Accepts anything convertible to `String` (generalized from `&str`;
    /// every existing `&str` call site remains valid).
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            base_data: BaseModelData::new(),
            name: name.into(),
            script: String::new(),
            depends_on: Vec::new(),
            context_id: String::new(),
            worker_id: String::new(),
            inputs: HashMap::new(),
            outputs: HashMap::new(),
        }
    }

    /// Set the Rhai script content (builder style: consumes and returns `self`).
    pub fn script(mut self, script: impl Into<String>) -> Self {
        self.script = script.into();
        self
    }

    /// Add a dependency on another step by ID.
    pub fn depends_on(mut self, step_id: u32) -> Self {
        self.depends_on.push(step_id);
        self
    }

    /// Set the execution context (circle) ID.
    pub fn context_id(mut self, context_id: impl Into<String>) -> Self {
        self.context_id = context_id.into();
        self
    }

    /// Set the target worker ID.
    pub fn worker_id(mut self, worker_id: impl Into<String>) -> Self {
        self.worker_id = worker_id.into();
        self
    }

    /// Add a single input parameter (key/value pair).
    pub fn input(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.inputs.insert(key.into(), value.into());
        self
    }

    /// Get the step ID.
    // NOTE(review): presumably 0 until the step is persisted — confirm
    // `BaseModelData::new()` semantics.
    pub fn id(&self) -> u32 {
        self.base_data.id
    }
}
// FIX: this file previously declared `mod tests` twice, which is a compile
// error (E0428: duplicate definition). The second copy also asserted on a
// nonexistent `step.flow_step.name` field. The two were duplicates of the
// same test, so they are merged into one correct module.
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder methods should populate every field they target.
    #[test]
    fn test_orchestrated_flow_step_builder() {
        let step = OrchestratedFlowStep::new("test_step")
            .script("let x = 1;")
            .context_id("test_context")
            .worker_id("test_worker")
            .input("key1", "value1");

        assert_eq!(step.name, "test_step");
        assert_eq!(step.script, "let x = 1;");
        assert_eq!(step.context_id, "test_context");
        assert_eq!(step.worker_id, "test_worker");
        assert_eq!(step.inputs.get("key1"), Some(&"value1".to_string()));
    }
}

View File

@@ -0,0 +1,145 @@
use heromodels::db::Db;
use macros::{
register_authorized_create_by_id_fn, register_authorized_delete_by_id_fn,
register_authorized_get_by_id_fn,
};
use rhai::plugin::*;
use rhai::{Dynamic, Engine, EvalAltResult, Module, INT};
use std::mem;
use std::sync::Arc;
use heromodels::db::hero::OurDB;
use heromodels::db::Collection;
use heromodels::models::flow::signature_requirement::SignatureRequirement;
// Local alias for the model type exposed to Rhai scripts.
type RhaiSignatureRequirement = SignatureRequirement;

// Rhai-facing API for the `SignatureRequirement` model.
//
// The first three setters assign public fields directly; the remaining ones
// delegate to consuming builder methods. All of them move the value out with
// `std::mem::take`, update it, write it back, and return a clone so scripts
// can chain calls.
#[export_module]
mod rhai_signature_requirement_module {
    use super::{RhaiSignatureRequirement, INT};

    // Construct a default `SignatureRequirement` (script: `new_signature_requirement()`).
    #[rhai_fn(name = "new_signature_requirement", return_raw)]
    pub fn new_signature_requirement() -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        Ok(SignatureRequirement::default())
    }

    // --- Setters ---

    // Associate the requirement with a flow step (Rhai INT narrowed to u32).
    #[rhai_fn(name = "flow_step_id", return_raw)]
    pub fn set_flow_step_id(
        sr: &mut RhaiSignatureRequirement,
        flow_step_id: INT,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let mut owned = std::mem::take(sr);
        owned.flow_step_id = flow_step_id as u32;
        *sr = owned;
        Ok(sr.clone())
    }

    // Set the public key expected to sign.
    #[rhai_fn(name = "public_key", return_raw)]
    pub fn set_public_key(
        sr: &mut RhaiSignatureRequirement,
        public_key: String,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let mut owned = std::mem::take(sr);
        owned.public_key = public_key;
        *sr = owned;
        Ok(sr.clone())
    }

    // Set the message presented to the signer.
    #[rhai_fn(name = "message", return_raw)]
    pub fn set_message(
        sr: &mut RhaiSignatureRequirement,
        message: String,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let mut owned = std::mem::take(sr);
        owned.message = message;
        *sr = owned;
        Ok(sr.clone())
    }

    // Record who signed (builder method).
    #[rhai_fn(name = "signed_by", return_raw)]
    pub fn set_signed_by(
        sr: &mut RhaiSignatureRequirement,
        signed_by: String,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let owned = std::mem::take(sr);
        *sr = owned.signed_by(signed_by);
        Ok(sr.clone())
    }

    // Record the signature value (builder method).
    #[rhai_fn(name = "signature", return_raw)]
    pub fn set_signature(
        sr: &mut RhaiSignatureRequirement,
        signature: String,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let owned = std::mem::take(sr);
        *sr = owned.signature(signature);
        Ok(sr.clone())
    }

    // Set the requirement's status string (builder method).
    #[rhai_fn(name = "status", return_raw)]
    pub fn set_status(
        sr: &mut RhaiSignatureRequirement,
        status: String,
    ) -> Result<RhaiSignatureRequirement, Box<EvalAltResult>> {
        let owned = std::mem::take(sr);
        *sr = owned.status(status);
        Ok(sr.clone())
    }

    // --- Getters (property access from scripts, e.g. `sr.id`) ---

    #[rhai_fn(get = "id", pure)]
    pub fn get_id(s: &mut RhaiSignatureRequirement) -> INT {
        s.base_data.id as INT
    }

    #[rhai_fn(get = "flow_step_id", pure)]
    pub fn get_flow_step_id(s: &mut RhaiSignatureRequirement) -> INT {
        s.flow_step_id as INT
    }

    #[rhai_fn(get = "public_key", pure)]
    pub fn get_public_key(s: &mut RhaiSignatureRequirement) -> String {
        s.public_key.clone()
    }

    #[rhai_fn(get = "message", pure)]
    pub fn get_message(s: &mut RhaiSignatureRequirement) -> String {
        s.message.clone()
    }

    // Optional until someone signs.
    #[rhai_fn(get = "signed_by", pure)]
    pub fn get_signed_by(s: &mut RhaiSignatureRequirement) -> Option<String> {
        s.signed_by.clone()
    }

    // Optional until a signature is recorded.
    #[rhai_fn(get = "signature", pure)]
    pub fn get_signature(s: &mut RhaiSignatureRequirement) -> Option<String> {
        s.signature.clone()
    }

    #[rhai_fn(get = "status", pure)]
    pub fn get_status(s: &mut RhaiSignatureRequirement) -> String {
        s.status.clone()
    }
}
/// Register the `SignatureRequirement` type and its functions on a Rhai engine.
///
/// Exposes the builder/getter functions from
/// `rhai_signature_requirement_module` plus the macro-generated
/// `save_signature_requirement` / `get_signature_requirement` /
/// `delete_signature_requirement` functions, then installs everything as a
/// global module.
pub fn register_signature_requirement_rhai_module(engine: &mut Engine) {
    engine.build_type::<RhaiSignatureRequirement>();
    let mut module = exported_module!(rhai_signature_requirement_module);
    // Macro-generated CRUD wrappers for SignatureRequirement records.
    register_authorized_create_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "save_signature_requirement",
        resource_type_str: "SignatureRequirement",
        rhai_return_rust_type: heromodels::models::flow::signature_requirement::SignatureRequirement
    );
    register_authorized_get_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "get_signature_requirement",
        resource_type_str: "SignatureRequirement",
        rhai_return_rust_type: heromodels::models::flow::signature_requirement::SignatureRequirement
    );
    register_authorized_delete_by_id_fn!(
        module: &mut module,
        rhai_fn_name: "delete_signature_requirement",
        resource_type_str: "SignatureRequirement",
        rhai_return_rust_type: heromodels::models::flow::signature_requirement::SignatureRequirement
    );
    engine.register_global_module(module.into());
}

View File

@@ -0,0 +1,51 @@
[package]
name = "orchestrator"
version = "0.1.0"
edition = "2021"

[dependencies]
# Core async runtime
tokio = { version = "1", features = ["macros", "rt-multi-thread", "sync", "time"] }
async-trait = "0.1"
futures = "0.3"
futures-util = "0.3"

# Serialization
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

# Error handling
thiserror = "1.0"

# Collections
uuid = { version = "1.6", features = ["v4", "serde"] }

# Time handling
chrono = { version = "0.4", features = ["serde"] }

# HTTP client
reqwest = { version = "0.11", features = ["json"] }

# WebSocket client
tokio-tungstenite = "0.20"

# Rhai scripting
rhai = "1.21.0"

# Database and models
# WARNING(review): machine-specific absolute paths — these only resolve on one
# developer's machine. Replace with workspace-relative or git dependencies so
# the crate builds anywhere.
heromodels = { path = "/Users/timurgordon/code/git.ourworld.tf/herocode/db/heromodels" }
heromodels_core = { path = "/Users/timurgordon/code/git.ourworld.tf/herocode/db/heromodels_core" }

# DSL integration for flow models
rhailib_dsl = { path = "../dsl" }

# Dispatcher integration
rhai_dispatcher = { path = "../dispatcher" }

# Logging
log = "0.4"
tracing = "0.1"
tracing-subscriber = "0.3"

[dev-dependencies]
tokio-test = "0.4"

View File

@@ -0,0 +1,320 @@
# Rationale for Orchestrator
We may have scripts that run asynchronously, depend on human input, or depend on other scripts to complete. We want to be able to implement high-level workflows of Rhai scripts.
## Design
Directed Acyclic Graphs (DAGs) are a natural fit for representing workflows.
## Requirements
1. Uses Directed Acyclic Graphs (DAGs) to represent workflows.
2. Each step in the workflow defines the script to execute, the inputs to pass to it, and the outputs to expect from it.
3. Simplicity: the output cases are binary (success or failure), and params inputted / outputted are simple key-value pairs.
4. Multiple steps can depend on the same step.
5. Scripts are executed using [RhaiDispatcher](../dispatcher/README.md).
## Architecture
The Orchestrator is a simple DAG-based workflow execution system that extends the heromodels flow structures to support workflows with dependencies and distributed script execution.
### Core Component
```mermaid
graph TB
subgraph "Orchestrator"
O[Orchestrator] --> RE[RhaiExecutor Trait]
O --> DB[(Database)]
end
subgraph "Executor Implementations"
RE --> RD[RhaiDispatcher]
RE --> WS[WebSocketClient]
RE --> HTTP[HttpClient]
RE --> LOCAL[LocalExecutor]
end
subgraph "Data Models (heromodels)"
F[Flow] --> FS[FlowStep]
FS --> SR[SignatureRequirement]
end
subgraph "Infrastructure"
RD --> RQ[Redis Queues]
RD --> W[Workers]
WS --> WSS[WebSocket Server]
HTTP --> API[REST API]
end
```
### Execution Abstraction
The orchestrator uses a trait-based approach for script execution, allowing different execution backends:
#### RhaiExecutor Trait
```rust
use rhai_dispatcher::{PlayRequestBuilder, RhaiTaskDetails, RhaiDispatcherError};
#[async_trait]
pub trait RhaiExecutor {
async fn call(&self, request: PlayRequestBuilder<'_>) -> Result<RhaiTaskDetails, RhaiDispatcherError>;
}
```
#### Executor Implementations
**RhaiDispatcher Implementation:**
```rust
pub struct DispatcherExecutor {
dispatcher: RhaiDispatcher,
}
#[async_trait]
impl RhaiExecutor for DispatcherExecutor {
async fn call(&self, request: PlayRequestBuilder<'_>) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
// Use RhaiDispatcher to execute script via Redis queues
request.await_response().await
}
}
```
**WebSocket Client Implementation:**
```rust
pub struct WebSocketExecutor {
ws_client: WebSocketClient,
endpoint: String,
}
#[async_trait]
impl RhaiExecutor for WebSocketExecutor {
async fn call(&self, request: PlayRequestBuilder<'_>) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
// Build the PlayRequest and send via WebSocket
let play_request = request.build()?;
// Send script execution request via WebSocket
let ws_message = serde_json::to_string(&play_request)?;
self.ws_client.send(ws_message).await?;
// Wait for response and convert to RhaiTaskDetails
let response = self.ws_client.receive().await?;
serde_json::from_str(&response).map_err(RhaiDispatcherError::from)
}
}
```
**HTTP Client Implementation:**
```rust
pub struct HttpExecutor {
http_client: reqwest::Client,
base_url: String,
}
#[async_trait]
impl RhaiExecutor for HttpExecutor {
async fn call(&self, request: PlayRequestBuilder<'_>) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
// Build the PlayRequest and send via HTTP
let play_request = request.build()?;
// Send script execution request via HTTP API
let response = self.http_client
.post(&format!("{}/execute", self.base_url))
.json(&play_request)
.send()
.await?;
response.json().await.map_err(RhaiDispatcherError::from)
}
}
```
**Local Executor Implementation:**
```rust
pub struct LocalExecutor {
engine: Engine,
}
#[async_trait]
impl RhaiExecutor for LocalExecutor {
async fn call(&self, request: PlayRequestBuilder<'_>) -> Result<RhaiTaskDetails, RhaiDispatcherError> {
// Build the PlayRequest and execute locally
let play_request = request.build()?;
// Execute script directly in local Rhai engine
let result = self.engine.eval::<String>(&play_request.script);
// Convert to RhaiTaskDetails format
let task_details = RhaiTaskDetails {
task_id: play_request.id,
script: play_request.script,
status: if result.is_ok() { "completed".to_string() } else { "error".to_string() },
output: result.ok(),
error: result.err().map(|e| e.to_string()),
created_at: chrono::Utc::now(),
updated_at: chrono::Utc::now(),
caller_id: "local".to_string(),
context_id: play_request.context_id,
worker_id: "local".to_string(),
};
Ok(task_details)
}
}
```
### Data Model Extensions
Simple extensions to the existing heromodels flow structures:
#### Enhanced FlowStep Model
```rust
// Extends heromodels::models::flow::FlowStep
pub struct FlowStep {
// ... existing heromodels::models::flow::FlowStep fields
pub script: String, // Rhai script to execute
pub depends_on: Vec<u32>, // IDs of steps this step depends on
pub context_id: String, // Execution context (circle)
pub inputs: HashMap<String, String>, // Input parameters
pub outputs: HashMap<String, String>, // Output results
}
```
### Execution Flow
```mermaid
sequenceDiagram
participant Client as Client
participant O as Orchestrator
participant RE as RhaiExecutor
participant DB as Database
Client->>O: Submit Flow
O->>DB: Store flow and steps
O->>O: Find steps with no dependencies
loop Until all steps complete
O->>RE: Execute ready steps
RE-->>O: Return results
O->>DB: Update step status
O->>O: Find newly ready steps
end
O->>Client: Flow completed
```
### Flexible Orchestrator Implementation
```rust
use rhai_dispatcher::{RhaiDispatcher, PlayRequestBuilder};
use std::collections::HashSet;
pub struct Orchestrator<E: RhaiExecutor> {
executor: E,
database: Arc<Database>,
}
impl<E: RhaiExecutor> Orchestrator<E> {
pub fn new(executor: E, database: Arc<Database>) -> Self {
Self { executor, database }
}
pub async fn execute_flow(&self, flow: Flow) -> Result<(), OrchestratorError> {
// 1. Store flow in database
self.database.collection::<Flow>()?.set(&flow)?;
// 2. Find steps with no dependencies (depends_on is empty)
let mut pending_steps: Vec<FlowStep> = flow.steps.clone();
let mut completed_steps: HashSet<u32> = HashSet::new();
while !pending_steps.is_empty() {
// Find ready steps (all dependencies completed)
let ready_steps: Vec<FlowStep> = pending_steps
.iter()
.filter(|step| {
step.depends_on.iter().all(|dep_id| completed_steps.contains(dep_id))
})
.cloned()
.collect();
if ready_steps.is_empty() {
return Err(OrchestratorError::NoReadySteps);
}
// Execute ready steps concurrently
let mut tasks = Vec::new();
for step in ready_steps {
let executor = &self.executor;
let task = async move {
// Create PlayRequestBuilder for this step
let request = RhaiDispatcher::new_play_request()
.script(&step.script)
.context_id(&step.context_id)
.worker_id(&step.worker_id);
// Execute via the trait
let result = executor.call(request).await?;
Ok((step.base_data.id, result))
};
tasks.push(task);
}
// Wait for all ready steps to complete
let results = futures::future::try_join_all(tasks).await?;
// Update step status and mark as completed
for (step_id, task_details) in results {
if task_details.status == "completed" {
completed_steps.insert(step_id);
// Update step status in database
// self.update_step_status(step_id, "completed", task_details.output).await?;
} else {
return Err(OrchestratorError::StepFailed(step_id, task_details.error));
}
}
// Remove completed steps from pending
pending_steps.retain(|step| !completed_steps.contains(&step.base_data.id));
}
Ok(())
}
pub async fn get_flow_status(&self, flow_id: u32) -> Result<FlowStatus, OrchestratorError> {
// Return current status of flow and all its steps
let flow = self.database.collection::<Flow>()?.get(flow_id)?;
// Implementation would check step statuses and return overall flow status
Ok(FlowStatus::Running) // Placeholder
}
}
pub enum OrchestratorError {
DatabaseError(String),
ExecutorError(RhaiDispatcherError),
NoReadySteps,
StepFailed(u32, Option<String>),
}
pub enum FlowStatus {
Pending,
Running,
Completed,
Failed,
}
// Usage examples:
// let orchestrator = Orchestrator::new(DispatcherExecutor::new(dispatcher), db);
// let orchestrator = Orchestrator::new(WebSocketExecutor::new(ws_client), db);
// let orchestrator = Orchestrator::new(HttpExecutor::new(http_client), db);
// let orchestrator = Orchestrator::new(LocalExecutor::new(engine), db);
```
### Key Features
1. **DAG Validation**: Ensures no circular dependencies exist in the `depends_on` relationships
2. **Parallel Execution**: Executes independent steps concurrently via multiple workers
3. **Simple Dependencies**: Each step lists the step IDs it depends on
4. **RhaiDispatcher Integration**: Uses existing dispatcher for script execution
5. **Binary Outcomes**: Steps either succeed or fail (keeping it simple as per requirements)
This simple architecture provides DAG-based workflow execution while leveraging the existing rhailib infrastructure and keeping complexity minimal.

View File

@@ -0,0 +1,283 @@
//! Basic workflow example demonstrating orchestrator usage
use orchestrator::{
interface::LocalInterface,
orchestrator::Orchestrator,
OrchestratedFlow, OrchestratedFlowStep, FlowStatus,
};
use std::sync::Arc;
use std::collections::HashMap;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Set up log output for the examples.
    tracing_subscriber::fmt().init();

    // Wrap an in-process executor in the orchestrator.
    let orchestrator = Orchestrator::new(Arc::new(LocalInterface::new()));

    println!("🚀 Starting basic workflow example");

    // Example 1: Simple sequential workflow
    println!("\n📋 Example 1: Sequential Workflow");
    let flow_id = orchestrator.execute_flow(create_sequential_workflow()).await?;
    wait_and_show_results(&orchestrator, flow_id, "Sequential").await;

    // Example 2: Parallel workflow with convergence
    println!("\n📋 Example 2: Parallel Workflow");
    let flow_id = orchestrator.execute_flow(create_parallel_workflow()).await?;
    wait_and_show_results(&orchestrator, flow_id, "Parallel").await;

    // Example 3: Complex workflow with multiple dependencies
    println!("\n📋 Example 3: Complex Workflow");
    let flow_id = orchestrator.execute_flow(create_complex_workflow()).await?;
    wait_and_show_results(&orchestrator, flow_id, "Complex").await;

    // Drop finished executions from the orchestrator's bookkeeping.
    orchestrator.cleanup_completed_flows().await;

    println!("\n✅ All examples completed successfully!");
    Ok(())
}
/// Create a simple sequential workflow: prepare -> process -> output,
/// each step consuming its predecessor's `result` via `dep_N_result`.
fn create_sequential_workflow() -> OrchestratedFlow {
    let prepare = OrchestratedFlowStep::new("data_preparation")
        .script(r#"
            let data = [1, 2, 3, 4, 5];
            let sum = 0;
            for item in data {
                sum += item;
            }
            let result = sum;
        "#)
        .context_id("sequential_context")
        .worker_id("worker_1");

    let process = OrchestratedFlowStep::new("data_processing")
        .script(r#"
            let processed_data = dep_1_result * 2;
            let result = processed_data;
        "#)
        .depends_on(prepare.id())
        .context_id("sequential_context")
        .worker_id("worker_2");

    let output = OrchestratedFlowStep::new("data_output")
        .script(r#"
            let final_result = "Processed value: " + dep_2_result;
            let result = final_result;
        "#)
        .depends_on(process.id())
        .context_id("sequential_context")
        .worker_id("worker_3");

    OrchestratedFlow::new("sequential_workflow")
        .add_step(prepare)
        .add_step(process)
        .add_step(output)
}
/// Create a parallel workflow with convergence: three independent fetch
/// steps fan out, and a report step joins all of their results.
fn create_parallel_workflow() -> OrchestratedFlow {
    let fetch_user = OrchestratedFlowStep::new("fetch_user_data")
        .script(r#"
            let user_id = 12345;
            let user_name = "Alice";
            let result = user_name;
        "#)
        .context_id("parallel_context")
        .worker_id("user_service");

    let fetch_order = OrchestratedFlowStep::new("fetch_order_data")
        .script(r#"
            let order_id = 67890;
            let order_total = 99.99;
            let result = order_total;
        "#)
        .context_id("parallel_context")
        .worker_id("order_service");

    let fetch_inventory = OrchestratedFlowStep::new("fetch_inventory_data")
        .script(r#"
            let product_id = "ABC123";
            let stock_count = 42;
            let result = stock_count;
        "#)
        .context_id("parallel_context")
        .worker_id("inventory_service");

    // Convergence point: depends on all three fetches above.
    let report = OrchestratedFlowStep::new("generate_report")
        .script(r#"
            let report = "User: " + dep_1_result +
                        ", Order Total: $" + dep_2_result +
                        ", Stock: " + dep_3_result + " units";
            let result = report;
        "#)
        .depends_on(fetch_user.id())
        .depends_on(fetch_order.id())
        .depends_on(fetch_inventory.id())
        .context_id("parallel_context")
        .worker_id("report_service");

    OrchestratedFlow::new("parallel_workflow")
        .add_step(fetch_user)
        .add_step(fetch_order)
        .add_step(fetch_inventory)
        .add_step(report)
}
/// Create a complex workflow with multiple dependency levels
///
/// Four levels: config/auth gathering, data fetching, metric calculation,
/// and a final dashboard step that consumes all prior results.
///
/// NOTE(review): the scripts read dependency outputs via variables named
/// `dep_<step-number>_result`; confirm the executor actually injects these
/// into the Rhai scope (the orchestrator only builds an `inputs` map).
fn create_complex_workflow() -> OrchestratedFlow {
    // Level 1: Initial data gathering
    let step1 = OrchestratedFlowStep::new("load_config")
        .script(r#"
            let config = #{
                api_url: "https://api.example.com",
                timeout: 30,
                retries: 3
            };
            let result = config.api_url;
        "#)
        .context_id("complex_context")
        .worker_id("config_service");

    let step2 = OrchestratedFlowStep::new("authenticate")
        .script(r#"
            let token = "auth_token_12345";
            let expires_in = 3600;
            let result = token;
        "#)
        .context_id("complex_context")
        .worker_id("auth_service");

    // Level 2: Data fetching (depends on config and auth)
    let step3 = OrchestratedFlowStep::new("fetch_customers")
        .script(r#"
            let api_url = dep_1_result;
            let auth_token = dep_2_result;
            let customers = ["Customer A", "Customer B", "Customer C"];
            let result = customers.len();
        "#)
        .depends_on(step1.id())
        .depends_on(step2.id())
        .context_id("complex_context")
        .worker_id("customer_service");

    let step4 = OrchestratedFlowStep::new("fetch_products")
        .script(r#"
            let api_url = dep_1_result;
            let auth_token = dep_2_result;
            let products = ["Product X", "Product Y", "Product Z"];
            let result = products.len();
        "#)
        .depends_on(step1.id())
        .depends_on(step2.id())
        .context_id("complex_context")
        .worker_id("product_service");

    // Level 3: Data processing (depends on fetched data)
    let step5 = OrchestratedFlowStep::new("calculate_metrics")
        .script(r#"
            let customer_count = dep_3_result;
            let product_count = dep_4_result;
            let ratio = customer_count / product_count;
            let result = ratio;
        "#)
        .depends_on(step3.id())
        .depends_on(step4.id())
        .context_id("complex_context")
        .worker_id("analytics_service");

    // Level 4: Final reporting (consumes levels 2 and 3 directly)
    let step6 = OrchestratedFlowStep::new("generate_dashboard")
        .script(r#"
            let customer_count = dep_3_result;
            let product_count = dep_4_result;
            let ratio = dep_5_result;
            let dashboard = "Dashboard: " + customer_count + " customers, " +
                           product_count + " products, ratio: " + ratio;
            let result = dashboard;
        "#)
        .depends_on(step3.id())
        .depends_on(step4.id())
        .depends_on(step5.id())
        .context_id("complex_context")
        .worker_id("dashboard_service");

    OrchestratedFlow::new("complex_workflow")
        .add_step(step1)
        .add_step(step2)
        .add_step(step3)
        .add_step(step4)
        .add_step(step5)
        .add_step(step6)
}
/// Wait for flow completion and show results
///
/// Polls the orchestrator every 50 ms until the flow reaches a terminal
/// state (`Completed` or `Failed`) or is no longer tracked.
async fn wait_and_show_results(
    orchestrator: &Orchestrator<LocalInterface>,
    flow_id: u32,
    workflow_name: &str,
) {
    println!(" ⏳ Executing {} workflow (ID: {})...", workflow_name, flow_id);

    // Poll for completion
    loop {
        tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;

        if let Some(execution) = orchestrator.get_flow_status(flow_id).await {
            match execution.status {
                FlowStatus::Completed => {
                    println!("{} workflow completed successfully!", workflow_name);
                    // `completed_at` is always set when status becomes
                    // Completed (see FlowExecution::complete_step), so this
                    // unwrap cannot panic here.
                    println!(" 📊 Executed {} steps in {:?}",
                        execution.completed_steps.len(),
                        execution.completed_at.unwrap() - execution.started_at);

                    // Show step results, mapping each numeric step id back to
                    // its human-readable step name.
                    for (step_id, outputs) in &execution.step_results {
                        if let Some(result) = outputs.get("result") {
                            let step_name = execution.flow.orchestrated_steps
                                .iter()
                                .find(|s| s.id() == *step_id)
                                .map(|s| s.flow_step.name.as_str())
                                .unwrap_or("unknown");
                            println!(" 📝 Step '{}': {}", step_name, result);
                        }
                    }
                    break;
                }
                FlowStatus::Failed => {
                    println!("{} workflow failed!", workflow_name);
                    if !execution.failed_steps.is_empty() {
                        println!(" 💥 Failed steps: {:?}", execution.failed_steps);
                    }
                    break;
                }
                FlowStatus::Running => {
                    // Progress indicator; flush because there is no newline.
                    print!(".");
                    std::io::Write::flush(&mut std::io::stdout()).unwrap();
                }
                FlowStatus::Pending => {
                    println!(" ⏸️ {} workflow is pending...", workflow_name);
                }
            }
        } else {
            // Flow vanished from the active set (e.g. cleaned up) — stop polling.
            println!("{} workflow not found!", workflow_name);
            break;
        }
    }
}

View File

@@ -0,0 +1,61 @@
//! Dispatcher interface implementation using RhaiDispatcher
use crate::RhaiInterface;
use async_trait::async_trait;
use rhai_dispatcher::{PlayRequest, RhaiDispatcher, RhaiDispatcherError};
use std::sync::Arc;
/// Dispatcher-based interface using RhaiDispatcher
///
/// Forwards all script execution to a shared [`RhaiDispatcher`]; the `Arc`
/// lets several interfaces (or cloned orchestrators) reuse one client.
pub struct DispatcherInterface {
    // Shared dispatcher handle.
    dispatcher: Arc<RhaiDispatcher>,
}

impl DispatcherInterface {
    /// Create a new dispatcher interface wrapping an existing dispatcher.
    pub fn new(dispatcher: Arc<RhaiDispatcher>) -> Self {
        Self { dispatcher }
    }
}
#[async_trait]
impl RhaiInterface for DispatcherInterface {
    /// Fire-and-forget: delegate to the wrapped dispatcher and return as soon
    /// as the request has been submitted.
    async fn submit_play_request(&self, play_request: &PlayRequest) -> Result<(), RhaiDispatcherError> {
        self.dispatcher.submit_play_request(play_request).await
    }

    /// Delegate to the wrapped dispatcher and wait for the script's result.
    async fn submit_play_request_and_await_result(&self, play_request: &PlayRequest) -> Result<String, RhaiDispatcherError> {
        self.dispatcher.submit_play_request_and_await_result(play_request).await
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder: constructing a real `RhaiDispatcher` requires external
    /// configuration (queues, etc.), so construction stays commented out
    /// until a mock dispatcher is available.
    #[tokio::test]
    async fn test_dispatcher_interface_creation() {
        // This test just verifies we can create the interface
        // Note: Actual testing would require a properly configured RhaiDispatcher
        // For now, we'll create a mock or skip the actual dispatcher creation

        // This is a placeholder test - adjust based on actual RhaiDispatcher constructor
        // let dispatcher = Arc::new(RhaiDispatcher::new());
        // let interface = DispatcherInterface::new(dispatcher);

        // Just verify the test compiles for now
        assert!(true);
    }

    /// Placeholder for exercising the two trait methods once a dispatcher (or
    /// mock) exists; currently only checks request construction.
    ///
    /// NOTE(review): `PlayRequest` is built with only `script` here, while
    /// orchestrator.rs populates id/worker_id/context_id/timeout — confirm
    /// which struct definition is current.
    #[tokio::test]
    async fn test_dispatcher_interface_methods() {
        // This test would verify the interface methods work correctly
        // when a proper RhaiDispatcher is available
        let play_request = PlayRequest {
            script: "let x = 5; x + 3".to_string(),
        };

        // Placeholder assertions - would test actual functionality with real dispatcher
        assert_eq!(play_request.script, "let x = 5; x + 3");
    }
}

View File

@@ -0,0 +1,111 @@
//! Local interface implementation for in-process script execution
use crate::RhaiInterface;
use async_trait::async_trait;
use rhai_dispatcher::{PlayRequest, RhaiDispatcherError};
/// In-process interface that evaluates scripts on a locally owned Rhai engine.
pub struct LocalInterface {
    // Engine used for all evaluations performed through this interface.
    engine: rhai::Engine,
}

impl LocalInterface {
    /// Build an interface around a default-configured engine.
    pub fn new() -> Self {
        Self::with_engine(rhai::Engine::new())
    }

    /// Build an interface around a caller-supplied (possibly customized) engine.
    pub fn with_engine(engine: rhai::Engine) -> Self {
        Self { engine }
    }
}

impl Default for LocalInterface {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl RhaiInterface for LocalInterface {
    async fn submit_play_request(&self, play_request: &PlayRequest) -> Result<(), RhaiDispatcherError> {
        // Locally there is no queue to hand off to: run the script and
        // discard its output so the call still behaves as fire-and-forget.
        self.submit_play_request_and_await_result(play_request)
            .await
            .map(|_| ())
    }

    async fn submit_play_request_and_await_result(&self, play_request: &PlayRequest) -> Result<String, RhaiDispatcherError> {
        let mut scope = rhai::Scope::new();

        // Evaluate the script in a fresh scope; any engine error is wrapped
        // into a dispatcher error so the trait signature matches.
        let value = self
            .engine
            .eval_with_scope::<rhai::Dynamic>(&mut scope, &play_request.script)
            .map_err(|e| RhaiDispatcherError::TaskNotFound(format!("Script execution error: {}", e)))?;

        // A unit result means the script produced no value; report it as "".
        let rendered = if value.is_unit() {
            String::new()
        } else {
            value.to_string()
        };
        Ok(rendered)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A simple expression script evaluates and stringifies its final value.
    #[tokio::test]
    async fn test_local_interface_basic() {
        let interface = LocalInterface::new();
        let play_request = PlayRequest {
            script: "let x = 5; x + 3".to_string(),
        };

        let result = interface.submit_play_request_and_await_result(&play_request).await;
        assert!(result.is_ok());

        let output = result.unwrap();
        assert_eq!(output, "8");
    }

    /// Fire-and-forget succeeds for a valid script (result is discarded).
    #[tokio::test]
    async fn test_local_interface_fire_and_forget() {
        let interface = LocalInterface::new();
        let play_request = PlayRequest {
            script: "let x = 5; x + 3".to_string(),
        };

        let result = interface.submit_play_request(&play_request).await;
        assert!(result.is_ok());
    }

    /// Invalid syntax surfaces as an error rather than a panic.
    #[tokio::test]
    async fn test_local_interface_with_error() {
        let interface = LocalInterface::new();
        let play_request = PlayRequest {
            script: "invalid_syntax +++".to_string(),
        };

        let result = interface.submit_play_request_and_await_result(&play_request).await;
        assert!(result.is_err());
    }

    /// A script whose last statement yields unit produces an empty string.
    #[tokio::test]
    async fn test_local_interface_empty_result() {
        let interface = LocalInterface::new();
        let play_request = PlayRequest {
            script: "let x = 42;".to_string(),
        };

        let result = interface.submit_play_request_and_await_result(&play_request).await;
        assert!(result.is_ok());

        let output = result.unwrap();
        assert_eq!(output, "");
    }
}

View File

@@ -0,0 +1,9 @@
//! Interface implementations for different backends
pub mod local;
pub mod ws;
pub mod dispatcher;
pub use local::*;
pub use ws::*;
pub use dispatcher::*;

View File

@@ -0,0 +1,117 @@
//! WebSocket interface implementation for remote script execution
use crate::RhaiInterface;
use async_trait::async_trait;
use rhai_dispatcher::{PlayRequest, RhaiDispatcherError};
use reqwest::Client;
use serde_json::json;
/// WebSocket-based interface for remote script execution
///
/// NOTE(review): despite the name, this implementation communicates over
/// plain HTTP POST via `reqwest` (`/submit` and `/execute` endpoints) —
/// confirm whether a true WebSocket transport is intended.
pub struct WsInterface {
    // HTTP client used for all requests.
    client: Client,
    // Server base URL; endpoint paths are appended to it.
    base_url: String,
}

impl WsInterface {
    /// Create a new interface targeting the given server base URL.
    pub fn new(base_url: String) -> Self {
        Self {
            client: Client::new(),
            base_url,
        }
    }
}
#[async_trait]
impl RhaiInterface for WsInterface {
async fn submit_play_request(&self, play_request: &PlayRequest) -> Result<(), RhaiDispatcherError> {
let payload = json!({
"script": play_request.script
});
let response = self
.client
.post(&format!("{}/submit", self.base_url))
.json(&payload)
.send()
.await
.map_err(|e| RhaiDispatcherError::TaskNotFound(format!("Network error: {}", e)))?;
if response.status().is_success() {
Ok(())
} else {
let error_text = response
.text()
.await
.unwrap_or_else(|_| "Unknown error".to_string());
Err(RhaiDispatcherError::TaskNotFound(format!("HTTP error: {}", error_text)))
}
}
async fn submit_play_request_and_await_result(&self, play_request: &PlayRequest) -> Result<String, RhaiDispatcherError> {
let payload = json!({
"script": play_request.script
});
let response = self
.client
.post(&format!("{}/execute", self.base_url))
.json(&payload)
.send()
.await
.map_err(|e| RhaiDispatcherError::TaskNotFound(format!("Network error: {}", e)))?;
if response.status().is_success() {
let result: String = response
.text()
.await
.map_err(|e| RhaiDispatcherError::TaskNotFound(format!("Response parsing error: {}", e)))?;
Ok(result)
} else {
let error_text = response
.text()
.await
.unwrap_or_else(|_| "Unknown error".to_string());
Err(RhaiDispatcherError::TaskNotFound(format!("HTTP error: {}", error_text)))
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction stores the base URL unchanged.
    #[test]
    fn test_ws_interface_creation() {
        let interface = WsInterface::new("http://localhost:8080".to_string());
        assert_eq!(interface.base_url, "http://localhost:8080");
    }

    /// Without a live server the request must fail with a network error;
    /// this only checks the error path, not a successful round trip.
    #[tokio::test]
    async fn test_ws_interface_call_with_mock_server() {
        // This test would require a mock HTTP server
        // For now, just test that we can create the interface
        let interface = WsInterface::new("http://localhost:8080".to_string());

        let play_request = PlayRequest {
            script: "let x = 1;".to_string(),
        };

        // This will fail without a real server, but that's expected in unit tests
        let result = interface.submit_play_request_and_await_result(&play_request).await;
        assert!(result.is_err()); // Expected to fail without server
    }

    /// Same as above for the fire-and-forget path.
    #[tokio::test]
    async fn test_ws_interface_fire_and_forget() {
        let interface = WsInterface::new("http://localhost:8080".to_string());

        let play_request = PlayRequest {
            script: "let x = 1;".to_string(),
        };

        // This will fail without a real server, but that's expected in unit tests
        let result = interface.submit_play_request(&play_request).await;
        assert!(result.is_err()); // Expected to fail without server
    }
}

View File

@@ -0,0 +1,35 @@
//! # Orchestrator
//!
//! A simple DAG-based workflow execution system that extends the heromodels flow structures
//! to support workflows with dependencies and distributed script execution.
use async_trait::async_trait;
use rhai_dispatcher::{PlayRequest, RhaiDispatcherError};
pub mod interface;
pub mod orchestrator;
pub use interface::*;
pub use orchestrator::*;
/// Trait for executing Rhai scripts through different backends
/// Uses the same signature as RhaiDispatcher for consistency
///
/// Implementations in this crate: `LocalInterface` (in-process engine),
/// `DispatcherInterface` (delegates to `RhaiDispatcher`) and `WsInterface`
/// (HTTP-based, despite its name).
#[async_trait]
pub trait RhaiInterface {
    /// Submit a play request without waiting for result (fire-and-forget)
    async fn submit_play_request(&self, play_request: &PlayRequest) -> Result<(), RhaiDispatcherError>;

    /// Submit a play request and await the result
    /// Returns just the output string on success
    async fn submit_play_request_and_await_result(&self, play_request: &PlayRequest) -> Result<String, RhaiDispatcherError>;
}
// Re-export the flow models from DSL
pub use rhailib_dsl::flow::{OrchestratedFlow, OrchestratedFlowStep, OrchestratorError, FlowStatus};
// Conversion from RhaiDispatcherError to OrchestratorError
impl From<RhaiDispatcherError> for OrchestratorError {
fn from(err: RhaiDispatcherError) -> Self {
OrchestratorError::ExecutorError(err.to_string())
}
}

View File

@@ -0,0 +1,418 @@
//! Main orchestrator implementation for DAG-based workflow execution
use crate::{
OrchestratedFlow, OrchestratedFlowStep, OrchestratorError, FlowStatus, RhaiInterface,
};
use rhai_dispatcher::PlayRequest;
use futures::future::try_join_all;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
/// Main orchestrator for executing DAG-based workflows
///
/// Generic over the [`RhaiInterface`] used to run step scripts, so the same
/// engine works with local, dispatcher, or HTTP-backed execution.
pub struct Orchestrator<I: RhaiInterface> {
    /// Interface for running scripts
    interface: Arc<I>,
    /// Active flow executions, keyed by flow id; shared (via `Arc`) with the
    /// background tasks spawned in `execute_flow`.
    active_flows: Arc<RwLock<HashMap<u32, FlowExecution>>>,
}
/// Represents an active flow execution
///
/// `Clone` lets `get_flow_status` hand out snapshots without holding the
/// orchestrator's lock.
#[derive(Debug, Clone)]
pub struct FlowExecution {
    /// The flow being executed
    pub flow: OrchestratedFlow,
    /// Current status
    pub status: FlowStatus,
    /// Completed step IDs
    pub completed_steps: HashSet<u32>,
    /// Failed step IDs
    pub failed_steps: HashSet<u32>,
    /// Step results, keyed by step id; each step stores its outputs by name
    pub step_results: HashMap<u32, HashMap<String, String>>,
    /// Execution start time
    pub started_at: chrono::DateTime<chrono::Utc>,
    /// Execution end time (set when the flow reaches a terminal state)
    pub completed_at: Option<chrono::DateTime<chrono::Utc>>,
}
impl FlowExecution {
    /// Start tracking a flow in the `Pending` state with empty bookkeeping.
    pub fn new(flow: OrchestratedFlow) -> Self {
        Self {
            flow,
            status: FlowStatus::Pending,
            completed_steps: HashSet::new(),
            failed_steps: HashSet::new(),
            step_results: HashMap::new(),
            started_at: chrono::Utc::now(),
            completed_at: None,
        }
    }

    /// A step is ready when it has neither completed nor failed and every
    /// one of its dependency ids is already in the completed set.
    pub fn is_step_ready(&self, step: &OrchestratedFlowStep) -> bool {
        let already_done = self.completed_steps.contains(&step.id())
            || self.failed_steps.contains(&step.id());
        if already_done {
            return false;
        }
        step.depends_on
            .iter()
            .all(|dep| self.completed_steps.contains(dep))
    }

    /// Collect every step currently eligible for execution.
    pub fn get_ready_steps(&self) -> Vec<&OrchestratedFlowStep> {
        self.flow
            .orchestrated_steps
            .iter()
            .filter(|step| self.is_step_ready(step))
            .collect()
    }

    /// Record a step's success; once every step has completed, the whole
    /// flow transitions to `Completed` and the end time is stamped.
    pub fn complete_step(&mut self, step_id: u32, outputs: HashMap<String, String>) {
        self.completed_steps.insert(step_id);
        self.step_results.insert(step_id, outputs);

        if self.completed_steps.len() == self.flow.orchestrated_steps.len() {
            self.status = FlowStatus::Completed;
            self.completed_at = Some(chrono::Utc::now());
        }
    }

    /// Record a step's failure and terminate the whole flow as failed.
    pub fn fail_step(&mut self, step_id: u32) {
        self.failed_steps.insert(step_id);
        self.status = FlowStatus::Failed;
        self.completed_at = Some(chrono::Utc::now());
    }

    /// True once the flow has reached a terminal state.
    pub fn is_finished(&self) -> bool {
        matches!(self.status, FlowStatus::Completed | FlowStatus::Failed)
    }
}
impl<I: RhaiInterface + Send + Sync + 'static> Orchestrator<I> {
    /// Create a new orchestrator
    pub fn new(interface: Arc<I>) -> Self {
        Self {
            interface,
            active_flows: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Start executing a flow
    ///
    /// Validates the DAG, registers a `FlowExecution`, then drives the steps
    /// on a background task. Returns the flow id immediately; use
    /// `get_flow_status` to observe progress.
    pub async fn execute_flow(&self, flow: OrchestratedFlow) -> Result<u32, OrchestratorError> {
        let flow_id = flow.id();
        // Reject flows with dependency cycles up front.
        flow.validate_dag()?;
        info!("Starting execution of flow {} with {} steps", flow_id, flow.orchestrated_steps.len());

        // Create flow execution
        let mut execution = FlowExecution::new(flow);
        execution.status = FlowStatus::Running;

        // Store the execution (write lock scoped so it is released before spawning)
        {
            let mut active_flows = self.active_flows.write().await;
            active_flows.insert(flow_id, execution);
        }

        // Start execution in background; the clone shares the same Arc'd state.
        let orchestrator = self.clone();
        tokio::spawn(async move {
            if let Err(e) = orchestrator.execute_flow_steps(flow_id).await {
                error!("Flow {} execution failed: {}", flow_id, e);

                // Mark flow as failed
                let mut active_flows = orchestrator.active_flows.write().await;
                if let Some(execution) = active_flows.get_mut(&flow_id) {
                    execution.status = FlowStatus::Failed;
                    execution.completed_at = Some(chrono::Utc::now());
                }
            }
        });

        Ok(flow_id)
    }

    /// Execute flow steps using DAG traversal
    ///
    /// Repeatedly finds steps whose dependencies are satisfied, runs them
    /// concurrently, records results, and loops until the flow finishes or
    /// no step can make progress (treated as a deadlock).
    async fn execute_flow_steps(&self, flow_id: u32) -> Result<(), OrchestratorError> {
        loop {
            // Snapshot the ready steps under a read lock, cloning them so the
            // lock is not held across the awaits below.
            let ready_steps = {
                let active_flows = self.active_flows.read().await;
                let execution = active_flows
                    .get(&flow_id)
                    // NOTE(review): StepNotFound is reused here for a missing
                    // *flow*; a dedicated FlowNotFound variant would be clearer.
                    .ok_or(OrchestratorError::StepNotFound(flow_id))?;

                if execution.is_finished() {
                    info!("Flow {} execution completed with status: {:?}", flow_id, execution.status);
                    return Ok(());
                }

                execution.get_ready_steps().into_iter().cloned().collect::<Vec<_>>()
            };

            if ready_steps.is_empty() {
                // Check if we're deadlocked
                let active_flows = self.active_flows.read().await;
                let execution = active_flows
                    .get(&flow_id)
                    .ok_or(OrchestratorError::StepNotFound(flow_id))?;

                if !execution.is_finished() {
                    warn!("No ready steps found for flow {} - possible deadlock", flow_id);
                    return Err(OrchestratorError::NoReadySteps);
                }
                return Ok(());
            }

            debug!("Executing {} ready steps for flow {}", ready_steps.len(), flow_id);

            // Execute ready steps concurrently
            let step_futures = ready_steps.into_iter().map(|step| {
                let orchestrator = self.clone();
                async move {
                    orchestrator.execute_step(flow_id, step).await
                }
            });

            // Wait for all steps to complete; try_join_all short-circuits on
            // the first step error, which then propagates to the spawn handler
            // in execute_flow.
            let results = try_join_all(step_futures).await?;

            // Update execution state
            {
                let mut active_flows = self.active_flows.write().await;
                let execution = active_flows
                    .get_mut(&flow_id)
                    .ok_or(OrchestratorError::StepNotFound(flow_id))?;

                for (step_id, outputs) in results {
                    execution.complete_step(step_id, outputs);
                }
            }

            // Small delay to prevent tight loop
            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
        }
    }

    /// Execute a single step
    ///
    /// Merges dependency outputs into the step's inputs (as
    /// `dep_<step-id>_<key>`), submits the script through the interface, and
    /// returns `(step_id, outputs)` on success. On failure the step is marked
    /// failed in the shared execution state before the error is returned.
    async fn execute_step(
        &self,
        flow_id: u32,
        step: OrchestratedFlowStep,
    ) -> Result<(u32, HashMap<String, String>), OrchestratorError> {
        let step_id = step.id();
        info!("Executing step {} for flow {}", step_id, flow_id);

        // Prepare inputs with dependency outputs
        let mut inputs = step.inputs.clone();

        // Add outputs from dependency steps
        {
            let active_flows = self.active_flows.read().await;
            let execution = active_flows
                .get(&flow_id)
                .ok_or(OrchestratorError::StepNotFound(flow_id))?;

            for dep_id in &step.depends_on {
                if let Some(dep_outputs) = execution.step_results.get(dep_id) {
                    for (key, value) in dep_outputs {
                        inputs.insert(format!("dep_{}_{}", dep_id, key), value.clone());
                    }
                }
            }
        }

        // Create play request
        // NOTE(review): `inputs` is built above but never attached to the
        // request — confirm whether PlayRequest is expected to carry step inputs.
        let play_request = PlayRequest {
            id: format!("{}_{}", flow_id, step_id),
            worker_id: step.worker_id.clone(),
            context_id: step.context_id.clone(),
            script: step.script.clone(),
            timeout: std::time::Duration::from_secs(30), // Default timeout
        };

        // Execute the script
        match self.interface.submit_play_request_and_await_result(&play_request).await {
            Ok(output) => {
                info!("Step {} completed successfully", step_id);
                let mut outputs = HashMap::new();
                outputs.insert("result".to_string(), output);
                Ok((step_id, outputs))
            }
            Err(e) => {
                error!("Step {} failed: {}", step_id, e);

                // Mark step as failed (write lock scoped to this block)
                {
                    let mut active_flows = self.active_flows.write().await;
                    if let Some(execution) = active_flows.get_mut(&flow_id) {
                        execution.fail_step(step_id);
                    }
                }

                Err(OrchestratorError::StepFailed(step_id, Some(e.to_string())))
            }
        }
    }

    /// Get the status of a flow execution
    ///
    /// Returns a cloned snapshot, or `None` if the flow is not tracked.
    pub async fn get_flow_status(&self, flow_id: u32) -> Option<FlowExecution> {
        let active_flows = self.active_flows.read().await;
        active_flows.get(&flow_id).cloned()
    }

    /// Cancel a flow execution
    ///
    /// There is no dedicated `Cancelled` status, so the flow is marked
    /// `Failed`; steps already submitted to workers are not interrupted.
    pub async fn cancel_flow(&self, flow_id: u32) -> Result<(), OrchestratorError> {
        let mut active_flows = self.active_flows.write().await;
        if let Some(execution) = active_flows.get_mut(&flow_id) {
            execution.status = FlowStatus::Failed;
            execution.completed_at = Some(chrono::Utc::now());
            info!("Flow {} cancelled", flow_id);
            Ok(())
        } else {
            Err(OrchestratorError::StepNotFound(flow_id))
        }
    }

    /// List all active flows
    pub async fn list_active_flows(&self) -> Vec<(u32, FlowStatus)> {
        let active_flows = self.active_flows.read().await;
        active_flows
            .iter()
            .map(|(id, execution)| (*id, execution.status.clone()))
            .collect()
    }

    /// Clean up completed flows
    ///
    /// Drops every execution that has reached a terminal state.
    pub async fn cleanup_completed_flows(&self) {
        let mut active_flows = self.active_flows.write().await;
        active_flows.retain(|_, execution| !execution.is_finished());
    }
}
impl<I: RhaiInterface + Send + Sync> Clone for Orchestrator<I> {
fn clone(&self) -> Self {
Self {
interface: self.interface.clone(),
active_flows: self.active_flows.clone(),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::interface::LocalInterface;
    use std::collections::HashMap;

    /// Two dependent steps should run in order and both complete.
    ///
    /// NOTE(review): completion is awaited with a fixed 100 ms sleep, which
    /// can be flaky on slow machines — consider polling get_flow_status.
    #[tokio::test]
    async fn test_simple_flow_execution() {
        let interface = Arc::new(LocalInterface::new());
        let orchestrator = Orchestrator::new(interface);

        // Create a simple flow with two steps
        let step1 = OrchestratedFlowStep::new("step1")
            .script("let result = 10;")
            .context_id("test")
            .worker_id("worker1");

        let step2 = OrchestratedFlowStep::new("step2")
            .script("let result = dep_1_result + 5;")
            .depends_on(step1.id())
            .context_id("test")
            .worker_id("worker1");

        let flow = OrchestratedFlow::new("test_flow")
            .add_step(step1)
            .add_step(step2);

        // Execute the flow
        let flow_id = orchestrator.execute_flow(flow).await.unwrap();

        // Wait for completion
        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

        let status = orchestrator.get_flow_status(flow_id).await.unwrap();
        assert_eq!(status.status, FlowStatus::Completed);
        assert_eq!(status.completed_steps.len(), 2);
    }

    /// Two independent steps plus a join step exercising fan-in.
    #[tokio::test]
    async fn test_parallel_execution() {
        let interface = Arc::new(LocalInterface::new());
        let orchestrator = Orchestrator::new(interface);

        // Create a flow with parallel steps
        let step1 = OrchestratedFlowStep::new("step1")
            .script("let result = 10;")
            .context_id("test")
            .worker_id("worker1");

        let step2 = OrchestratedFlowStep::new("step2")
            .script("let result = 20;")
            .context_id("test")
            .worker_id("worker2");

        let step3 = OrchestratedFlowStep::new("step3")
            .script("let result = dep_1_result + dep_2_result;")
            .depends_on(step1.id())
            .depends_on(step2.id())
            .context_id("test")
            .worker_id("worker3");

        let flow = OrchestratedFlow::new("parallel_flow")
            .add_step(step1)
            .add_step(step2)
            .add_step(step3);

        // Execute the flow
        let flow_id = orchestrator.execute_flow(flow).await.unwrap();

        // Wait for completion
        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

        let status = orchestrator.get_flow_status(flow_id).await.unwrap();
        assert_eq!(status.status, FlowStatus::Completed);
        assert_eq!(status.completed_steps.len(), 3);
    }

    /// Pure state-machine test of FlowExecution readiness/completion
    /// transitions — no async execution involved.
    #[test]
    fn test_flow_execution_state() {
        let step1 = OrchestratedFlowStep::new("step1").script("let x = 1;");
        let step2 = OrchestratedFlowStep::new("step2")
            .script("let y = 2;")
            .depends_on(step1.id());

        let flow = OrchestratedFlow::new("test_flow")
            .add_step(step1.clone())
            .add_step(step2.clone());

        let mut execution = FlowExecution::new(flow);

        // Initially, only step1 should be ready
        assert!(execution.is_step_ready(&step1));
        assert!(!execution.is_step_ready(&step2));

        // After completing step1, step2 should be ready
        execution.complete_step(step1.id(), HashMap::new());
        assert!(!execution.is_step_ready(&step1)); // Already completed
        assert!(execution.is_step_ready(&step2));

        // After completing step2, flow should be complete
        execution.complete_step(step2.id(), HashMap::new());
        assert_eq!(execution.status, FlowStatus::Completed);
    }
}

View File

@@ -0,0 +1,42 @@
//! Main orchestrator implementation for DAG-based workflow execution
use crate::{
OrchestratedFlow, OrchestratedFlowStep, OrchestratorError, FlowStatus, RhaiInterface, ScriptRequest,
};
use futures::future::try_join_all;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
impl<I: RhaiInterface + Send + Sync + 'static> Orchestrator<I> {
    /// Fetch a single flow by ID from the Rhai engine.
    ///
    /// Submits `json_encode(get_flow(<id>))` through the interface and
    /// deserializes the JSON reply into an [`OrchestratedFlow`].
    ///
    /// # Errors
    /// Returns an [`OrchestratorError`] if submitting the script fails.
    ///
    /// # Panics
    /// Panics if the engine's reply is not valid `OrchestratedFlow` JSON.
    // NOTE: these were declared as plain `fn` but used `.await`, which does
    // not compile; they must be `async fn`.
    pub async fn get_flow(&self, flow_id: u32) -> Result<OrchestratedFlow, OrchestratorError> {
        self.interface
            .new_play_request()
            .script(format!("json_encode(get_flow({}))", flow_id))
            .submit_play_request_and_await_result()
            .await
            .map(|result| serde_json::from_str(&result).unwrap())
    }

    /// Fetch every flow known to the Rhai engine.
    ///
    /// # Errors
    /// Returns an [`OrchestratorError`] if submitting the script fails.
    ///
    /// # Panics
    /// Panics if the engine's reply is not valid JSON for `Vec<OrchestratedFlow>`.
    pub async fn get_flows(&self) -> Result<Vec<OrchestratedFlow>, OrchestratorError> {
        self.interface
            .new_play_request()
            .script("json_encode(get_flows())")
            .submit_play_request_and_await_result()
            .await
            .map(|result| serde_json::from_str(&result).unwrap())
    }

    /// Fetch the currently active flows.
    ///
    /// NOTE(review): this runs the same `get_flows()` script as
    /// [`Self::get_flows`], so it returns ALL flows, not only active ones —
    /// looks like a copy-paste; confirm whether the engine exposes a
    /// dedicated `get_active_flows()` or whether filtering is intended here.
    ///
    /// # Errors
    /// Returns an [`OrchestratorError`] if submitting the script fails.
    ///
    /// # Panics
    /// Panics if the engine's reply is not valid JSON for `Vec<OrchestratedFlow>`.
    pub async fn get_active_flows(&self) -> Result<Vec<OrchestratedFlow>, OrchestratorError> {
        self.interface
            .new_play_request()
            .script("json_encode(get_flows())")
            .submit_play_request_and_await_result()
            .await
            .map(|result| serde_json::from_str(&result).unwrap())
    }
}

2
rhailib/_archive/worker/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
/target
worker_rhai_temp_db

Some files were not shown because too many files have changed in this diff Show More