From 5670efc4cb2842d9c8916d780b099c19cb2835d1 Mon Sep 17 00:00:00 2001 From: despiegk Date: Fri, 31 Jan 2025 08:29:17 +0300 Subject: [PATCH] ... --- examples/data/ourdb_example.vsh | 42 ++++ examples/data/radixtree.vsh | 27 +++ examples/virt/docker/docker_init.vsh | 0 lib/core/log/backend_db.v | 25 --- lib/core/log/events.v | 10 - lib/core/log/factory.v | 18 -- lib/core/log/logger.v | 55 ----- lib/core/log/model.v | 32 --- lib/data/ourdb/README.md | 76 ++++--- lib/data/ourdb/backend.v | 23 +-- lib/data/ourdb/db.v | 15 +- lib/data/ourdb/db_update_test.v | 48 +++++ lib/data/ourdb/factory.v | 6 +- lib/data/ourdb/lookup.v | 2 +- lib/data/radixtree/README.md | 132 ++++++++++++ lib/data/radixtree/factory_test.v | 124 ++++++++++++ lib/data/radixtree/radixtree.v | 289 +++++++++++++++++++++++++++ lib/data/radixtree/radixtree_debug.v | 111 ++++++++++ lib/data/radixtree/serialize.v | 77 +++++++ lib/data/radixtree/serialize_test.v | 110 ++++++++++ 20 files changed, 1038 insertions(+), 184 deletions(-) create mode 100755 examples/data/ourdb_example.vsh create mode 100755 examples/data/radixtree.vsh mode change 100644 => 100755 examples/virt/docker/docker_init.vsh delete mode 100644 lib/core/log/backend_db.v delete mode 100644 lib/core/log/events.v delete mode 100644 lib/core/log/factory.v delete mode 100644 lib/core/log/logger.v delete mode 100644 lib/core/log/model.v create mode 100644 lib/data/ourdb/db_update_test.v create mode 100644 lib/data/radixtree/README.md create mode 100644 lib/data/radixtree/factory_test.v create mode 100644 lib/data/radixtree/radixtree.v create mode 100644 lib/data/radixtree/radixtree_debug.v create mode 100644 lib/data/radixtree/serialize.v create mode 100644 lib/data/radixtree/serialize_test.v diff --git a/examples/data/ourdb_example.vsh b/examples/data/ourdb_example.vsh new file mode 100755 index 00000000..8647b7ce --- /dev/null +++ b/examples/data/ourdb_example.vsh @@ -0,0 +1,42 @@ +#!/usr/bin/env -S v -n -w -gc none -no-retry-compilation -cc 
tcc -d use_openssl -enable-globals run + +import freeflowuniverse.herolib.data.ourdb + + +const test_dir = '/tmp/ourdb' + + +mut db := ourdb.new( + record_nr_max: 16777216 - 1 // max size of records + record_size_max: 1024 + path: test_dir + reset: true +)! + +defer { + db.destroy() or { panic('failed to destroy db: ${err}') } +} + +// Test set and get +test_data := 'Hello, World!'.bytes() +id := db.set(data: test_data)! + +retrieved := db.get(id)! +assert retrieved == test_data + +assert id==0 + +// Test overwrite +new_data := 'Updated data'.bytes() +id2 := db.set(id:0, data: new_data)! +assert id2==0 + +// // Verify lookup table has the correct location +// location := db.lookup.get(id2)! +// println('Location after update - file_nr: ${location.file_nr}, position: ${location.position}') + +// Get and verify the updated data +retrieved2 := db.get(id2)! +println('Retrieved data: ${retrieved2}') +println('Expected data: ${new_data}') +assert retrieved2 == new_data \ No newline at end of file diff --git a/examples/data/radixtree.vsh b/examples/data/radixtree.vsh new file mode 100755 index 00000000..f50d4a87 --- /dev/null +++ b/examples/data/radixtree.vsh @@ -0,0 +1,27 @@ +#!/usr/bin/env -S v -n -w -gc none -no-retry-compilation -cc tcc -d use_openssl -enable-globals run + +import freeflowuniverse.herolib.data.radixtree + +mut rt := radixtree.new(path:'/tmp/radixtree_test',reset:true)! + +// Show initial state +println('\nInitial state:') +rt.debug_db()! + +// Test insert +println('\nInserting key "test" with value "value1"') +rt.insert('test', 'value1'.bytes())! + +// Show state after insert +println('\nState after insert:') +rt.debug_db()! + +// Print tree structure +rt.print_tree()! 
+ +// Test search +if value := rt.search('test') { + println('\nFound value: ${value.bytestr()}') +} else { + println('\nError: ${err}') +} diff --git a/examples/virt/docker/docker_init.vsh b/examples/virt/docker/docker_init.vsh old mode 100644 new mode 100755 diff --git a/lib/core/log/backend_db.v b/lib/core/log/backend_db.v deleted file mode 100644 index 2417ae8d..00000000 --- a/lib/core/log/backend_db.v +++ /dev/null @@ -1,25 +0,0 @@ -module log - -import db.sqlite - -pub struct DBBackend { -pub: - db sqlite.DB -} - -@[params] -pub struct DBBackendConfig { -pub: - db sqlite.DB -} - -// factory for -pub fn new_backend(config DBBackendConfig) !DBBackend { - sql config.db { - create table Log - } or { panic(err) } - - return DBBackend{ - db: config.db - } -} \ No newline at end of file diff --git a/lib/core/log/events.v b/lib/core/log/events.v deleted file mode 100644 index 555f35f0..00000000 --- a/lib/core/log/events.v +++ /dev/null @@ -1,10 +0,0 @@ -module log - -import time - -@[params] -pub struct ViewEvent { -pub mut: - page string - duration time.Duration -} \ No newline at end of file diff --git a/lib/core/log/factory.v b/lib/core/log/factory.v deleted file mode 100644 index 3547d0f2..00000000 --- a/lib/core/log/factory.v +++ /dev/null @@ -1,18 +0,0 @@ -module log - -import db.sqlite - -pub struct Logger { - db_path string - // DBBackend -} - -pub fn new(db_path string) !Logger { - db := sqlite.connect(db_path)! - sql db { - create table Log - } or { panic(err) } - return Logger{ - db_path: db_path - } -} diff --git a/lib/core/log/logger.v b/lib/core/log/logger.v deleted file mode 100644 index a91e82c4..00000000 --- a/lib/core/log/logger.v +++ /dev/null @@ -1,55 +0,0 @@ -module log - -import db.sqlite - -pub fn (logger Logger) new_log(log Log) ! { - db := sqlite.connect(logger.db_path)! - - sql db { - insert log into Log - }! 
-} - -pub struct LogFilter { - Log - matches_all bool - limit int -} - -pub fn (logger Logger) filter_logs(filter LogFilter) ![]Log { - db := sqlite.connect(logger.db_path)! - mut select_stmt := 'select * from Log' - - mut matchers := []string{} - if filter.event != '' { - matchers << "event == '${filter.event}'" - } - - if filter.subject != '' { - matchers << "subject == '${filter.subject}'" - } - - if filter.object != '' { - matchers << "object == '${filter.object}'" - } - - if matchers.len > 0 { - matchers_str := if filter.matches_all { - matchers.join(' AND ') - } else { - matchers.join(' OR ') - } - select_stmt += ' where ${matchers_str}' - } - - responses := db.exec(select_stmt)! - - mut logs := []Log{} - for response in responses { - logs << sql db { - select from Log where id == response.vals[0].int() - }! - } - - return logs -} \ No newline at end of file diff --git a/lib/core/log/model.v b/lib/core/log/model.v deleted file mode 100644 index 852b2021..00000000 --- a/lib/core/log/model.v +++ /dev/null @@ -1,32 +0,0 @@ -module log - -import time - -pub struct Log { - id int @[primary; sql: serial] -pub: - timestamp time.Time -pub mut: - event string - subject string - object string - message string // a custom message that can be attached to a log -} - -// pub struct Event { -// name string -// description string -// } - -// // log_request logs http requests -// pub fn create_log(log Log) Log { -// return Log{ -// ...log -// timestamp: time.now() -// }) -// } - -// // log_request logs http requests -// pub fn (mut a Analyzer) get_logs(subject string) []Log { -// return []Log{} -// } diff --git a/lib/data/ourdb/README.md b/lib/data/ourdb/README.md index 308f0e34..2e7b1d19 100644 --- a/lib/data/ourdb/README.md +++ b/lib/data/ourdb/README.md @@ -5,18 +5,22 @@ OurDB is a lightweight, efficient key-value database implementation in V that pr ## Usage Example ```v - -//record_nr_max u32 = 16777216 - 1 // max number of records -//record_size_max u32 = 1024*4 // max 
record size (4KB default) -//file_size u32 = 500 * (1 << 20) // file size (500MB default) -//path string // storage directory - import freeflowuniverse.herolib.data.ourdb -mut db := ourdb.new(path:"/tmp/mydb")! +// Configure and create a new database instance +mut db := ourdb.new( + path: '/tmp/mydb', // storage directory + record_nr_max: 16777216 - 1, // max number of records (default) + record_size_max: 1024 * 4, // max record size (4KB default) + file_size: 500 * (1 << 20), // file size (500MB default) + incremental_mode: true // enable auto-incrementing IDs (default) +)! -// Store data (note: set() takes []u8 as value) -db.set(1, 'Hello World'.bytes())! +// Store data with auto-incrementing ID (incremental mode) +id := db.set(data: 'Hello World'.bytes())! + +// Store data with specific ID (is an update) +id2 := db.set(id: 1, data: 'Hello Again'.bytes())! // Retrieve data data := db.get(1)! // Returns []u8 @@ -28,7 +32,6 @@ history := db.get_history(1, 5)! // Get last 5 versions db.delete(1)! ``` - ## Features - Efficient key-value storage @@ -37,6 +40,19 @@ db.delete(1)! - Support for multiple backend files - Configurable record sizes and counts - Memory and disk-based lookup tables +- Optional incremental ID mode + +## Configuration Options + +```v +struct OurDBConfig { + record_nr_max u32 = 16777216 - 1 // max size of records + record_size_max u32 = 1024 * 4 // max size in bytes of a record (4KB default) + file_size u32 = 500 * (1 << 20) // file size (500MB default) + path string // directory where we will store the DB + incremental_mode bool = true // enable auto-incrementing IDs +} +``` ## Architecture @@ -46,26 +62,29 @@ OurDB consists of three main components working together in a layered architectu - Provides the public API for database operations - Handles high-level operations (set, get, delete, history) - Coordinates between lookup and backend components -- Located in `db.v` +- Supports both key-value and incremental ID modes ### 2. 
Lookup Table (lookup.v) - Maps keys to physical locations in the backend storage - Supports both memory and disk-based lookup tables -- Configurable key sizes for optimization +- Automatically optimizes key sizes based on database configuration - Handles sparse data efficiently -- Located in `lookup.v` +- Provides next ID generation for incremental mode ### 3. Backend Storage (backend.v) - Manages the actual data storage in files - Handles data integrity with CRC32 checksums - Supports multiple file backends for large datasets - Implements the low-level read/write operations -- Located in `backend.v` ## File Structure - `db.v`: Frontend interface providing the public API -- `lookup.v`: Implementation of the lookup table system +- `lookup.v`: Core lookup table implementation +- `lookup_location.v`: Location tracking implementation +- `lookup_location_test.v`: Location tracking tests +- `lookup_id_test.v`: ID generation tests +- `lookup_test.v`: General lookup table tests - `backend.v`: Low-level data storage implementation - `factory.v`: Database initialization and configuration - `db_test.v`: Test suite for verifying functionality @@ -73,16 +92,19 @@ OurDB consists of three main components working together in a layered architectu ## How It Works 1. **Frontend Operations** - - When you call `set(key, value)`, the frontend: - 1. Gets the storage location from the lookup table - 2. Passes the data to the backend for storage - 3. Updates the lookup table with any new location + - When you call `set()`, the frontend: + 1. In incremental mode, generates the next ID or uses provided ID + 2. Gets the storage location from the lookup table + 3. Passes the data to the backend for storage + 4. Updates the lookup table with any new location 2. 
**Lookup Table** - Maintains a mapping between keys and physical locations - - Optimizes key size based on maximum record count - - Can be memory-based for speed or disk-based for large datasets - - Supports sparse data storage for efficient space usage + - Optimizes key size based on: + - Total number of records (affects address space) + - Record size and count (determines file splitting) + - Supports incremental ID generation + - Persists lookup data to disk for recovery 3. **Backend Storage** - Stores data in one or multiple files @@ -103,13 +125,15 @@ Each record in the backend storage includes: - N bytes: Actual data ### Lookup Table Optimization -The lookup table automatically optimizes its key size based on: -- Total number of records (affects address space) -- Record size and count (determines file splitting) -- Available memory (can switch to disk-based lookup) +The lookup table automatically optimizes its key size based on the database configuration: +- 2 bytes: For databases with < 65,536 records +- 3 bytes: For databases with < 16,777,216 records +- 4 bytes: For databases with < 4,294,967,296 records +- 6 bytes: For large databases requiring multiple files ### File Management - Supports splitting data across multiple files when needed - Each file is limited to 500MB by default (configurable) - Automatic file selection based on record location - Files are created as needed with format: `${path}/${file_nr}.db` +- Lookup table state is persisted in `${path}/lookup_dump.db` diff --git a/lib/data/ourdb/backend.v b/lib/data/ourdb/backend.v index 89168e0f..e2920c18 100644 --- a/lib/data/ourdb/backend.v +++ b/lib/data/ourdb/backend.v @@ -22,28 +22,18 @@ fn (mut db OurDB) db_file_select(file_nr u16) ! { path := '${db.path}/${file_nr}.db' - if db.file_nr == file_nr { - // make sure file is opened - if !db.file.is_opened { - if !os.exists(path) { - db.create_new_db_file(file_nr)! - } - - mut file := os.open_file(path, 'r+')! 
- db.file = file - } - return - } - + // Always close the current file if it's open if db.file.is_opened { db.file.close() } + // Create file if it doesn't exist if !os.exists(path) { db.create_new_db_file(file_nr)! } - mut file := os.open_file(path, 'r+')! + // Open the file fresh + mut file := os.open_file(path, 'r+')! db.file = file db.file_nr = file_nr } @@ -85,6 +75,7 @@ pub fn (mut db OurDB) set_(x u32, old_location Location, data []u8) ! { file_nr: file_nr position: u32(db.file.tell()!) } + println('Writing data at position: ${new_location.position}, file_nr: ${file_nr}') // Calculate CRC of data crc := calculate_crc(data) @@ -118,6 +109,9 @@ pub fn (mut db OurDB) set_(x u32, old_location Location, data []u8) ! { // Update lookup table with new position db.lookup.set(x, new_location)! + + // Ensure lookup table is synced + //db.save()! } // get retrieves data at specified location @@ -150,6 +144,7 @@ fn (mut db OurDB) get_(location Location) ![]u8 { if data_read_bytes != int(size) { return error('failed to read data bytes') } + println('Reading data from position: ${location.position}, file_nr: ${location.file_nr}, size: ${size}, data: ${data}') // Verify CRC calculated_crc := calculate_crc(data) diff --git a/lib/data/ourdb/db.v b/lib/data/ourdb/db.v index 721abc73..2f2380f9 100644 --- a/lib/data/ourdb/db.v +++ b/lib/data/ourdb/db.v @@ -18,7 +18,8 @@ import os // and maintains a linked list of previous values for history tracking // Returns the ID used (either x if specified, or auto-incremented if x=0) @[params] -struct OurDBSetArgs { +pub struct OurDBSetArgs { +pub mut: id ?u32 data []u8 @[required] } @@ -91,6 +92,15 @@ pub fn (mut db OurDB) delete(x u32) ! { db.lookup.delete(x)! } +// get_next_id returns the next id which will be used when storing +pub fn (mut db OurDB) get_next_id() !u32 { + if !db.incremental_mode { + return error('incremental mode is not enabled') + } + next_id := db.lookup.get_next_id()! 
+ return next_id +} + // close closes the database file fn (mut db OurDB) lookup_dump_path() string { return '${db.path}/lookup_dump.db' @@ -115,6 +125,7 @@ fn (mut db OurDB) close() ! { db.close_() } -fn (mut db OurDB) destroy() ! { +pub fn (mut db OurDB) destroy() ! { + db.close() or {} os.rmdir_all(db.path)! } diff --git a/lib/data/ourdb/db_update_test.v b/lib/data/ourdb/db_update_test.v new file mode 100644 index 00000000..9ab57945 --- /dev/null +++ b/lib/data/ourdb/db_update_test.v @@ -0,0 +1,48 @@ +module ourdb + +import os + +const test_dir = '/tmp/ourdb' + +fn test_db_update() { + // Ensure test directory exists and is empty + if os.exists(test_dir) { + os.rmdir_all(test_dir) or { panic(err) } + } + os.mkdir_all(test_dir) or { panic(err) } + + mut db := new( + record_nr_max: 16777216 - 1 // max size of records + record_size_max: 1024 + path: test_dir + reset: false // Don't reset since we just created a fresh directory + )! + + defer { + db.destroy() or { panic('failed to destroy db: ${err}') } + } + + // Test set and get + test_data := 'Hello, World!'.bytes() + id := db.set(data: test_data)! + + retrieved := db.get(id)! + assert retrieved == test_data + + assert id==0 + + // Test overwrite + new_data := 'Updated data'.bytes() + id2 := db.set(id:0, data: new_data)! + assert id2==0 + + // Verify lookup table has the correct location + location := db.lookup.get(id2)! + println('Location after update - file_nr: ${location.file_nr}, position: ${location.position}') + + // Get and verify the updated data + retrieved2 := db.get(id2)! 
+ println('Retrieved data: ${retrieved2}') + println('Expected data: ${new_data}') + assert retrieved2 == new_data +} diff --git a/lib/data/ourdb/factory.v b/lib/data/ourdb/factory.v index 3f11b113..a5fabd0b 100644 --- a/lib/data/ourdb/factory.v +++ b/lib/data/ourdb/factory.v @@ -28,8 +28,8 @@ pub: record_size_max u32 = 1024 * 4 // max size in bytes of a record, is 4 KB default file_size u32 = 500 * (1 << 20) // 500MB path string // directory where we will stor the DB - incremental_mode bool = true + reset bool } // new_memdb creates a new memory database with the given path and lookup table @@ -56,6 +56,10 @@ pub fn new(args OurDBConfig) !OurDB { incremental_mode: args.incremental_mode )! + if args.reset{ + os.rmdir_all(args.path) or {} + } + os.mkdir_all(args.path)! mut db := OurDB{ path: args.path diff --git a/lib/data/ourdb/lookup.v b/lib/data/ourdb/lookup.v index 7e1bb9fc..ecd1b90e 100644 --- a/lib/data/ourdb/lookup.v +++ b/lib/data/ourdb/lookup.v @@ -90,7 +90,7 @@ fn (lut LookupTable) get(x u32) !Location { entry_size := lut.keysize if lut.lookuppath.len > 0 { // Check file size first - file_size := os.file_size(lut.get_data_file_path()!) + file_size := os.file_size(lut.get_data_file_path()!) //THIS SLOWS DOWN, NEED TO DO SOMETHING MORE INTELLIGENCE ONCE start_pos := x * entry_size if start_pos + entry_size > file_size { diff --git a/lib/data/radixtree/README.md b/lib/data/radixtree/README.md new file mode 100644 index 00000000..17b2c3a9 --- /dev/null +++ b/lib/data/radixtree/README.md @@ -0,0 +1,132 @@ +# Radix Tree Implementation + +A radix tree (also known as a patricia trie or radix trie) is a space-optimized tree data structure that enables efficient string key operations. This implementation provides a persistent radix tree backed by OurDB for durable storage. 
+ +## Key Features + +- Efficient prefix-based key operations +- Persistent storage using OurDB backend +- Memory-efficient storage of strings with common prefixes +- Support for binary values +- Thread-safe operations through OurDB + +## How It Works + +### Data Structure + +The radix tree is composed of nodes where: +- Each node stores a segment of a key (not just a single character) +- Nodes can have multiple children, each representing a different branch +- Leaf nodes contain the actual values +- Each node is persisted in OurDB with a unique ID + +```v +struct Node { +mut: + key_segment string // The segment of the key stored at this node + value []u8 // Value stored at this node (empty if not a leaf) + children []NodeRef // References to child nodes + is_leaf bool // Whether this node is a leaf node +} +``` + +### OurDB Integration + +The radix tree uses OurDB as its persistent storage backend: +- Each node is serialized and stored as a record in OurDB +- Node references use OurDB record IDs +- The tree maintains a root node ID for traversal +- Node serialization includes version tracking for format evolution + +### Key Operations + +#### Insertion +1. Traverse the tree following matching prefixes +2. Split nodes when partial matches are found +3. Create new nodes for unmatched segments +4. Update node values and references in OurDB + +#### Search +1. Start from the root node +2. Follow child nodes whose key segments match the search key +3. Return the value if an exact match is found at a leaf node + +#### Deletion +1. Locate the node containing the key +2. Remove the value and leaf status +3. Clean up empty nodes if necessary +4. Update parent references + +## Usage Example + +```v +import freeflowuniverse.herolib.data.radixtree + +// Create a new radix tree +mut tree := radixtree.new('/path/to/storage')! + +// Insert key-value pairs +tree.insert('hello', 'world'.bytes())! +tree.insert('help', 'me'.bytes())! 
+ +// Search for values +value := tree.search('hello')! // Returns 'world' as bytes +println(value.bytestr()) // Prints: world + +// Delete keys +tree.delete('help')! +``` + +## Implementation Details + +### Node Serialization + +Nodes are serialized in a compact binary format: +``` +[Version(1B)][KeySegment][ValueLength(2B)][Value][ChildrenCount(2B)][Children][IsLeaf(1B)] +``` + +Where each child is stored as: +``` +[KeyPart][NodeID(4B)] +``` + +### Space Optimization + +The radix tree optimizes space usage by: +1. Sharing common prefixes between keys +2. Storing only key segments at each node instead of complete keys +3. Merging nodes with single children when possible +4. Using OurDB's efficient storage and retrieval mechanisms + +### Performance Characteristics + +- Search: O(k) where k is the key length +- Insert: O(k) for new keys, may require node splitting +- Delete: O(k) plus potential node cleanup +- Space: O(n) where n is the total length of all keys + +## Relationship with OurDB + +This radix tree implementation leverages OurDB's features: +- Persistent storage with automatic file management +- Record-based storage with unique IDs +- Data integrity through CRC32 checksums +- Configurable record sizes +- Automatic file size management + +The integration provides: +- Durability: All tree operations are persisted +- Consistency: Tree state is maintained across restarts +- Efficiency: Leverages OurDB's optimized storage +- Scalability: Handles large datasets through OurDB's file management + +## Use Cases + +Radix trees are particularly useful for: +- Prefix-based searching +- IP routing tables +- Dictionary implementations +- Auto-complete systems +- File system paths +- Any application requiring efficient string key operations with persistence diff --git a/lib/data/radixtree/factory_test.v b/lib/data/radixtree/factory_test.v new file mode 100644 index 00000000..ba82a070 --- /dev/null +++ b/lib/data/radixtree/factory_test.v @@ -0,0 +1,124 @@ +module 
radixtree + +fn test_basic_operations() ! { + mut rt := new(path:'/tmp/radixtree_test',reset:true)! + + // Test insert and search + rt.insert('test', 'value1'.bytes())! + value1 := rt.search('test')! + assert value1.bytestr() == 'value1' + + // // Test updating existing key + // rt.insert('test', 'value2'.bytes())! + // value2 := rt.search('test')! + // assert value2.bytestr() == 'value2' + + // // Test non-existent key + // if _ := rt.search('nonexistent') { + // assert false, 'Expected error for non-existent key' + // } + + // // Test delete + // rt.delete('test')! + // if _ := rt.search('test') { + // assert false, 'Expected error after deletion' + // } +} + +// fn test_prefix_matching() ! { +// mut rt := new('/tmp/radixtree_test_prefix')! + +// // Insert keys with common prefixes +// rt.insert('team', 'value1'.bytes())! +// rt.insert('test', 'value2'.bytes())! +// rt.insert('testing', 'value3'.bytes())! + +// // Verify each key has correct value +// value1 := rt.search('team')! +// assert value1.bytestr() == 'value1' + +// value2 := rt.search('test')! +// assert value2.bytestr() == 'value2' + +// value3 := rt.search('testing')! +// assert value3.bytestr() == 'value3' + +// // Delete middle key and verify others still work +// rt.delete('test')! + +// if _ := rt.search('test') { +// assert false, 'Expected error after deletion' +// } + +// value1_after := rt.search('team')! +// assert value1_after.bytestr() == 'value1' + +// value3_after := rt.search('testing')! +// assert value3_after.bytestr() == 'value3' +// } + +// fn test_edge_cases() ! { +// mut rt := new('/tmp/radixtree_test_edge')! + +// // Test empty key +// rt.insert('', 'empty'.bytes())! +// empty_value := rt.search('')! +// assert empty_value.bytestr() == 'empty' + +// // Test very long key +// long_key := 'a'.repeat(1000) +// rt.insert(long_key, 'long'.bytes())! +// long_value := rt.search(long_key)! 
+// assert long_value.bytestr() == 'long' + +// // Test keys that require node splitting +// rt.insert('test', 'value1'.bytes())! +// rt.insert('testing', 'value2'.bytes())! +// rt.insert('te', 'value3'.bytes())! + +// value1 := rt.search('test')! +// assert value1.bytestr() == 'value1' + +// value2 := rt.search('testing')! +// assert value2.bytestr() == 'value2' + +// value3 := rt.search('te')! +// assert value3.bytestr() == 'value3' +// } + +// fn test_multiple_operations() ! { +// mut rt := new('/tmp/radixtree_test_multiple')! + +// // Insert multiple keys +// keys := ['abc', 'abcd', 'abcde', 'bcd', 'bcde'] +// for i, key in keys { +// rt.insert(key, 'value${i + 1}'.bytes())! +// } + +// // Verify all keys +// for i, key in keys { +// value := rt.search(key)! +// assert value.bytestr() == 'value${i + 1}' +// } + +// // Delete some keys +// rt.delete('abcd')! +// rt.delete('bcde')! + +// // Verify remaining keys +// remaining := ['abc', 'abcde', 'bcd'] +// expected := ['value1', 'value3', 'value4'] + +// for i, key in remaining { +// value := rt.search(key)! 
+// assert value.bytestr() == expected[i] +// } + +// // Verify deleted keys return error +// deleted := ['abcd', 'bcde'] +// for key in deleted { +// if _ := rt.search(key) { +// assert false, 'Expected error for deleted key: ${key}' +// } +// } +// } diff --git a/lib/data/radixtree/radixtree.v b/lib/data/radixtree/radixtree.v new file mode 100644 index 00000000..29cffc94 --- /dev/null +++ b/lib/data/radixtree/radixtree.v @@ -0,0 +1,289 @@ +module radixtree + +import freeflowuniverse.herolib.data.ourdb + +// Represents a node in the radix tree +struct Node { +mut: + key_segment string // The segment of the key stored at this node + value []u8 // Value stored at this node (empty if not a leaf) + children []NodeRef // References to child nodes + is_leaf bool // Whether this node is a leaf node +} + +// Reference to a node in the database +struct NodeRef { +mut: + key_part string // The key segment for this child + node_id u32 // Database ID of the node +} + +// RadixTree represents a radix tree data structure +pub struct RadixTree { +mut: + db &ourdb.OurDB // Database for persistent storage + root_id u32 // Database ID of the root node +} + + +pub struct NewArgs { +pub mut: + path string + reset bool +} + +// Creates a new radix tree with the specified database path +pub fn new(args NewArgs) !&RadixTree { + mut db := ourdb.new( + path: args.path + record_size_max: 1024 * 4 // 4KB max record size + incremental_mode: true + reset:args.reset + )! + + mut root_id := u32(0) + println('Debug: Initializing root node') + if db.get_next_id()! == 0 { + println('Debug: Creating new root node') + root := Node{ + key_segment: '' + value: []u8{} + children: []NodeRef{} + is_leaf: false + } + root_id = db.set(data: serialize_node(root))! + println('Debug: Created root node with ID ${root_id}') + assert root_id == 0 + } else { + println('Debug: Using existing root node') + root_data := db.get(0)! + root_node := deserialize_node(root_data)! 
+ println('Debug: Root node has ${root_node.children.len} children') + } + + return &RadixTree{ + db: &db + root_id: root_id + } +} + +// Inserts a key-value pair into the tree +pub fn (mut rt RadixTree) insert(key string, value []u8) ! { + mut current_id := rt.root_id + mut offset := 0 + + for offset < key.len { + mut node := deserialize_node(rt.db.get(current_id)!)! + + // Find matching child + mut matched_child := -1 + for i, child in node.children { + if key[offset..].starts_with(child.key_part) { + matched_child = i + break + } + } + + if matched_child == -1 { + // No matching child found, create new leaf node + key_part := key[offset..] + new_node := Node{ + key_segment: key_part + value: value + children: []NodeRef{} + is_leaf: true + } + println('Debug: Creating new leaf node with key_part "${key_part}"') + new_id := rt.db.set(data: serialize_node(new_node))! + println('Debug: Created node ID ${new_id}') + + // Create new child reference and update parent node + println('Debug: Updating parent node ${current_id} to add child reference') + + // Get fresh copy of parent node + mut parent_node := deserialize_node(rt.db.get(current_id)!)! 
+ println('Debug: Parent node initially has ${parent_node.children.len} children') + + // Add new child reference + parent_node.children << NodeRef{ + key_part: key_part + node_id: new_id + } + println('Debug: Added child reference, now has ${parent_node.children.len} children') + + // Update parent node in DB + println('Debug: Serializing parent node with ${parent_node.children.len} children') + parent_data := serialize_node(parent_node) + println('Debug: Parent data size: ${parent_data.len} bytes') + + // First verify we can deserialize the data correctly + println('Debug: Verifying serialization...') + if test_node := deserialize_node(parent_data) { + println('Debug: Serialization test successful - node has ${test_node.children.len} children') + } else { + println('Debug: ERROR - Failed to deserialize test data') + return error('Serialization verification failed') + } + + // Set with explicit ID to update existing node + println('Debug: Writing to DB...') + rt.db.set(id: current_id, data: parent_data)! + + // Verify by reading back and comparing + println('Debug: Reading back for verification...') + verify_data := rt.db.get(current_id)! + verify_node := deserialize_node(verify_data)! 
+ println('Debug: Verification - node has ${verify_node.children.len} children') + + if verify_node.children.len == 0 { + println('Debug: ERROR - Node update verification failed!') + println('Debug: Original node children: ${node.children.len}') + println('Debug: Parent node children: ${parent_node.children.len}') + println('Debug: Verified node children: ${verify_node.children.len}') + println('Debug: Original data size: ${parent_data.len}') + println('Debug: Verified data size: ${verify_data.len}') + println('Debug: Data equal: ${verify_data == parent_data}') + return error('Node update failed - children array is empty') + } + return + } + + child := node.children[matched_child] + common_prefix := get_common_prefix(key[offset..], child.key_part) + + if common_prefix.len < child.key_part.len { + // Split existing node + mut child_node := deserialize_node(rt.db.get(child.node_id)!)! + + // Create new intermediate node + mut new_node := Node{ + key_segment: child.key_part[common_prefix.len..] + value: child_node.value + children: child_node.children + is_leaf: child_node.is_leaf + } + new_id := rt.db.set(data: serialize_node(new_node))! + + // Update current node + node.children[matched_child] = NodeRef{ + key_part: common_prefix + node_id: new_id + } + rt.db.set(id: current_id, data: serialize_node(node))! + } + + if offset + common_prefix.len == key.len { + // Update value at existing node + mut child_node := deserialize_node(rt.db.get(child.node_id)!)! + child_node.value = value + child_node.is_leaf = true + rt.db.set(id: child.node_id, data: serialize_node(child_node))! + return + } + + offset += common_prefix.len + current_id = child.node_id + } +} + +// Searches for a key in the tree +pub fn (mut rt RadixTree) search(key string) ![]u8 { + mut current_id := rt.root_id + mut offset := 0 + + for offset < key.len { + node := deserialize_node(rt.db.get(current_id)!)! 
+ + mut found := false + for child in node.children { + if key[offset..].starts_with(child.key_part) { + if offset + child.key_part.len == key.len { + child_node := deserialize_node(rt.db.get(child.node_id)!)! + if child_node.is_leaf { + return child_node.value + } + } + current_id = child.node_id + offset += child.key_part.len + found = true + break + } + } + + if !found { + return error('Key not found') + } + } + + return error('Key not found') +} + +// Deletes a key from the tree +pub fn (mut rt RadixTree) delete(key string) ! { + mut current_id := rt.root_id + mut offset := 0 + mut path := []NodeRef{} + + // Find the node to delete + for offset < key.len { + node := deserialize_node(rt.db.get(current_id)!)! + + mut found := false + for child in node.children { + if key[offset..].starts_with(child.key_part) { + path << child + current_id = child.node_id + offset += child.key_part.len + found = true + + // Check if we've matched the full key + if offset == key.len { + child_node := deserialize_node(rt.db.get(child.node_id)!)! + if child_node.is_leaf { + found = true + break + } + } + break + } + } + + if !found { + return error('Key not found') + } + } + + if path.len == 0 { + return error('Key not found') + } + + // Remove the leaf node + mut last_node := deserialize_node(rt.db.get(path.last().node_id)!)! + last_node.is_leaf = false + last_node.value = []u8{} + + // If node has no children, remove it from parent + if last_node.children.len == 0 { + if path.len > 1 { + mut parent_node := deserialize_node(rt.db.get(path[path.len - 2].node_id)!)! + for i, child in parent_node.children { + if child.node_id == path.last().node_id { + parent_node.children.delete(i) + break + } + } + rt.db.set(id: path[path.len - 2].node_id, data: serialize_node(parent_node))! + } + } else { + rt.db.set(id: path.last().node_id, data: serialize_node(last_node))! 
+	}
+}
+
+// Helper function to get the common prefix of two strings
+// NOTE(review): compares byte-by-byte, so a shared multi-byte UTF-8 sequence
+// can be split mid-codepoint — confirm keys are ASCII before relying on this.
+fn get_common_prefix(a string, b string) string {
+	mut i := 0
+	for i < a.len && i < b.len && a[i] == b[i] {
+	i++
+	}
+	return a[..i]
+}
diff --git a/lib/data/radixtree/radixtree_debug.v b/lib/data/radixtree/radixtree_debug.v
new file mode 100644
index 00000000..db7b987c
--- /dev/null
+++ b/lib/data/radixtree/radixtree_debug.v
@@ -0,0 +1,111 @@
+module radixtree
+
+import freeflowuniverse.herolib.data.ourdb
+
+// Gets a node from the database by its ID
+// Side effect: prints a one-line debug trace for every fetch.
+pub fn (mut rt RadixTree) get_node_by_id(id u32) !Node {
+	node_data := rt.db.get(id)!
+	node := deserialize_node(node_data)!
+	println('Debug: Retrieved node ${id} with ${node.children.len} children')
+	return node
+}
+
+// Logs the current state of a node
+pub fn (mut rt RadixTree) debug_node(id u32, msg string) ! {
+	node := rt.get_node_by_id(id)!
+	println('Debug: ${msg}')
+	println('  Node ID: ${id}')
+	println('  Key Segment: "${node.key_segment}"')
+	println('  Is Leaf: ${node.is_leaf}')
+	println('  Children: ${node.children.len}')
+	for child in node.children {
+	println('    - Child ID: ${child.node_id}, Key Part: "${child.key_part}"')
+	}
+}
+
+// Prints the current state of the database
+pub fn (mut rt RadixTree) debug_db() ! {
+	println('\nDatabase State:')
+	println('===============')
+	mut next_id := rt.db.get_next_id()!
+	// NOTE(review): assumes ids are dense starting at 0; any gap (deleted or
+	// never-assigned id below next_id) is reported as 'No data'.
+	for id := u32(0); id < next_id; id++ {
+	if data := rt.db.get(id) {
+	if node := deserialize_node(data) {
+	println('ID ${id}:')
+	println('  Key Segment: "${node.key_segment}"')
+	println('  Is Leaf: ${node.is_leaf}')
+	println('  Children: ${node.children.len}')
+	for child in node.children {
+	println('    - Child ID: ${child.node_id}, Key Part: "${child.key_part}"')
+	}
+	} else {
+	println('ID ${id}: Failed to deserialize node')
+	}
+	} else {
+	println('ID ${id}: No data')
+	}
+	}
+}
+
+// Prints the tree structure starting from a given node ID
+// `indent` is the accumulated prefix used to draw the box-drawing tree lines.
+pub fn (mut rt RadixTree) print_tree_from_node(node_id u32, indent string) ! {
+	node := rt.get_node_by_id(node_id)!
+
+	mut node_info := '${indent}Node(id: ${node_id})'
+	node_info += '\n${indent}├── key_segment: "${node.key_segment}"'
+	node_info += '\n${indent}├── is_leaf: ${node.is_leaf}'
+	if node.is_leaf {
+	node_info += '\n${indent}├── value: ${node.value.bytestr()}'
+	}
+	node_info += '\n${indent}└── children: ${node.children.len}'
+	if node.children.len > 0 {
+	node_info += ' ['
+	for i, child in node.children {
+	if i > 0 { node_info += ', ' }
+	node_info += '${child.node_id}:${child.key_part}'
+	}
+	node_info += ']'
+	}
+	println(node_info)
+
+	// Print children recursively with increased indentation
+	for i, child in node.children {
+	is_last := i == node.children.len - 1
+	child_indent := if is_last {
+	indent + '    '
+	} else {
+	indent + '│   '
+	}
+	rt.print_tree_from_node(child.node_id, child_indent)!
+	}
+}
+
+// Prints the entire tree structure starting from root
+pub fn (mut rt RadixTree) print_tree() ! {
+	println('\nRadix Tree Structure:')
+	println('===================')
+	rt.print_tree_from_node(rt.root_id, '')!
+}
+
+// Gets detailed information about a specific node
+// Returns a multi-line human-readable report instead of printing it.
+pub fn (mut rt RadixTree) get_node_info(id u32) !string {
+	node := rt.get_node_by_id(id)!
+
+	mut info := 'Node Details:\n'
+	info += '=============\n'
+	info += 'ID: ${id}\n'
+	info += 'Key Segment: "${node.key_segment}"\n'
+	info += 'Is Leaf: ${node.is_leaf}\n'
+	if node.is_leaf {
+	info += 'Value: ${node.value}\n'
+	}
+	info += 'Number of Children: ${node.children.len}\n'
+	if node.children.len > 0 {
+	info += '\nChildren:\n'
+	for child in node.children {
+	info += '- ID: ${child.node_id}, Key Part: "${child.key_part}"\n'
+	}
+	}
+
+	return info
+}
diff --git a/lib/data/radixtree/serialize.v b/lib/data/radixtree/serialize.v
new file mode 100644
index 00000000..1775d660
--- /dev/null
+++ b/lib/data/radixtree/serialize.v
@@ -0,0 +1,77 @@
+module radixtree
+
+import freeflowuniverse.herolib.data.encoder
+
+const (
+	version = u8(1) // Current binary format version
+)
+
+// Serializes a node to bytes for storage
+// Layout: version u8 | key_segment string | value_len u16 + value bytes |
+// children_len u16 + (key_part string, node_id u32)* | is_leaf u8.
+fn serialize_node(node Node) []u8 {
+	mut e := encoder.new()
+
+	// Add version byte
+	e.add_u8(version)
+
+	// Add key segment
+	e.add_string(node.key_segment)
+
+	// Add value as []u8
+	// NOTE(review): length is written as u16 — a value longer than 65535 bytes
+	// silently truncates the header (same for the children count below) and the
+	// record will not round-trip. Confirm upstream enforces these limits.
+	e.add_u16(u16(node.value.len))
+	e.data << node.value
+
+	// Add children
+	e.add_u16(u16(node.children.len))
+	for child in node.children {
+	e.add_string(child.key_part)
+	e.add_u32(child.node_id)
+	}
+
+	// Add leaf flag
+	e.add_u8(if node.is_leaf { u8(1) } else { u8(0) })
+
+	return e.data
+}
+
+// Deserializes bytes to a node
+// Fails fast when the leading version byte does not match `version`.
+fn deserialize_node(data []u8) !Node {
+	mut d := encoder.decoder_new(data)
+
+	// Read and verify version
+	version_byte := d.get_u8()
+	if version_byte != version {
+	return error('Invalid version byte: expected ${version}, got ${version_byte}')
+	}
+
+	// Read key segment
+	key_segment := d.get_string()
+
+	// Read value as []u8
+	value_len := d.get_u16()
+	mut value := []u8{len: int(value_len)}
+	for i in 0..int(value_len) {
+	value[i] = d.get_u8()
+	}
+
+	// Read children
+	children_len := d.get_u16()
+	mut children := []NodeRef{cap: int(children_len)}
+	for _ in 0 .. 
children_len {
+	key_part := d.get_string()
+	node_id := d.get_u32()
+	children << NodeRef{
+	key_part: key_part
+	node_id: node_id
+	}
+	}
+
+	// Read leaf flag
+	is_leaf := d.get_u8() == 1
+
+	return Node{
+	key_segment: key_segment
+	value: value
+	children: children
+	is_leaf: is_leaf
+	}
+}
diff --git a/lib/data/radixtree/serialize_test.v b/lib/data/radixtree/serialize_test.v
new file mode 100644
index 00000000..db53f678
--- /dev/null
+++ b/lib/data/radixtree/serialize_test.v
@@ -0,0 +1,110 @@
+module radixtree
+
+// Round-trips a fully populated node and checks every field survives.
+fn test_serialize_deserialize() {
+	// Create a test node with children
+	node := Node{
+	key_segment: 'test'
+	value: 'hello world'.bytes()
+	children: [
+	NodeRef{
+	key_part: 'child1'
+	node_id: 1
+	},
+	NodeRef{
+	key_part: 'child2'
+	node_id: 2
+	}
+	]
+	is_leaf: true
+	}
+
+	// Serialize
+	data := serialize_node(node)
+
+	// Verify version byte
+	assert data[0] == version
+
+	// Deserialize
+	decoded := deserialize_node(data)!
+
+	// Verify all fields match
+	assert decoded.key_segment == node.key_segment
+	assert decoded.value == node.value
+	assert decoded.is_leaf == node.is_leaf
+	assert decoded.children.len == node.children.len
+
+	// Verify children
+	assert decoded.children[0].key_part == node.children[0].key_part
+	assert decoded.children[0].node_id == node.children[0].node_id
+	assert decoded.children[1].key_part == node.children[1].key_part
+	assert decoded.children[1].node_id == node.children[1].node_id
+}
+
+// Round-trips the degenerate all-empty node (edge case for length fields).
+fn test_empty_node() {
+	// Test node with empty values
+	node := Node{
+	key_segment: ''
+	value: []u8{}
+	children: []NodeRef{}
+	is_leaf: false
+	}
+
+	data := serialize_node(node)
+	decoded := deserialize_node(data)!
+
+	assert decoded.key_segment == node.key_segment
+	assert decoded.value == node.value
+	assert decoded.children == node.children
+	assert decoded.is_leaf == node.is_leaf
+}
+
+// Round-trips a 1000-byte value and 100 children — still well within the
+// u16 length fields used by the wire format.
+fn test_large_values() {
+	// Create large test data
+	mut large_value := []u8{len: 1000, init: u8(index & 0xFF)}
+	mut children := []NodeRef{cap: 100}
+	for i in 0..100 {
+	children << NodeRef{
+	key_part: 'child${i}'
+	node_id: u32(i)
+	}
+	}
+
+	node := Node{
+	key_segment: 'large_test'
+	value: large_value
+	children: children
+	is_leaf: true
+	}
+
+	data := serialize_node(node)
+	decoded := deserialize_node(data)!
+
+	assert decoded.key_segment == node.key_segment
+	assert decoded.value == node.value
+	assert decoded.children.len == node.children.len
+
+	// Verify some random children
+	assert decoded.children[0] == node.children[0]
+	assert decoded.children[50] == node.children[50]
+	assert decoded.children[99] == node.children[99]
+}
+
+// A corrupted version byte must make deserialize_node fail with the
+// exact descriptive error message.
+fn test_invalid_version() {
+	node := Node{
+	key_segment: 'test'
+	value: []u8{}
+	children: []NodeRef{}
+	is_leaf: false
+	}
+
+	mut data := serialize_node(node)
+	// Corrupt version byte
+	data[0] = 255
+
+	// Should return error for version mismatch
+	if result := deserialize_node(data) {
+	assert false, 'Expected error for invalid version byte'
+	} else {
+	assert err.msg() == 'Invalid version byte: expected ${version}, got 255'
+	}
+}