This commit is contained in:
2025-01-31 08:29:17 +03:00
parent f8d675dcaf
commit 5670efc4cb
20 changed files with 1038 additions and 184 deletions

View File

@@ -0,0 +1,132 @@
# Radix Tree Implementation
A radix tree (also known as a patricia trie or radix trie) is a space-optimized tree data structure that enables efficient string key operations. This implementation provides a persistent radix tree backed by OurDB for durable storage.
## Key Features
- Efficient prefix-based key operations
- Persistent storage using OurDB backend
- Memory-efficient storage of strings with common prefixes
- Support for binary values
- Thread-safe operations through OurDB
## How It Works
### Data Structure
The radix tree is composed of nodes where:
- Each node stores a segment of a key (not just a single character)
- Nodes can have multiple children, each representing a different branch
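- Nodes flagged as leaves (`is_leaf`) contain the actual values; such a node can still have children when its key is a prefix of longer keys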
- Each node is persisted in OurDB with a unique ID
```v
struct Node {
mut:
key_segment string // The segment of the key stored at this node
value []u8 // Value stored at this node (empty if not a leaf)
children []NodeRef // References to child nodes
is_leaf bool // Whether this node is a leaf node
}
```
### OurDB Integration
The radix tree uses OurDB as its persistent storage backend:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution
### Key Operations
#### Insertion
1. Traverse the tree following matching prefixes
2. Split nodes when partial matches are found
3. Create new nodes for unmatched segments
4. Update node values and references in OurDB
#### Search
1. Start from the root node
2. Follow child nodes whose key segments match the search key
3. Return the value if an exact match is found at a leaf node
#### Deletion
1. Locate the node containing the key
2. Remove the value and leaf status
3. Clean up empty nodes if necessary
4. Update parent references
## Usage Example
```v
import freeflowuniverse.herolib.data.radixtree
// Create a new radix tree
mut tree := radixtree.new(path: '/path/to/storage')!
// Insert key-value pairs
tree.insert('hello', 'world'.bytes())!
tree.insert('help', 'me'.bytes())!
// Search for values
value := tree.search('hello')! // Returns 'world' as bytes
println(value.bytestr()) // Prints: world
// Delete keys
tree.delete('help')!
```
## Implementation Details
### Node Serialization
Nodes are serialized in a compact binary format:
```
[Version(1B)][KeySegment][ValueLength(2B)][Value][ChildrenCount(2B)][Children][IsLeaf(1B)]
```
Where each child is stored as:
```
[KeyPart][NodeID(4B)]
```
### Space Optimization
The radix tree optimizes space usage by:
1. Sharing common prefixes between keys
2. Storing only key segments at each node instead of complete keys
3. Merging nodes with single children when possible
4. Using OurDB's efficient storage and retrieval mechanisms
### Performance Characteristics
- Search: O(k) where k is the key length
- Insert: O(k) for new keys, may require node splitting
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total length of all keys
## Relationship with OurDB
This radix tree implementation leverages OurDB's features:
- Persistent storage with automatic file management
- Record-based storage with unique IDs
- Data integrity through CRC32 checksums
- Configurable record sizes
- Automatic file size management
The integration provides:
- Durability: All tree operations are persisted
- Consistency: Tree state is maintained across restarts
- Efficiency: Leverages OurDB's optimized storage
- Scalability: Handles large datasets through OurDB's file management
## Use Cases
Radix trees are particularly useful for:
- Prefix-based searching
- IP routing tables
- Dictionary implementations
- Auto-complete systems
- File system paths
- Any application requiring efficient string key operations with persistence

View File

@@ -0,0 +1,124 @@
module radixtree
// Smoke test: a value inserted under a key can be read back unchanged.
// The database under /tmp/radixtree_test is reset on every run.
fn test_basic_operations() ! {
	mut rt := new(path: '/tmp/radixtree_test', reset: true)!

	// Insert one key and read it straight back
	rt.insert('test', 'value1'.bytes())!
	assert rt.search('test')!.bytestr() == 'value1'

	// The checks below are currently disabled.
	// // Test updating existing key
	// rt.insert('test', 'value2'.bytes())!
	// value2 := rt.search('test')!
	// assert value2.bytestr() == 'value2'
	// // Test non-existent key
	// if _ := rt.search('nonexistent') {
	// assert false, 'Expected error for non-existent key'
	// }
	// // Test delete
	// rt.delete('test')!
	// if _ := rt.search('test') {
	// assert false, 'Expected error after deletion'
	// }
}
// fn test_prefix_matching() ! {
// mut rt := new('/tmp/radixtree_test_prefix')!
// // Insert keys with common prefixes
// rt.insert('team', 'value1'.bytes())!
// rt.insert('test', 'value2'.bytes())!
// rt.insert('testing', 'value3'.bytes())!
// // Verify each key has correct value
// value1 := rt.search('team')!
// assert value1.bytestr() == 'value1'
// value2 := rt.search('test')!
// assert value2.bytestr() == 'value2'
// value3 := rt.search('testing')!
// assert value3.bytestr() == 'value3'
// // Delete middle key and verify others still work
// rt.delete('test')!
// if _ := rt.search('test') {
// assert false, 'Expected error after deletion'
// }
// value1_after := rt.search('team')!
// assert value1_after.bytestr() == 'value1'
// value3_after := rt.search('testing')!
// assert value3_after.bytestr() == 'value3'
// }
// fn test_edge_cases() ! {
// mut rt := new('/tmp/radixtree_test_edge')!
// // Test empty key
// rt.insert('', 'empty'.bytes())!
// empty_value := rt.search('')!
// assert empty_value.bytestr() == 'empty'
// // Test very long key
// long_key := 'a'.repeat(1000)
// rt.insert(long_key, 'long'.bytes())!
// long_value := rt.search(long_key)!
// assert long_value.bytestr() == 'long'
// // Test keys that require node splitting
// rt.insert('test', 'value1'.bytes())!
// rt.insert('testing', 'value2'.bytes())!
// rt.insert('te', 'value3'.bytes())!
// value1 := rt.search('test')!
// assert value1.bytestr() == 'value1'
// value2 := rt.search('testing')!
// assert value2.bytestr() == 'value2'
// value3 := rt.search('te')!
// assert value3.bytestr() == 'value3'
// }
// fn test_multiple_operations() ! {
// mut rt := new('/tmp/radixtree_test_multiple')!
// // Insert multiple keys
// keys := ['abc', 'abcd', 'abcde', 'bcd', 'bcde']
// for i, key in keys {
// rt.insert(key, 'value${i + 1}'.bytes())!
// }
// // Verify all keys
// for i, key in keys {
// value := rt.search(key)!
// assert value.bytestr() == 'value${i + 1}'
// }
// // Delete some keys
// rt.delete('abcd')!
// rt.delete('bcde')!
// // Verify remaining keys
// remaining := ['abc', 'abcde', 'bcd']
// expected := ['value1', 'value3', 'value4']
// for i, key in remaining {
// value := rt.search(key)!
// assert value.bytestr() == expected[i]
// }
// // Verify deleted keys return error
// deleted := ['abcd', 'bcde']
// for key in deleted {
// if _ := rt.search(key) {
// assert false, 'Expected error for deleted key: ${key}'
// }
// }
// }

View File

@@ -0,0 +1,289 @@
module radixtree
import freeflowuniverse.herolib.data.ourdb
// Represents a node in the radix tree.
// A Node is what gets serialized into one database record (see serialize_node).
// Traversal in insert/search/delete compares the `key_part` held in the parent's
// NodeRef; `key_segment` is stored alongside and shown by the debug helpers.
struct Node {
mut:
key_segment string // The segment of the key stored at this node
value []u8 // Value stored at this node (empty if not a leaf)
children []NodeRef // References to child nodes
is_leaf bool // Whether this node is a leaf node, i.e. whether `value` is a stored entry
}
// Reference to a node in the database.
// Stored inside the parent's `children` list; it labels the edge to the child
// with `key_part` and locates the child record through `node_id`.
struct NodeRef {
mut:
key_part string // The key segment for this child
node_id u32 // Database ID of the node
}
// RadixTree represents a radix tree data structure.
// It keeps no nodes in memory: every operation reads and writes node records
// through `db`, starting at the record identified by `root_id`.
pub struct RadixTree {
mut:
db &ourdb.OurDB // Database for persistent storage
root_id u32 // Database ID of the root node
}
// NewArgs holds the options accepted by `new`.
// Both fields are passed through unchanged to `ourdb.new`.
pub struct NewArgs {
pub mut:
path string // Passed as the `path` argument of the backing database
reset bool // Passed as the `reset` argument of the backing database
}
// new opens (or creates) a radix tree backed by an OurDB database.
//
// `args.path` and `args.reset` are forwarded to `ourdb.new`; the database is
// opened in incremental mode with a 4KB maximum record size.
//
// When the database reports 0 as its next ID, an empty root node is written and
// must receive ID 0. Otherwise record 0 is read and decoded so that a missing
// or undecodable root is reported here rather than on the first operation.
//
// Returns an error if the database cannot be opened, if the root record cannot
// be written/read/decoded, or if a freshly created root does not get ID 0.
pub fn new(args NewArgs) !&RadixTree {
	mut db := ourdb.new(
		path: args.path
		record_size_max: 1024 * 4 // 4KB max record size
		incremental_mode: true
		reset: args.reset
	)!

	mut root_id := u32(0)
	if db.get_next_id()! == 0 {
		// Fresh database: persist an empty, value-less root
		root := Node{
			key_segment: ''
			value: []u8{}
			children: []NodeRef{}
			is_leaf: false
		}
		root_id = db.set(data: serialize_node(root))!
		// The rest of the module reopens the tree at record 0, so anything else
		// is an unusable state; report it instead of aborting the program.
		if root_id != 0 {
			return error('radixtree: expected root node to receive ID 0, got ${root_id}')
		}
	} else {
		// Existing database: make sure the root record is present and decodable
		_ := deserialize_node(db.get(root_id)!)!
	}

	return &RadixTree{
		db: &db
		root_id: root_id
	}
}
// insert stores `value` under `key`, replacing any value already stored there.
//
// The tree is walked from the root. At each node the child whose key part
// shares a prefix with the remaining key is followed. When only part of that
// child's key part matches, the edge is split by placing a new intermediate
// node between parent and child. When no child shares a prefix, a new leaf
// holding the rest of the key is attached to the current node.
// An empty key stores the value on the root node itself.
//
// Returns an error if `value` is longer than 65535 bytes (serialize_node writes
// the value length as a u16), or if a database read/write or node decode fails.
pub fn (mut rt RadixTree) insert(key string, value []u8) ! {
	if value.len > 0xFFFF {
		return error('Value too large: ${value.len} bytes (max 65535)')
	}

	mut current_id := rt.root_id
	mut offset := 0

	if key.len == 0 {
		// Nothing to traverse: the empty key addresses the root node
		mut root := deserialize_node(rt.db.get(current_id)!)!
		root.value = value
		root.is_leaf = true
		rt.db.set(id: current_id, data: serialize_node(root))!
		return
	}

	for offset < key.len {
		mut node := deserialize_node(rt.db.get(current_id)!)!
		rest := key[offset..]

		// Find the child sharing at least one leading byte with the remaining key
		mut matched_child := -1
		mut common_prefix := ''
		for i, child in node.children {
			shared := get_common_prefix(rest, child.key_part)
			if shared.len > 0 {
				matched_child = i
				common_prefix = shared
				break
			}
		}

		if matched_child == -1 {
			// No child shares a prefix: attach the rest of the key as a new leaf
			new_node := Node{
				key_segment: rest
				value: value
				children: []NodeRef{}
				is_leaf: true
			}
			new_id := rt.db.set(data: serialize_node(new_node))!
			node.children << NodeRef{
				key_part: rest
				node_id: new_id
			}
			rt.db.set(id: current_id, data: serialize_node(node))!
			return
		}

		child := node.children[matched_child]
		mut next_id := child.node_id

		if common_prefix.len < child.key_part.len {
			// Partial match: split the edge. The existing child keeps its record
			// (value and children untouched) and becomes the suffix below a new
			// intermediate node that carries the shared prefix.
			suffix := child.key_part[common_prefix.len..]
			mut tail := deserialize_node(rt.db.get(child.node_id)!)!
			tail.key_segment = suffix
			rt.db.set(id: child.node_id, data: serialize_node(tail))!

			branch := Node{
				key_segment: common_prefix
				value: []u8{}
				children: [
					NodeRef{
						key_part: suffix
						node_id: child.node_id
					},
				]
				is_leaf: false
			}
			next_id = rt.db.set(data: serialize_node(branch))!

			// The parent is repointed last, after both records below it exist
			node.children[matched_child] = NodeRef{
				key_part: common_prefix
				node_id: next_id
			}
			rt.db.set(id: current_id, data: serialize_node(node))!
		}

		if offset + common_prefix.len == key.len {
			// The key ends exactly at this node: store the value here
			mut target := deserialize_node(rt.db.get(next_id)!)!
			target.value = value
			target.is_leaf = true
			rt.db.set(id: next_id, data: serialize_node(target))!
			return
		}

		offset += common_prefix.len
		current_id = next_id
	}
}
// search returns the value stored under `key`.
//
// Starting at the root, the child whose key part is a prefix of the remaining
// key is followed until the whole key has been consumed. The node reached this
// way must be flagged as a leaf for the lookup to succeed. An empty key
// consumes nothing and therefore addresses the root node.
//
// Returns error('Key not found') when no child matches or the final node is
// not a leaf; database and decode errors are propagated.
pub fn (mut rt RadixTree) search(key string) ![]u8 {
	mut current_id := rt.root_id
	mut offset := 0

	for offset < key.len {
		node := deserialize_node(rt.db.get(current_id)!)!
		rest := key[offset..]
		mut found := false
		for child in node.children {
			// An empty key part would match everything without consuming any
			// input, so it is skipped to guarantee the walk makes progress.
			if child.key_part.len > 0 && rest.starts_with(child.key_part) {
				current_id = child.node_id
				offset += child.key_part.len
				found = true
				break
			}
		}
		if !found {
			return error('Key not found')
		}
	}

	// The whole key was consumed; it is present only if this node holds a value
	target := deserialize_node(rt.db.get(current_id)!)!
	if target.is_leaf {
		return target.value
	}
	return error('Key not found')
}
// delete removes `key` from the tree.
//
// The key is located by following children whose key part is a prefix of the
// remaining key. The node reached must be flagged as a leaf. If it has no
// children, its reference is removed from the parent (the root acts as parent
// for first-level nodes); the node's own record is left in the database.
// If it still has children, or it is the root, it is kept and only its value
// and leaf flag are cleared.
//
// Returns error('Key not found') when the key cannot be followed or the node
// reached holds no value; database and decode errors are propagated.
pub fn (mut rt RadixTree) delete(key string) ! {
	mut current_id := rt.root_id
	mut parent_id := rt.root_id
	mut offset := 0

	// Find the node to delete, remembering the node it hangs from
	for offset < key.len {
		node := deserialize_node(rt.db.get(current_id)!)!
		rest := key[offset..]
		mut found := false
		for child in node.children {
			if child.key_part.len > 0 && rest.starts_with(child.key_part) {
				parent_id = current_id
				current_id = child.node_id
				offset += child.key_part.len
				found = true
				break
			}
		}
		if !found {
			return error('Key not found')
		}
	}

	mut target := deserialize_node(rt.db.get(current_id)!)!
	if !target.is_leaf {
		// The path exists but no value is stored at its end
		return error('Key not found')
	}
	target.is_leaf = false
	target.value = []u8{}

	if target.children.len == 0 && current_id != rt.root_id {
		// Nothing hangs below: drop the reference from the parent
		mut parent_node := deserialize_node(rt.db.get(parent_id)!)!
		for i, child in parent_node.children {
			if child.node_id == current_id {
				parent_node.children.delete(i)
				break
			}
		}
		rt.db.set(id: parent_id, data: serialize_node(parent_node))!
	} else {
		// Still needed as a branch (or it is the root): persist the cleared node
		rt.db.set(id: current_id, data: serialize_node(target))!
	}
}
// get_common_prefix returns the longest leading run of bytes that `a` and `b`
// have in common (compared byte by byte), as a slice of `a`.
// The result is empty when either string is empty or the first bytes differ.
fn get_common_prefix(a string, b string) string {
	limit := if a.len < b.len { a.len } else { b.len }
	mut shared := 0
	for shared < limit {
		if a[shared] != b[shared] {
			break
		}
		shared++
	}
	return a[..shared]
}

View File

@@ -0,0 +1,111 @@
module radixtree
import freeflowuniverse.herolib.data.ourdb
// get_node_by_id loads the record stored under `id` and decodes it into a Node.
// A debug line with the node's child count is printed on every successful call.
// Errors from the database read or from decoding are propagated.
pub fn (mut rt RadixTree) get_node_by_id(id u32) !Node {
	raw := rt.db.get(id)!
	loaded := deserialize_node(raw)!
	println('Debug: Retrieved node ${id} with ${loaded.children.len} children')
	return loaded
}
// debug_node prints `msg` followed by a summary of node `id`: its key segment,
// leaf flag, child count and one line per child reference.
// Fails if the node cannot be loaded.
pub fn (mut rt RadixTree) debug_node(id u32, msg string) ! {
	current := rt.get_node_by_id(id)!
	child_count := current.children.len
	println('Debug: ${msg}')
	println(' Node ID: ${id}')
	println(' Key Segment: "${current.key_segment}"')
	println(' Is Leaf: ${current.is_leaf}')
	println(' Children: ${child_count}')
	for link in current.children {
		println(' - Child ID: ${link.node_id}, Key Part: "${link.key_part}"')
	}
}
// debug_db dumps every record ID from 0 up to (but excluding) the database's
// next ID. Records that cannot be read or decoded are reported on their own
// line and do not stop the dump; only a failure to obtain the next ID is
// returned as an error.
pub fn (mut rt RadixTree) debug_db() ! {
	println('\nDatabase State:')
	println('===============')
	upper := rt.db.get_next_id()!
	for id := u32(0); id < upper; id++ {
		raw := rt.db.get(id) or {
			println('ID ${id}: No data')
			continue
		}
		entry := deserialize_node(raw) or {
			println('ID ${id}: Failed to deserialize node')
			continue
		}
		println('ID ${id}:')
		println(' Key Segment: "${entry.key_segment}"')
		println(' Is Leaf: ${entry.is_leaf}')
		println(' Children: ${entry.children.len}')
		for link in entry.children {
			println(' - Child ID: ${link.node_id}, Key Part: "${link.key_part}"')
		}
	}
}
// Prints the tree structure starting from a given node ID.
// The node's description (id, key segment, leaf flag, value when it is a leaf,
// and its child references) is printed first; every child is then printed
// recursively with `indent` extended. Fails if any node on the way cannot be
// loaded through get_node_by_id.
pub fn (mut rt RadixTree) print_tree_from_node(node_id u32, indent string) ! {
node := rt.get_node_by_id(node_id)!
// The description is accumulated so the node is emitted with a single println
mut node_info := '${indent}Node(id: ${node_id})'
node_info += '\n${indent} key_segment: "${node.key_segment}"'
node_info += '\n${indent} is_leaf: ${node.is_leaf}'
if node.is_leaf {
// The value is rendered as text via bytestr()
node_info += '\n${indent} value: ${node.value.bytestr()}'
}
node_info += '\n${indent} children: ${node.children.len}'
if node.children.len > 0 {
// Append a comma-separated "id:key_part" list on the same line
node_info += ' ['
for i, child in node.children {
if i > 0 { node_info += ', ' }
node_info += '${child.node_id}:${child.key_part}'
}
node_info += ']'
}
println(node_info)
// Print children recursively with increased indentation
for i, child in node.children {
is_last := i == node.children.len - 1
// NOTE(review): both branches append the same literal as shown here — confirm
// whether the last child was meant to get a different indent.
child_indent := if is_last {
indent + ' '
} else {
indent + ' '
}
rt.print_tree_from_node(child.node_id, child_indent)!
}
}
// print_tree prints a banner and then the whole tree, beginning at the root
// node with no indentation. Errors from loading nodes are propagated.
pub fn (mut rt RadixTree) print_tree() ! {
	banner := '\nRadix Tree Structure:'
	println(banner)
	println('===================')
	start := rt.root_id
	rt.print_tree_from_node(start, '')!
}
// get_node_info returns a multi-line, human-readable description of node `id`:
// ID, key segment, leaf flag, the value (leaf nodes only), the child count and
// one line per child reference. Fails if the node cannot be loaded.
pub fn (mut rt RadixTree) get_node_info(id u32) !string {
	target := rt.get_node_by_id(id)!

	mut report := 'Node Details:\n'
	report += '=============\n'
	report += 'ID: ${id}\n'
	report += 'Key Segment: "${target.key_segment}"\n'
	report += 'Is Leaf: ${target.is_leaf}\n'
	if target.is_leaf {
		report += 'Value: ${target.value}\n'
	}
	report += 'Number of Children: ${target.children.len}\n'

	// The children section is omitted entirely for childless nodes
	if target.children.len == 0 {
		return report
	}
	report += '\nChildren:\n'
	for link in target.children {
		report += '- ID: ${link.node_id}, Key Part: "${link.key_part}"\n'
	}
	return report
}

View File

@@ -0,0 +1,77 @@
module radixtree
import freeflowuniverse.herolib.data.encoder
// Format version of serialized nodes: serialize_node writes it as the first
// byte and deserialize_node rejects data whose first byte differs.
const (
version = u8(1) // Current binary format version
)
// serialize_node encodes a node into the byte layout read by deserialize_node:
// version byte, key segment, value length (u16) + raw value bytes,
// child count (u16) + for each child its key part and node ID (u32),
// and finally one byte for the leaf flag (1 = leaf, 0 = not a leaf).
// Value length and child count are cast to u16 without a range check.
fn serialize_node(node Node) []u8 {
	mut enc := encoder.new()

	// Header: format version, then this node's key segment
	enc.add_u8(version)
	enc.add_string(node.key_segment)

	// Value: explicit length prefix followed by the bytes themselves
	value_len := u16(node.value.len)
	enc.add_u16(value_len)
	enc.data << node.value

	// Child references, in list order
	child_count := u16(node.children.len)
	enc.add_u16(child_count)
	for link in node.children {
		enc.add_string(link.key_part)
		enc.add_u32(link.node_id)
	}

	// Trailer: leaf flag
	leaf_flag := if node.is_leaf { u8(1) } else { u8(0) }
	enc.add_u8(leaf_flag)

	return enc.data
}
// deserialize_node decodes bytes produced by serialize_node back into a Node.
// Fields are read in the order they were written: version, key segment,
// value (u16 length + bytes), children (u16 count + key part / node ID pairs),
// leaf flag. Returns an error when the first byte is not the current `version`.
fn deserialize_node(data []u8) !Node {
	mut dec := encoder.decoder_new(data)

	// Refuse data written in a different format version
	found_version := dec.get_u8()
	if found_version != version {
		return error('Invalid version byte: expected ${version}, got ${found_version}')
	}

	segment := dec.get_string()

	// Value bytes are read one at a time, as many as the length prefix says
	payload_len := int(dec.get_u16())
	mut payload := []u8{cap: payload_len}
	for _ in 0 .. payload_len {
		payload << dec.get_u8()
	}

	// Child references
	child_count := int(dec.get_u16())
	mut links := []NodeRef{cap: child_count}
	for _ in 0 .. child_count {
		part := dec.get_string()
		id := dec.get_u32()
		links << NodeRef{
			key_part: part
			node_id: id
		}
	}

	// Only the exact byte 1 means "leaf"
	leaf := dec.get_u8() == 1

	return Node{
		key_segment: segment
		value: payload
		children: links
		is_leaf: leaf
	}
}

View File

@@ -0,0 +1,110 @@
module radixtree
// Round-trips a leaf node with a value and two children through
// serialize_node / deserialize_node and checks that every field survives.
fn test_serialize_deserialize() {
	first := NodeRef{
		key_part: 'child1'
		node_id: 1
	}
	second := NodeRef{
		key_part: 'child2'
		node_id: 2
	}
	node := Node{
		key_segment: 'test'
		value: 'hello world'.bytes()
		children: [first, second]
		is_leaf: true
	}

	data := serialize_node(node)
	// The encoded form must start with the format version
	assert data[0] == version

	decoded := deserialize_node(data)!
	assert decoded.key_segment == node.key_segment
	assert decoded.value == node.value
	assert decoded.is_leaf == node.is_leaf
	assert decoded.children.len == node.children.len

	// Children must come back in the same order with the same contents
	for i, expected in node.children {
		assert decoded.children[i].key_part == expected.key_part
		assert decoded.children[i].node_id == expected.node_id
	}
}
// A node with an empty key segment, no value and no children must round-trip
// through serialization unchanged.
fn test_empty_node() {
	blank := Node{
		key_segment: ''
		value: []u8{}
		children: []NodeRef{}
		is_leaf: false
	}

	restored := deserialize_node(serialize_node(blank))!

	assert restored.key_segment == blank.key_segment
	assert restored.value == blank.value
	assert restored.children == blank.children
	assert restored.is_leaf == blank.is_leaf
}
// Round-trips a node carrying a 1000-byte value and 100 child references,
// then spot-checks the first, middle and last child.
fn test_large_values() {
	// Byte i holds the low 8 bits of its own index
	payload := []u8{len: 1000, init: u8(index & 0xFF)}

	mut links := []NodeRef{cap: 100}
	for n in 0 .. 100 {
		links << NodeRef{
			key_part: 'child${n}'
			node_id: u32(n)
		}
	}

	node := Node{
		key_segment: 'large_test'
		value: payload
		children: links
		is_leaf: true
	}

	decoded := deserialize_node(serialize_node(node))!

	assert decoded.key_segment == node.key_segment
	assert decoded.value == node.value
	assert decoded.children.len == node.children.len

	for pos in [0, 50, 99] {
		assert decoded.children[pos] == node.children[pos]
	}
}
// Data whose first byte is not the current format version must be rejected
// with the exact version-mismatch message.
fn test_invalid_version() {
	node := Node{
		key_segment: 'test'
		value: []u8{}
		children: []NodeRef{}
		is_leaf: false
	}
	mut data := serialize_node(node)

	// Overwrite the version byte with a value that is not the current version
	data[0] = 255

	deserialize_node(data) or {
		assert err.msg() == 'Invalid version byte: expected ${version}, got 255'
		return
	}
	// Reaching this point means decoding unexpectedly succeeded
	assert false, 'Expected error for invalid version byte'
}