diff --git a/examples/clients/gitea.vsh b/examples/clients/gitea.vsh index 671003d3..b9c11098 100755 --- a/examples/clients/gitea.vsh +++ b/examples/clients/gitea.vsh @@ -18,9 +18,9 @@ heroscript := " " // Process the heroscript configuration -playcmds.play(heroscript: heroscript, emptycheck: false)! +// playcmds.play(heroscript: heroscript, emptycheck: false)! -println(giteaclient.list()!) +println(giteaclient.list(fromdb:true)!) $dbg; diff --git a/lib/biz/bizmodel/play.v b/lib/biz/bizmodel/play.v index 949fb370..c44d6eca 100644 --- a/lib/biz/bizmodel/play.v +++ b/lib/biz/bizmodel/play.v @@ -12,6 +12,10 @@ const action_priorities = { } pub fn play(mut plbook PlayBook) ! { + + if plbook.exists(filter: 'bizmodel.')==false{ + return + } // group actions by which bizmodel they belong to actions_by_biz := arrays.group_by[string, &Action](plbook.find(filter: 'bizmodel.*')!, fn (a &Action) string { diff --git a/lib/core/base/context.v b/lib/core/base/context.v index 34061865..e166bbf7 100644 --- a/lib/core/base/context.v +++ b/lib/core/base/context.v @@ -80,8 +80,6 @@ pub fn (mut self Context) redis() !&redisclient.Redis { pub fn (mut self Context) save() ! { jsonargs := json.encode_pretty(self.config) mut r := self.redis()! - // console.print_debug("save") - // console.print_debug(jsonargs) r.set('context:config', jsonargs)! } @@ -89,8 +87,6 @@ pub fn (mut self Context) save() ! { pub fn (mut self Context) load() ! { mut r := self.redis()! d := r.get('context:config')! - // console.print_debug("load") - // console.print_debug(d) if d.len > 0 { self.config = json.decode(ContextConfig, d)! } @@ -130,38 +126,37 @@ pub fn (mut self Context) db_config_get() !dbfs.DB { return dbc.db_get_create(name: 'config', withkeys: true)! } -pub fn (mut self Context) hero_config_set(cat string, name string, content_ string) ! 
{ - mut content := texttools.dedent(content_) - content = rootpath.shell_expansion(content) - path := '${self.path()!.path}/${cat}/${name}.hero' - mut config_file := pathlib.get_file(path: path,create: true)! - config_file.write(content)! -} +// pub fn (mut self Context) hero_config_set(cat string, name string, content_ string) ! { +// mut content := texttools.dedent(content_) +// content = rootpath.shell_expansion(content) +// path := '${self.path()!.path}/${cat}/${name}.json' +// mut config_file := pathlib.get_file(path: path,create: true)! +// config_file.write(content)! +// } -pub fn (mut self Context) hero_config_delete(cat string, name string) ! { - path := '${self.path()!.path}/${cat}/${name}.hero' - mut config_file := pathlib.get_file(path: path)! - config_file.delete()! -} +// pub fn (mut self Context) hero_config_delete(cat string, name string) ! { +// path := '${self.path()!.path}/${cat}/${name}.json' +// mut config_file := pathlib.get_file(path: path)! +// config_file.delete()! +// } -pub fn (mut self Context) hero_config_exists(cat string, name string) bool { - path := '${os.home_dir()}/hero/context/${self.config.name}/${cat}/${name}.hero' - return os.exists(path) -} +// pub fn (mut self Context) hero_config_exists(cat string, name string) bool { +// path := '${os.home_dir()}/hero/context/${self.config.name}/${cat}/${name}.json' +// return os.exists(path) +// } -pub fn (mut self Context) hero_config_get(cat string, name string) !string { - path := '${self.path()!.path}/${cat}/${name}.hero' - mut config_file := pathlib.get_file(path: path, create: false)! - return config_file.read()! -} +// pub fn (mut self Context) hero_config_get(cat string, name string) !string { +// path := '${self.path()!.path}/${cat}/${name}.json' +// mut config_file := pathlib.get_file(path: path, create: false)! +// return config_file.read()! 
+// } -pub fn (mut self Context) hero_config_list(cat string) ![]string { - path := '${self.path()!.path}/${cat}/*.hero' - mut config_files := os.ls(path)! - println(config_files) - $dbg; - return config_files -} +// pub fn (mut self Context) hero_config_list(cat string) ![]string { +// path := '${self.path()!.path}/${cat}' +// mut config_files := os.ls(path)! +// config_files = config_files.filter(it.ends_with('.json')).map(it.split('.')[0] or {panic("bug")}) +// return config_files +// } pub fn (mut self Context) secret_encrypt(txt string) !string { diff --git a/lib/core/playcmds/play_core.v b/lib/core/playcmds/play_core.v index 5f9f0598..1ad28f88 100644 --- a/lib/core/playcmds/play_core.v +++ b/lib/core/playcmds/play_core.v @@ -11,6 +11,11 @@ import os // ------------------------------------------------------------------- fn play_core(mut plbook PlayBook) ! { + + if plbook.exists(filter: 'play.')==false && plbook.exists(filter: 'core.')==false{ + return + } + + // ---------------------------------------------------------------- // 1. Include handling (play include / echo) // ---------------------------------------------------------------- diff --git a/lib/core/playcmds/play_git.v b/lib/core/playcmds/play_git.v index 56324f3e..e06df621 100644 --- a/lib/core/playcmds/play_git.v +++ b/lib/core/playcmds/play_git.v @@ -11,6 +11,11 @@ import freeflowuniverse.herolib.ui.console // For verbose error reporting // --------------------------------------------------------------- fn play_git(mut plbook PlayBook) ! { + + if plbook.exists(filter: 'git.')==false{ + return + } + mut gs := gittools.new()! define_actions := plbook.find(filter: 'git.define')!
diff --git a/lib/core/playcmds/play_luadns.v b/lib/core/playcmds/play_luadns.v index eb574b83..443746dd 100644 --- a/lib/core/playcmds/play_luadns.v +++ b/lib/core/playcmds/play_luadns.v @@ -5,6 +5,12 @@ import freeflowuniverse.herolib.core.playbook { PlayBook } // import os fn play_luadns(mut plbook PlayBook) ! { + + if plbook.exists(filter: 'luadns.')==false{ + return + } + + // Variables below are not used, commenting them out // mut buildroot := '${os.home_dir()}/hero/var/mdbuild' // mut publishroot := '${os.home_dir()}/hero/www/info' diff --git a/lib/core/playcmds/play_ssh.v b/lib/core/playcmds/play_ssh.v index ab0c4f0a..480b526e 100644 --- a/lib/core/playcmds/play_ssh.v +++ b/lib/core/playcmds/play_ssh.v @@ -4,6 +4,12 @@ import freeflowuniverse.herolib.osal.sshagent import freeflowuniverse.herolib.core.playbook { PlayBook } fn play_ssh(mut plbook PlayBook) ! { + + + if plbook.exists(filter: 'sshagent.')==false{ + return + } + mut agent := sshagent.new()! for mut action in plbook.find(filter: 'sshagent.*')! { mut p := action.params diff --git a/lib/data/dbfs/readme.md b/lib/data/dbfs/readme.md index 1f7ae9b8..954c524c 100644 --- a/lib/data/dbfs/readme.md +++ b/lib/data/dbfs/readme.md @@ -25,22 +25,10 @@ assert 'bbbb' == db.get('a')! ``` -## dbname - -DBName has functionality to efficiently store millions of names and generate a unique id for it, each name gets a unique id, and based on the id the name can be found back easily. - -Some string based data can be attached to one name so it becomes a highly efficient key value stor, can be used for e.g. having DB of pubkeys, for a nameserver, ... - ## dbfs examples -Each session has such a DB attached to it, data is stored on filesystem, -e.g. 
ideal for config sessions (which are done on context level) - - -```golang - -> TODO: fix, we refactored +```go import freeflowuniverse.herolib.data.dbfs diff --git a/lib/data/doctree/play.v b/lib/data/doctree/play.v index 1f498f29..0579d582 100644 --- a/lib/data/doctree/play.v +++ b/lib/data/doctree/play.v @@ -4,6 +4,12 @@ import freeflowuniverse.herolib.core.playbook { PlayBook } // import freeflowuniverse.herolib.ui.console pub fn play(mut plbook PlayBook) ! { + + if !plbook.exists(filter: 'doctree.') { + return + } + + mut doctrees := map[string]&Tree{} mut collection_actions := plbook.find(filter: 'doctree.scan')! diff --git a/lib/data/radixtree/correctness_test.v b/lib/data/radixtree/correctness_test.v new file mode 100644 index 00000000..bcf7177b --- /dev/null +++ b/lib/data/radixtree/correctness_test.v @@ -0,0 +1,271 @@ +module radixtree + +import freeflowuniverse.herolib.ui.console + +// Test for the critical bug: prefix-of-existing edge inserted after the longer key +fn test_prefix_overlap_bug() ! { + console.print_debug('Testing prefix overlap bug fix') + mut rt := new(path: '/tmp/radixtree_prefix_overlap_test', reset: true)! + + // Insert longer key first + rt.set('test', 'value1'.bytes())! + rt.set('testing', 'value2'.bytes())! + + // Now insert shorter key that is a prefix - this was the bug + rt.set('te', 'value3'.bytes())! + + // Verify all keys work regardless of child iteration order + value1 := rt.get('test')! + assert value1.bytestr() == 'value1', 'Failed to get "test"' + + value2 := rt.get('testing')! + assert value2.bytestr() == 'value2', 'Failed to get "testing"' + + value3 := rt.get('te')! + assert value3.bytestr() == 'value3', 'Failed to get "te"' + + // Test that all keys are found in list + all_keys := rt.list('')! 
+ assert 'test' in all_keys, '"test" not found in list' + assert 'testing' in all_keys, '"testing" not found in list' + assert 'te' in all_keys, '"te" not found in list' + + console.print_debug('Prefix overlap bug test passed') +} + +// Test partial overlap where neither key is a prefix of the other +fn test_partial_overlap_split() ! { + console.print_debug('Testing partial overlap split') + mut rt := new(path: '/tmp/radixtree_partial_overlap_test', reset: true)! + + // Insert keys that share a common prefix but neither is a prefix of the other + rt.set('foobar', 'value1'.bytes())! + console.print_debug('After inserting foobar') + rt.print_tree()! + + rt.set('foobaz', 'value2'.bytes())! + console.print_debug('After inserting foobaz') + rt.print_tree()! + + // Verify both keys work + value1 := rt.get('foobar')! + assert value1.bytestr() == 'value1', 'Failed to get "foobar"' + + value2 := rt.get('foobaz')! + assert value2.bytestr() == 'value2', 'Failed to get "foobaz"' + + // Test prefix search + foo_keys := rt.list('foo')! + console.print_debug('foo_keys: ${foo_keys}') + assert foo_keys.len == 2, 'Expected 2 keys with prefix "foo"' + assert 'foobar' in foo_keys, '"foobar" not found with prefix "foo"' + assert 'foobaz' in foo_keys, '"foobaz" not found with prefix "foo"' + + fooba_keys := rt.list('fooba')! + assert fooba_keys.len == 2, 'Expected 2 keys with prefix "fooba"' + + console.print_debug('Partial overlap split test passed') +} + +// Test deletion with path compression +fn test_deletion_compression() ! { + console.print_debug('Testing deletion with path compression') + mut rt := new(path: '/tmp/radixtree_deletion_compression_test', reset: true)! + + // Insert keys that will create intermediate nodes + rt.set('car', 'value1'.bytes())! + rt.set('cargo', 'value2'.bytes())! + + // Verify both keys exist + value1 := rt.get('car')! + assert value1.bytestr() == 'value1', 'Failed to get "car"' + + value2 := rt.get('cargo')! 
+ assert value2.bytestr() == 'value2', 'Failed to get "cargo"' + + // Delete the shorter key + rt.delete('car')! + + // Verify the longer key still works (tests compression) + value2_after := rt.get('cargo')! + assert value2_after.bytestr() == 'value2', 'Failed to get "cargo" after deletion' + + // Verify the deleted key is gone + if _ := rt.get('car') { + assert false, 'Expected "car" to be deleted' + } + + console.print_debug('Deletion compression test passed') +} + +// Test large fan-out to stress the system +fn test_large_fanout() ! { + console.print_debug('Testing large fan-out') + mut rt := new(path: '/tmp/radixtree_large_fanout_test', reset: true)! + + // Insert keys with single character differences to create large fan-out + for i in 0 .. 100 { + key := 'prefix${i:03d}' + rt.set(key, 'value${i}'.bytes())! + } + + // Verify all keys can be retrieved + for i in 0 .. 100 { + key := 'prefix${i:03d}' + value := rt.get(key)! + expected := 'value${i}' + assert value.bytestr() == expected, 'Failed to get key "${key}"' + } + + // Test prefix search + prefix_keys := rt.list('prefix')! + assert prefix_keys.len == 100, 'Expected 100 keys with prefix "prefix"' + + console.print_debug('Large fan-out test passed') +} + +// Test sorted output +fn test_sorted_output() ! { + console.print_debug('Testing sorted output') + mut rt := new(path: '/tmp/radixtree_sorted_test', reset: true)! + + // Insert keys in random order + keys := ['zebra', 'apple', 'banana', 'cherry', 'date'] + for key in keys { + rt.set(key, '${key}_value'.bytes())! + } + + // Get all keys and verify they are sorted + all_keys := rt.list('')! 
+ assert all_keys.len == keys.len, 'Expected ${keys.len} keys' + + // Check if sorted (should be: apple, banana, cherry, date, zebra) + expected_order := ['apple', 'banana', 'cherry', 'date', 'zebra'] + for i, expected_key in expected_order { + assert all_keys[i] == expected_key, 'Expected key at position ${i} to be "${expected_key}", got "${all_keys[i]}"' + } + + console.print_debug('Sorted output test passed') +} + +// Test edge case: empty key +fn test_empty_key() ! { + console.print_debug('Testing empty key') + mut rt := new(path: '/tmp/radixtree_empty_key_test', reset: true)! + + // Set empty key + rt.set('', 'empty_value'.bytes())! + + // Set regular key + rt.set('regular', 'regular_value'.bytes())! + + // Verify both work + empty_value := rt.get('')! + assert empty_value.bytestr() == 'empty_value', 'Failed to get empty key' + + regular_value := rt.get('regular')! + assert regular_value.bytestr() == 'regular_value', 'Failed to get "regular"' + + // Test list with empty prefix + all_keys := rt.list('')! + assert all_keys.len == 2, 'Expected 2 keys total' + assert '' in all_keys, 'Empty key not found in list' + assert 'regular' in all_keys, '"regular" not found in list' + + console.print_debug('Empty key test passed') +} + +// Test very long keys +fn test_long_keys() ! { + console.print_debug('Testing very long keys') + mut rt := new(path: '/tmp/radixtree_long_keys_test', reset: true)! + + // Create very long keys + long_key1 := 'a'.repeat(1000) + 'key1' + long_key2 := 'a'.repeat(1000) + 'key2' + long_key3 := 'b'.repeat(500) + 'different' + + rt.set(long_key1, 'value1'.bytes())! + rt.set(long_key2, 'value2'.bytes())! + rt.set(long_key3, 'value3'.bytes())! + + // Verify retrieval + value1 := rt.get(long_key1)! + assert value1.bytestr() == 'value1', 'Failed to get long_key1' + + value2 := rt.get(long_key2)! + assert value2.bytestr() == 'value2', 'Failed to get long_key2' + + value3 := rt.get(long_key3)! 
+ assert value3.bytestr() == 'value3', 'Failed to get long_key3' + + // Test prefix search with long prefix + long_prefix_keys := rt.list('a'.repeat(1000))! + assert long_prefix_keys.len == 2, 'Expected 2 keys with long prefix' + + console.print_debug('Long keys test passed') +} + +// Test complex overlapping scenarios +fn test_complex_overlaps() ! { + console.print_debug('Testing complex overlapping scenarios') + mut rt := new(path: '/tmp/radixtree_complex_overlaps_test', reset: true)! + + // Create a complex set of overlapping keys + keys := [ + 'a', + 'ab', + 'abc', + 'abcd', + 'abcde', + 'abcdef', + 'abd', + 'ac', + 'b', + 'ba', + 'bb' + ] + + // Insert in random order to test robustness + for i, key in keys { + rt.set(key, 'value${i}'.bytes())! + } + + // Verify all keys can be retrieved + for i, key in keys { + value := rt.get(key)! + expected := 'value${i}' + assert value.bytestr() == expected, 'Failed to get key "${key}"' + } + + // Test various prefix searches + a_keys := rt.list('a')! + assert a_keys.len == 8, 'Expected 8 keys with prefix "a"' + + ab_keys := rt.list('ab')! + assert ab_keys.len == 6, 'Expected 6 keys with prefix "ab"' + + abc_keys := rt.list('abc')! + assert abc_keys.len == 4, 'Expected 4 keys with prefix "abc"' + + b_keys := rt.list('b')! + assert b_keys.len == 3, 'Expected 3 keys with prefix "b"' + + console.print_debug('Complex overlaps test passed') +} + +// Run all correctness tests +fn test_all_correctness() ! { + console.print_debug('Running all correctness tests...') + + test_prefix_overlap_bug()! + test_partial_overlap_split()! + test_deletion_compression()! + test_large_fanout()! + test_sorted_output()! + test_empty_key()! + test_long_keys()! + test_complex_overlaps()! 
+ + console.print_debug('All correctness tests passed!') +} \ No newline at end of file diff --git a/lib/data/radixtree/debug_deletion_test.v b/lib/data/radixtree/debug_deletion_test.v new file mode 100644 index 00000000..6238de45 --- /dev/null +++ b/lib/data/radixtree/debug_deletion_test.v @@ -0,0 +1,31 @@ +module radixtree + +import freeflowuniverse.herolib.ui.console + +fn test_debug_deletion() ! { + console.print_debug('Debug deletion test') + mut rt := new(path: '/tmp/radixtree_debug_deletion', reset: true)! + + console.print_debug('Inserting car') + rt.set('car', 'value1'.bytes())! + rt.print_tree()! + + console.print_debug('Inserting cargo') + rt.set('cargo', 'value2'.bytes())! + rt.print_tree()! + + console.print_debug('Testing get cargo before deletion') + value_before := rt.get('cargo')! + console.print_debug('cargo value before: ${value_before.bytestr()}') + + console.print_debug('Deleting car') + rt.delete('car')! + rt.print_tree()! + + console.print_debug('Testing get cargo after deletion') + if value_after := rt.get('cargo') { + console.print_debug('cargo value after: ${value_after.bytestr()}') + } else { + console.print_debug('ERROR: cargo not found after deletion') + } +} \ No newline at end of file diff --git a/lib/data/radixtree/debug_test.v b/lib/data/radixtree/debug_test.v new file mode 100644 index 00000000..4de2ef4f --- /dev/null +++ b/lib/data/radixtree/debug_test.v @@ -0,0 +1,32 @@ +module radixtree + +import freeflowuniverse.herolib.ui.console + +fn test_simple_debug() ! { + console.print_debug('=== Simple Debug Test ===') + mut rt := new(path: '/tmp/radixtree_debug_test', reset: true)! + + console.print_debug('Inserting "foobar"') + rt.set('foobar', 'value1'.bytes())! + rt.print_tree()! + + console.print_debug('Getting "foobar"') + value1 := rt.get('foobar')! + console.print_debug('Got value: ${value1.bytestr()}') + + console.print_debug('Inserting "foobaz"') + rt.set('foobaz', 'value2'.bytes())! + rt.print_tree()! 
+ + console.print_debug('Getting "foobar" again') + value1_again := rt.get('foobar')! + console.print_debug('Got value: ${value1_again.bytestr()}') + + console.print_debug('Getting "foobaz"') + value2 := rt.get('foobaz')! + console.print_debug('Got value: ${value2.bytestr()}') + + console.print_debug('Listing all keys') + all_keys := rt.list('')! + console.print_debug('All keys: ${all_keys}') +} \ No newline at end of file diff --git a/lib/data/radixtree/radixtree.v b/lib/data/radixtree/radixtree.v index 08300e84..00ca87a0 100644 --- a/lib/data/radixtree/radixtree.v +++ b/lib/data/radixtree/radixtree.v @@ -19,6 +19,13 @@ mut: node_id u32 // Database ID of the node } +// PathInfo tracks information about nodes in the deletion path +struct PathInfo { + node_id u32 // ID of the parent node + edge_to_child string // Edge label from parent to child + child_id u32 // ID of the child node +} + // RadixTree represents a radix tree data structure @[heap] pub struct RadixTree { @@ -74,126 +81,119 @@ pub fn (mut rt RadixTree) set(key string, value []u8) ! { mut current_id := rt.root_id mut offset := 0 - // Handle empty key case - if key.len == 0 { - mut root_node := deserialize_node(rt.db.get(current_id)!)! - root_node.is_leaf = true - root_node.value = value - rt.db.set(id: current_id, data: serialize_node(root_node))! - return - } - - for offset < key.len { + for { mut node := deserialize_node(rt.db.get(current_id)!)! + rem := key[offset..] - // Find matching child - mut matched_child := -1 - for i, child in node.children { - if key[offset..].starts_with(child.key_part) { - matched_child = i - break + if rem.len == 0 { + // turn current node into leaf (value replace) + node.is_leaf = true + node.value = value + rt.db.set(id: current_id, data: serialize_node(node))! 
+ return + } + + mut best_idx := -1 + mut best_cp := 0 + for i, ch in node.children { + cp := get_common_prefix(rem, ch.key_part).len + if cp > 0 { + best_idx = i + best_cp = cp + break // with proper invariant there can be only one candidate } } - if matched_child == -1 { - // No matching child found, create new leaf node - key_part := key[offset..] - new_node := Node{ - key_segment: key_part + if best_idx == -1 { + // no overlap at all -> add new leaf child + new_leaf := Node{ + key_segment: rem value: value children: []NodeRef{} is_leaf: true } - // console.print_debug('Debug: Creating new leaf node with key_part "${key_part}"') - new_id := rt.db.set(data: serialize_node(new_node))! - // console.print_debug('Debug: Created node ID ${new_id}') - - // Create new child reference and update parent node - // console.print_debug('Debug: Updating parent node ${current_id} to add child reference') - - // Get fresh copy of parent node - mut parent_node := deserialize_node(rt.db.get(current_id)!)! - // console.print_debug('Debug: Parent node initially has ${parent_node.children.len} children') - - // Add new child reference - parent_node.children << NodeRef{ - key_part: key_part + new_id := rt.db.set(data: serialize_node(new_leaf))! + // reload parent (avoid stale) then append child + mut parent := deserialize_node(rt.db.get(current_id)!)! 
+ parent.children << NodeRef{ + key_part: rem node_id: new_id } - // console.print_debug('Debug: Added child reference, now has ${parent_node.children.len} children') - - // Update parent node in DB - // console.print_debug('Debug: Serializing parent node with ${parent_node.children.len} children') - parent_data := serialize_node(parent_node) - // console.print_debug('Debug: Parent data size: ${parent_data.len} bytes') - - // First verify we can deserialize the data correctly - // console.print_debug('Debug: Verifying serialization...') - if _ := deserialize_node(parent_data) { - // console.print_debug('Debug: Serialization test successful - node has ${test_node.children.len} children') - } else { - // console.print_debug('Debug: ERROR - Failed to deserialize test data') - return error('Serialization verification failed') - } - - // Set with explicit ID to update existing node - // console.print_debug('Debug: Writing to DB...') - rt.db.set(id: current_id, data: parent_data)! - - // Verify by reading back and comparing - // console.print_debug('Debug: Reading back for verification...') - verify_data := rt.db.get(current_id)! - verify_node := deserialize_node(verify_data)! 
- // console.print_debug('Debug: Verification - node has ${verify_node.children.len} children') - - if verify_node.children.len == 0 { - // console.print_debug('Debug: ERROR - Node update verification failed!') - // console.print_debug('Debug: Original node children: ${node.children.len}') - // console.print_debug('Debug: Parent node children: ${parent_node.children.len}') - // console.print_debug('Debug: Verified node children: ${verify_node.children.len}') - // console.print_debug('Debug: Original data size: ${parent_data.len}') - // console.print_debug('Debug: Verified data size: ${verify_data.len}') - // console.print_debug('Debug: Data equal: ${verify_data == parent_data}') - return error('Node update failed - children array is empty') - } + // keep children sorted lexicographically + sort_children(mut parent.children) + rt.db.set(id: current_id, data: serialize_node(parent))! return } - child := node.children[matched_child] - common_prefix := get_common_prefix(key[offset..], child.key_part) - - if common_prefix.len < child.key_part.len { - // Split existing node - mut child_node := deserialize_node(rt.db.get(child.node_id)!)! - - // Create new intermediate node - mut new_node := Node{ - key_segment: child.key_part[common_prefix.len..] - value: child_node.value - children: child_node.children - is_leaf: child_node.is_leaf - } - new_id := rt.db.set(data: serialize_node(new_node))! - - // Update current node - node.children[matched_child] = NodeRef{ - key_part: common_prefix - node_id: new_id - } - rt.db.set(id: current_id, data: serialize_node(node))! + // we have overlap with child + mut chref := node.children[best_idx] + child_part := chref.key_part + if best_cp == child_part.len { + // child_part is fully consumed by rem -> descend + current_id = chref.node_id + offset += best_cp + continue } - if offset + common_prefix.len == key.len { - // Update value at existing node - mut child_node := deserialize_node(rt.db.get(child.node_id)!)! 
- child_node.value = value - child_node.is_leaf = true - rt.db.set(id: child.node_id, data: serialize_node(child_node))! - return + // need to split the existing child + // new intermediate node with edge = common prefix + common := get_common_prefix(rem, child_part) + child_suffix := child_part[common.len..] + rem_suffix := rem[common.len..] + + mut old_child := deserialize_node(rt.db.get(chref.node_id)!)! + + // new node representing the existing child's suffix + split_child := Node{ + key_segment: child_suffix + value: old_child.value + children: old_child.children + is_leaf: old_child.is_leaf + } + split_child_id := rt.db.set(data: serialize_node(split_child))! + + // build the intermediate + mut intermediate := Node{ + key_segment: '' // not used at traversal time + value: []u8{} + children: [NodeRef{ + key_part: child_suffix + node_id: split_child_id + }] + is_leaf: false } - offset += common_prefix.len - current_id = child.node_id + // if our new key ends exactly at the common prefix, the intermediate becomes a leaf + if rem_suffix.len == 0 { + intermediate.is_leaf = true + intermediate.value = value + } else { + // add second child for our new key's remainder + new_leaf := Node{ + key_segment: rem_suffix + value: value + children: []NodeRef{} + is_leaf: true + } + new_leaf_id := rt.db.set(data: serialize_node(new_leaf))! + intermediate.children << NodeRef{ + key_part: rem_suffix + node_id: new_leaf_id + } + // keep children sorted + sort_children(mut intermediate.children) + } + + // write intermediate, get id + interm_id := rt.db.set(data: serialize_node(intermediate))! + + // replace the matched child on parent with the intermediate (edge = common) + node.children[best_idx] = NodeRef{ + key_part: common + node_id: interm_id + } + rt.db.set(id: current_id, data: serialize_node(node))! 
+ return } } @@ -202,40 +202,53 @@ pub fn (mut rt RadixTree) get(key string) ![]u8 { mut current_id := rt.root_id mut offset := 0 - // Handle empty key case - if key.len == 0 { - root_node := deserialize_node(rt.db.get(current_id)!)! - if root_node.is_leaf { - return root_node.value - } - return error('Key not found') - } - - for offset < key.len { + for { node := deserialize_node(rt.db.get(current_id)!)! + rem := key[offset..] - mut found := false - for child in node.children { - if key[offset..].starts_with(child.key_part) { - if offset + child.key_part.len == key.len { - child_node := deserialize_node(rt.db.get(child.node_id)!)! - if child_node.is_leaf { - return child_node.value - } - } - current_id = child.node_id - offset += child.key_part.len - found = true - break + if rem.len == 0 { + // reached end of key + if node.is_leaf { + return node.value } - } - - if !found { return error('Key not found') } + + // binary search for matching child (since children are sorted) + child_idx := rt.find_child_with_prefix(node.children, rem) + if child_idx == -1 { + return error('Key not found') + } + + child := node.children[child_idx] + common_prefix := get_common_prefix(rem, child.key_part) + + if common_prefix.len != child.key_part.len { + // partial match - key doesn't exist + return error('Key not found') + } + + current_id = child.node_id + offset += child.key_part.len + } + + return error('Key not found') +} + +// Binary search helper to find child with matching prefix +fn (rt RadixTree) find_child_with_prefix(children []NodeRef, key string) int { + if children.len == 0 || key.len == 0 { + return -1 } - return error('Key not found') + // For now, use linear search but with proper common prefix logic + // TODO: implement true binary search based on first character + for i, child in children { + if get_common_prefix(key, child.key_part).len > 0 { + return i + } + } + return -1 } // Updates the value at a given key prefix, preserving the prefix while replacing the 
remainder @@ -283,7 +296,20 @@ pub fn (mut rt RadixTree) update(prefix string, new_value []u8) ! { pub fn (mut rt RadixTree) delete(key string) ! { mut current_id := rt.root_id mut offset := 0 - mut path := []NodeRef{} + mut path := []PathInfo{} // Track node IDs and edge labels in the path + + // Handle empty key case + if key.len == 0 { + mut root_node := deserialize_node(rt.db.get(current_id)!)! + if !root_node.is_leaf { + return error('Key not found') + } + root_node.is_leaf = false + root_node.value = []u8{} + rt.db.set(id: current_id, data: serialize_node(root_node))! + rt.maybe_compress_with_path(current_id, path)! + return + } // Find the node to delete for offset < key.len { @@ -291,21 +317,23 @@ pub fn (mut rt RadixTree) delete(key string) ! { mut found := false for child in node.children { - if key[offset..].starts_with(child.key_part) { - path << child - current_id = child.node_id - offset += child.key_part.len - found = true - - // Check if we've matched the full key - if offset == key.len { - child_node := deserialize_node(rt.db.get(child.node_id)!)! - if child_node.is_leaf { - found = true - break + common_prefix := get_common_prefix(key[offset..], child.key_part) + if common_prefix.len > 0 { + if common_prefix.len == child.key_part.len { + // Full match with child edge + path << PathInfo{ + node_id: current_id + edge_to_child: child.key_part + child_id: child.node_id } + current_id = child.node_id + offset += child.key_part.len + found = true + break + } else { + // Partial match - key doesn't exist + return error('Key not found') } - break } } @@ -314,97 +342,181 @@ pub fn (mut rt RadixTree) delete(key string) ! { } } - if path.len == 0 { + // Check if the target node is actually a leaf + mut target_node := deserialize_node(rt.db.get(current_id)!)! + if !target_node.is_leaf { return error('Key not found') } - // Get the node to delete - mut last_node := deserialize_node(rt.db.get(path.last().node_id)!)! 
- // If the node has children, just mark it as non-leaf - if last_node.children.len > 0 { - last_node.is_leaf = false - last_node.value = []u8{} - rt.db.set(id: path.last().node_id, data: serialize_node(last_node))! + if target_node.children.len > 0 { + target_node.is_leaf = false + target_node.value = []u8{} + rt.db.set(id: current_id, data: serialize_node(target_node))! + rt.maybe_compress_with_path(current_id, path)! return } - // If node has no children, remove it from parent - if path.len > 1 { - mut parent_node := deserialize_node(rt.db.get(path[path.len - 2].node_id)!)! + // Node has no children, remove it from parent + if path.len > 0 { + parent_info := path.last() + parent_id := parent_info.node_id + mut parent_node := deserialize_node(rt.db.get(parent_id)!)! + + // Remove the child reference for i, child in parent_node.children { - if child.node_id == path.last().node_id { + if child.node_id == current_id { parent_node.children.delete(i) break } } - rt.db.set(id: path[path.len - 2].node_id, data: serialize_node(parent_node))! - - // Delete the node from the database - rt.db.delete(path.last().node_id)! + + rt.db.set(id: parent_id, data: serialize_node(parent_node))! + rt.db.delete(current_id)! + + // Compress the parent if needed + rt.maybe_compress_with_path(parent_id, path[..path.len-1])! } else { - // If this is a direct child of the root, just mark it as non-leaf - last_node.is_leaf = false - last_node.value = []u8{} - rt.db.set(id: path.last().node_id, data: serialize_node(last_node))! + // This is the root node, just mark as non-leaf + target_node.is_leaf = false + target_node.value = []u8{} + rt.db.set(id: current_id, data: serialize_node(target_node))! + } +} + +// Helper function for path compression after deletion (legacy version) +fn (mut rt RadixTree) maybe_compress(node_id u32) ! { + rt.maybe_compress_with_path(node_id, []PathInfo{})! 
+} + +// Helper function for path compression after deletion with path information +fn (mut rt RadixTree) maybe_compress_with_path(node_id u32, path []PathInfo) ! { + mut node := deserialize_node(rt.db.get(node_id)!)! + if node.is_leaf { + return + } + if node.children.len != 1 { + return + } + + ch := node.children[0] + mut child_node := deserialize_node(rt.db.get(ch.node_id)!)! + + // merge child into node by lifting child's children and value + node.is_leaf = child_node.is_leaf + node.value = child_node.value + node.children = child_node.children.clone() + + rt.db.set(id: node_id, data: serialize_node(node))! + rt.db.delete(ch.node_id)! + + // Update the parent's edge to include the compressed path + if path.len > 0 { + // Find the parent that points to this node + for i := path.len - 1; i >= 0; i-- { + if path[i].child_id == node_id { + parent_id := path[i].node_id + mut parent_node := deserialize_node(rt.db.get(parent_id)!)! + + // Update the edge label to include the compressed segment + for j, child in parent_node.children { + if child.node_id == node_id { + parent_node.children[j].key_part += ch.key_part + rt.db.set(id: parent_id, data: serialize_node(parent_node))! + break + } + } + break + } + } } } // Lists all keys with a given prefix pub fn (mut rt RadixTree) list(prefix string) ![]string { mut result := []string{} - - // Handle empty prefix case - will return all keys + if prefix.len == 0 { rt.collect_all_keys(rt.root_id, '', mut result)! return result } - - // Start from the root and find all matching keys - rt.find_keys_with_prefix(rt.root_id, '', prefix, mut result)! - return result + + node_info := rt.find_node_for_prefix_with_path(prefix) or { + // prefix not found, return empty result + return result + } + rt.collect_all_keys(node_info.node_id, node_info.path, mut result)! 
+ + // Filter results to only include keys that actually start with the prefix + mut filtered_result := []string{} + for key in result { + if key.starts_with(prefix) { + filtered_result << key + } + } + + return filtered_result } -// Helper function to find all keys with a given prefix -fn (mut rt RadixTree) find_keys_with_prefix(node_id u32, current_path string, prefix string, mut result []string) ! { - node := deserialize_node(rt.db.get(node_id)!)! +struct NodePathInfo { + node_id u32 + path string +} - // If the current path already matches or exceeds the prefix length - if current_path.len >= prefix.len { - // Check if the current path starts with the prefix - if current_path.starts_with(prefix) { - // If this is a leaf node, add it to the results - if node.is_leaf { - result << current_path +// Find the node where a prefix ends and return both node ID and the actual path to that node +fn (mut rt RadixTree) find_node_for_prefix_with_path(prefix string) !NodePathInfo { + mut current_id := rt.root_id + mut offset := 0 + mut current_path := '' + + for offset < prefix.len { + node := deserialize_node(rt.db.get(current_id)!)! + rem := prefix[offset..] + + mut found := false + for child in node.children { + common_prefix := get_common_prefix(rem, child.key_part) + cp_len := common_prefix.len + + if cp_len == 0 { + continue } - - // Collect all keys from this subtree - for child in node.children { - child_path := current_path + child.key_part - rt.find_keys_with_prefix(child.node_id, child_path, prefix, mut result)! 
+ + if cp_len == child.key_part.len { + // child edge is fully consumed by prefix + current_id = child.node_id + current_path += child.key_part + offset += cp_len + found = true + break + } else if cp_len == rem.len { + // prefix ends inside this edge; we need to collect keys from this subtree + // but only those that actually start with the full prefix + return NodePathInfo{ + node_id: current_id + path: current_path + } + } else { + // diverged -> no matches + return error('Prefix not found') } } - return - } - - // Current path is shorter than the prefix, continue searching - for child in node.children { - child_path := current_path + child.key_part - - // Check if this child's path could potentially match the prefix - if prefix.starts_with(current_path) { - // The prefix starts with the current path, so we need to check if - // the child's key_part matches the next part of the prefix - prefix_remainder := prefix[current_path.len..] - - // If the prefix remainder starts with the child's key_part or vice versa - if prefix_remainder.starts_with(child.key_part) - || (child.key_part.starts_with(prefix_remainder) - && child.key_part.len >= prefix_remainder.len) { - rt.find_keys_with_prefix(child.node_id, child_path, prefix, mut result)! - } + + if !found { + return error('Prefix not found') } } + + return NodePathInfo{ + node_id: current_id + path: current_path + } +} + +// Find the node where a prefix ends (or the subtree root for that prefix) +fn (mut rt RadixTree) find_node_for_prefix(prefix string) !u32 { + info := rt.find_node_for_prefix_with_path(prefix)! 
+ return info.node_id } // Helper function to recursively collect all keys under a node @@ -448,3 +560,16 @@ fn get_common_prefix(a string, b string) string { } return a[..i] } + +// Helper function to sort children lexicographically +fn sort_children(mut children []NodeRef) { + children.sort_with_compare(fn (a &NodeRef, b &NodeRef) int { + return if a.key_part < b.key_part { + -1 + } else if a.key_part > b.key_part { + 1 + } else { + 0 + } + }) +} diff --git a/lib/data/radixtree/radixtree_debug.v b/lib/data/radixtree/radixtree_debug.v index 86062683..b277902b 100644 --- a/lib/data/radixtree/radixtree_debug.v +++ b/lib/data/radixtree/radixtree_debug.v @@ -84,8 +84,8 @@ pub fn (mut rt RadixTree) print_tree_from_node(node_id u32, indent string) ! { // Prints the entire tree structure starting from root pub fn (mut rt RadixTree) print_tree() ! { - // console.print_debug('\nRadix Tree Structure:') - // console.print_debug('===================') + console.print_debug('\nRadix Tree Structure:') + console.print_debug('===================') rt.print_tree_from_node(rt.root_id, '')! 
} diff --git a/lib/data/radixtree/serialize.v b/lib/data/radixtree/serialize.v index 0c093b54..64134414 100644 --- a/lib/data/radixtree/serialize.v +++ b/lib/data/radixtree/serialize.v @@ -2,7 +2,9 @@ module radixtree import freeflowuniverse.herolib.data.encoder -const version = u8(1) // Current binary format version +const version = u8(2) // Updated binary format version +const max_inline_value_size = 1024 // Values larger than this are stored out-of-line +const max_inline_children = 64 // Children lists larger than this are paged // Serializes a node to bytes for storage fn serialize_node(node Node) []u8 { @@ -11,22 +13,56 @@ fn serialize_node(node Node) []u8 { // Add version byte e.add_u8(version) - // Add key segment - e.add_string(node.key_segment) + // Add flags byte (bit 0: is_leaf, bit 1: has_out_of_line_value, bit 2: has_paged_children) + mut flags := u8(0) + if node.is_leaf { + flags |= 0x01 + } + + // Check if value should be stored out-of-line + has_large_value := node.value.len > max_inline_value_size + if has_large_value { + flags |= 0x02 + } + + // Check if children should be paged + has_many_children := node.children.len > max_inline_children + if has_many_children { + flags |= 0x04 + } + + e.add_u8(flags) - // Add value as []u8 - e.add_u16(u16(node.value.len)) - e.data << node.value + // Note: key_segment is redundant and not stored (saves space) + // It's only used for debugging and can be computed from traversal path - // Add children - e.add_u16(u16(node.children.len)) - for child in node.children { - e.add_string(child.key_part) - e.add_u32(child.node_id) + // Add value (inline or reference) + if has_large_value { + // TODO: Store value out-of-line and store reference ID + // For now, store inline but with u32 length to support larger values + e.add_u32(u32(node.value.len)) + e.data << node.value + } else { + e.add_u16(u16(node.value.len)) + e.data << node.value } - // Add leaf flag - e.add_u8(if node.is_leaf { u8(1) } else { u8(0) }) + // Add 
children (inline or paged) + if has_many_children { + // TODO: Implement child paging for large fan-out + // For now, store inline but warn about potential size issues + e.add_u16(u16(node.children.len)) + for child in node.children { + e.add_string(child.key_part) + e.add_u32(child.node_id) + } + } else { + e.add_u16(u16(node.children.len)) + for child in node.children { + e.add_string(child.key_part) + e.add_u32(child.node_id) + } + } return e.data } @@ -37,11 +73,79 @@ fn deserialize_node(data []u8) !Node { // Read and verify version version_byte := d.get_u8()! - if version_byte != version { + if version_byte == 1 { + // Handle old format for backward compatibility + return deserialize_node_v1(data) + } else if version_byte != version { return error('Invalid version byte: expected ${version}, got ${version_byte}') } - // Read key segment + // Read flags + flags := d.get_u8()! + is_leaf := (flags & 0x01) != 0 + has_out_of_line_value := (flags & 0x02) != 0 + has_paged_children := (flags & 0x04) != 0 + + // Read value + mut value := []u8{} + if has_out_of_line_value { + // TODO: Read value reference and fetch from separate storage + value_len := d.get_u32()! + value = []u8{len: int(value_len)} + for i in 0 .. int(value_len) { + value[i] = d.get_u8()! + } + } else { + value_len := d.get_u16()! + value = []u8{len: int(value_len)} + for i in 0 .. int(value_len) { + value[i] = d.get_u8()! + } + } + + // Read children + mut children := []NodeRef{} + if has_paged_children { + // TODO: Read child page references and fetch children + children_len := d.get_u16()! + children = []NodeRef{cap: int(children_len)} + for _ in 0 .. children_len { + key_part := d.get_string()! + node_id := d.get_u32()! + children << NodeRef{ + key_part: key_part + node_id: node_id + } + } + } else { + children_len := d.get_u16()! + children = []NodeRef{cap: int(children_len)} + for _ in 0 .. children_len { + key_part := d.get_string()! + node_id := d.get_u32()! 
+ children << NodeRef{ + key_part: key_part + node_id: node_id + } + } + } + + return Node{ + key_segment: '' // Not stored in new format + value: value + children: children + is_leaf: is_leaf + } +} + +// Backward compatibility for version 1 format +fn deserialize_node_v1(data []u8) !Node { + mut d := encoder.decoder_new(data) + + // Skip version byte (already read) + d.get_u8()! + + // Read key segment (ignored in new format) key_segment := d.get_string()! // Read value as []u8