codewalker

This commit is contained in:
2025-11-24 05:48:13 +01:00
parent 4402cba8ac
commit d282a5dc95
29 changed files with 1412 additions and 453 deletions

142
lib/ai/codewalker/README.md Normal file
View File

@@ -0,0 +1,142 @@
# CodeWalker Module
Parse directories or formatted strings into file maps with automatic ignore pattern support.
## Features
- 📂 Walk directories recursively and build file maps
- 🚫 Respect `.gitignore` and `.heroignore` ignore patterns with directory scoping
- 📝 Parse custom `===FILE:name===` format into file maps
- 📦 Export/write file maps to disk
- 🛡️ Robust, defensive parsing (handles spaces, variable `=` length, case-insensitive)
## Quick Start
### From Directory Path
```v
import incubaid.herolib.ai.codewalker
mut cw := codewalker.new()
mut fm := cw.filemap_get(path: '/path/to/project')!
// Iterate files
for path, content in fm.content {
println('${path}: ${content.len} bytes')
}
```
### From Formatted String
```v
content_str := '
===FILE:main.v===
fn main() {
println("Hello!")
}
===FILE:utils/helper.v===
pub fn help() {}
===END===
'
mut cw := codewalker.new()
mut fm := cw.parse(content_str)!
println(fm.get('main.v')!)
```
## FileMap Operations
```v
// Get file content
content := fm.get('path/to/file.txt')!
// Set/modify file
fm.set('new/file.txt', 'content here')
// Find files by prefix
files := fm.find('src/')
// Export to directory
fm.export('/output/dir')!
// Write updates to directory
fm.write('/project/dir')!
// Convert back to formatted string
text := fm.content()
```
## File Format
### Full Files
```
===FILE:path/to/file.txt===
File content here
Can span multiple lines
===END===
```
### Partial Content (for future morphing)
```
===FILECHANGE:src/models.v===
struct User {
id int
}
===END===
```
### Both Together
```
===FILE:main.v===
fn main() {}
===FILECHANGE:utils.v===
fn helper() {}
===END===
```
## Parsing Robustness
Parser handles variations:
```
===FILE:name.txt=== // Standard
= = FILE : name.txt = = // Extra spaces
===file:name.txt=== // Lowercase
==FILE:name.txt== // Different = count
```
## Error Handling
Errors are collected in `FileMap.errors`:
```v
mut fm := cw.filemap_get(content: str)!
if fm.errors.len > 0 {
for err in fm.errors {
println('Line ${err.linenr}: ${err.message}')
}
}
```
## Ignore Patterns
- Respects `.gitignore` and `.heroignore` in any directory
- Patterns are scoped to the directory that contains them
- Default patterns include `.git/`, `node_modules/`, `*.pyc`, etc.
- Use `/` suffix for directory patterns: `dist/`
- Use `*` for wildcards: `*.log`
- Lines starting with `#` are comments
Example `.heroignore`:
```
build/
*.tmp
.env
__pycache__/
```

View File

@@ -0,0 +1,212 @@
module codewalker
import incubaid.herolib.core.pathlib
// CodeWalker builds FileMaps, either by walking a directory on disk or by
// parsing a `===FILE:name===` formatted string.
pub struct CodeWalker {
pub mut:
// Ignore rules consulted through is_ignored() while walking a directory.
ignorematcher IgnoreMatcher
}
// FileMapArgs selects the input for filemap_get.
// `path` is checked first; `content` is only used when `path` is empty.
@[params]
pub struct FileMapArgs {
pub mut:
path string // Directory to walk
content string // Formatted string to parse
content_read bool = true // If false, file content not read from disk (entries map to '')
}
// parse turns a `===FILE:name===` formatted string into a FileMap.
// It is a thin public alias for filemap_get_from_content.
pub fn (mut cw CodeWalker) parse(content string) !FileMap {
	fm := cw.filemap_get_from_content(content)!
	return fm
}
// filemap_get builds a FileMap from a directory (`path`) or from a formatted
// string (`content`). `path` wins when both are set; an error is returned when
// neither is provided.
pub fn (mut cw CodeWalker) filemap_get(args FileMapArgs) !FileMap {
	if args.path != '' {
		return cw.filemap_get_from_path(args.path, args.content_read)!
	}
	if args.content != '' {
		return cw.filemap_get_from_content(args.content)!
	}
	return error('Either path or content must be provided')
}
// filemap_get_from_path lists `path` and returns a FileMap keyed by the path
// relative to `path`. Ignore files found in the listing are loaded first, each
// scoped to the directory that contains it; ignored files are then skipped.
// When `content_read` is false every entry maps to an empty string.
// Returns an error when `path` does not exist or is not a directory.
fn (mut cw CodeWalker) filemap_get_from_path(path string, content_read bool) !FileMap {
	mut root := pathlib.get(path)
	if !(root.exists() && root.is_dir()) {
		return error('Directory "${path}" does not exist')
	}
	mut listing := root.list(ignore_default: false)!
	mut fm := FileMap{
		source: path
	}
	// Pass 1: register every .gitignore / .heroignore before any file is
	// filtered, so rules apply regardless of listing order.
	for mut entry in listing.paths {
		if !entry.is_file() {
			continue
		}
		entry_name := entry.name()
		if entry_name != '.gitignore' && entry_name != '.heroignore' {
			continue
		}
		// Unreadable ignore files are treated as empty (best effort).
		rules := entry.read() or { '' }
		if rules == '' {
			continue
		}
		rel := entry.path_relative(path) or { '' }
		// An ignore file at the root has no '/' in its relative path -> global scope.
		mut scope := ''
		if rel.contains('/') {
			scope = rel.all_before_last('/')
		}
		cw.ignorematcher.add_content_with_base(scope, rules)
	}
	// Pass 2: collect the files that survive the ignore rules.
	for mut entry in listing.paths {
		if !entry.is_file() {
			continue
		}
		entry_name := entry.name()
		if entry_name == '.gitignore' || entry_name == '.heroignore' {
			continue
		}
		relpath := entry.path_relative(path)!
		if cw.ignorematcher.is_ignored(relpath) {
			continue
		}
		mut text := ''
		if content_read {
			text = entry.read()!
		}
		fm.content[relpath] = text
	}
	return fm
}
// parse_header extracts the block kind and filename from a marker line.
// Any mix of `=` and whitespace is accepted around the marker and the keyword
// is case-insensitive.
// Example: ` ===FILE: myfile.txt ===` -> (BlockKind.file, 'myfile.txt')
// Example: `= = filechange : a.v = =` -> (BlockKind.filechange, 'a.v')
//
// Returns (BlockKind.end, '') for END markers and for every line that is not
// a FILE/FILECHANGE header. Returns an error only when a FILE/FILECHANGE
// header carries an empty filename.
fn parse_header(line string) !(BlockKind, string) {
	cleaned := line.trim_space()
	// A marker always contains at least one `=`.
	if !cleaned.contains('=') {
		return BlockKind.end, ''
	}
	// Strip `=` and whitespace in any combination from both ends, so
	// `==X==`, `=== X ===` and `= = X = =` all reduce to `X`.
	content := cleaned.trim('= \t')
	if content.len == 0 || content.to_lower() == 'end' {
		return BlockKind.end, ''
	}
	if !content.contains(':') {
		return BlockKind.end, ''
	}
	kind_str := content.all_before(':').to_lower().trim_space()
	filename := content.all_after(':').trim_space()
	kind := match kind_str {
		'file' { BlockKind.file }
		'filechange' { BlockKind.filechange }
		else { BlockKind.end }
	}
	if kind == .end {
		// Not a FILE/FILECHANGE header (e.g. `if x == 1:`); never an error.
		return BlockKind.end, ''
	}
	if filename.len < 1 {
		return error('Invalid filename: empty after colon')
	}
	return kind, filename
}
// is_block_marker reports whether a line is a recognised block marker:
// it starts and ends with `=` and, once `=`/whitespace is stripped, reads
// `END`, `FILE:...` or `FILECHANGE:...` (case-insensitive).
// Anything else, including lines made only of `=`, is ordinary content.
fn is_block_marker(line string) bool {
	trimmed := line.trim_space()
	if !trimmed.starts_with('=') || !trimmed.ends_with('=') {
		return false
	}
	inner := trimmed.trim('= \t').to_lower()
	if inner == 'end' {
		return true
	}
	if !inner.contains(':') {
		return false
	}
	keyword := inner.all_before(':').trim_space()
	return keyword == 'file' || keyword == 'filechange'
}

// store_parsed_block saves the accumulated lines of one block into the map
// that matches its kind. Kinds other than file/filechange store nothing.
fn store_parsed_block(mut fm FileMap, kind BlockKind, filename string, lines []string) {
	match kind {
		.file { fm.content[filename] = lines.join_lines() }
		.filechange { fm.content_change[filename] = lines.join_lines() }
		else {}
	}
}

// filemap_get_from_content parses a string in `===FILE:name===` format.
// FILE blocks go to `content`, FILECHANGE blocks to `content_change`.
// A block ends at the next FILE/FILECHANGE header, at an END marker, or at
// end of input. Recoverable problems (stray END, content outside a block) are
// collected in `FileMap.errors`; only a header with an empty filename aborts
// the parse with an error.
fn (mut cw CodeWalker) filemap_get_from_content(content string) !FileMap {
	mut fm := FileMap{}
	mut current_kind := BlockKind.end
	mut filename := ''
	mut block := []string{}
	mut had_any_block := false
	mut linenr := 0
	for line in content.split_into_lines() {
		linenr += 1
		// Only real markers go through parse_header: it returns `.end` for any
		// non-header line, which would otherwise close the block on its first
		// content line.
		if !is_block_marker(line) {
			if filename != '' {
				block << line
			} else if line.trim_space().len > 0 {
				msg := if had_any_block {
					"Content outside of any FILE block: '${line}'"
				} else {
					"Content before first FILE block: '${line}'"
				}
				fm.errors << FMError{
					message:  msg
					linenr:   linenr
					category: 'parse'
				}
			}
			continue
		}
		kind, name := parse_header(line)!
		match kind {
			.file, .filechange {
				// A new header implicitly closes the previous block.
				if filename != '' {
					store_parsed_block(mut fm, current_kind, filename, block)
				}
				filename = name
				current_kind = kind
				block = []string{}
				had_any_block = true
			}
			.end {
				if filename == '' {
					msg := if had_any_block {
						'Unexpected END marker without active block'
					} else {
						'END found before any FILE block'
					}
					fm.errors << FMError{
						message:  msg
						linenr:   linenr
						category: 'parse'
					}
				} else {
					store_parsed_block(mut fm, current_kind, filename, block)
					filename = ''
					block = []string{}
					current_kind = .end
				}
			}
		}
	}
	// Input may end without an END marker; keep the last block.
	if filename != '' {
		store_parsed_block(mut fm, current_kind, filename, block)
	}
	return fm
}

View File

@@ -0,0 +1,253 @@
module codewalker
import os
import incubaid.herolib.core.pathlib
// A single FILE block closed by END yields exactly one entry.
fn test_parse_basic() {
	mut walker := new()
	input := ['===FILE:file1.txt===', 'line1', 'line2', '===END==='].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'line1\nline2'
}
// A second FILE header closes the first block without an explicit END.
fn test_parse_multiple_files() {
	mut walker := new()
	input := [
		'===FILE:file1.txt===',
		'line1',
		'===FILE:file2.txt===',
		'lineA',
		'lineB',
		'===END===',
	].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 2
	assert parsed.content['file1.txt'] == 'line1'
	assert parsed.content['file2.txt'] == 'lineA\nlineB'
}
// A header immediately followed by END produces an empty file entry.
fn test_parse_empty_file_block() {
	mut walker := new()
	input := ['===FILE:empty.txt===', '===END==='].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['empty.txt'] == ''
}
// Two blocks, each closed by END; headers carry stray spaces around the marker.
// The second header uses the `FILE:` prefix: a bare `=== name ===` line is not
// a header in the documented format.
fn test_parse_consecutive_end_and_file() {
	mut cw := new()
	test_content := '===FILE:file1.txt ===\ncontent1\n===END===\n=== FILE:file2.txt===\ncontent2\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 2
	assert fm.content['file1.txt'] == 'content1'
	assert fm.content['file2.txt'] == 'content2'
}
// Content ahead of the first header is reported in FileMap.errors while the
// following block is still parsed. Errors live on the FileMap; CodeWalker has
// no `errors` field.
fn test_parse_content_before_first_file_block() {
	mut cw := new()
	test_content := 'unexpected content\n===FILE:file1.txt===\ncontent\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 1
	assert fm.content['file1.txt'] == 'content'
	assert fm.errors.len > 0
	assert fm.errors[0].message.contains('Content before first FILE block')
}
// Text following the closing END must not leak into the parsed file content.
fn test_parse_content_after_end() {
	mut walker := new()
	input := ['===FILE:file1.txt===', 'content', '===END===', 'more unexpected content'].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'content'
}
// A FILE header with nothing after the colon makes parse() return an error.
fn test_parse_invalid_filename_line() {
	mut cw := new()
	test_content := '===FILE:===\ncontent\n===END==='
	cw.parse(test_content) or {
		assert err.msg().contains('Invalid filename')
		return
	}
	assert false // Should have errored
}
// A block still open at end of input is kept.
fn test_parse_file_ending_without_end() {
	mut walker := new()
	input := ['===FILE:file1.txt===', 'line1', 'line2'].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 1
	assert parsed.content['file1.txt'] == 'line1\nline2'
}
// Empty input parses to an empty FileMap.
fn test_parse_empty_content() {
	mut walker := new()
	parsed := walker.parse('')!
	assert parsed.content.len == 0
}
// A lone END is a recoverable problem: parse() succeeds and the issue is
// recorded in FileMap.errors.
fn test_parse_only_end_at_start() {
	mut cw := new()
	test_content := '===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 0
	assert fm.errors.len == 1
	assert fm.errors[0].message.contains('END found before any FILE block')
}
// FILE and FILECHANGE blocks for the same path are stored in separate maps.
fn test_parse_mixed_file_and_filechange() {
	// new() returns a plain CodeWalker, so there is no Result to unwrap.
	mut cw2 := new()
	test_content2 := '===FILE:file.txt===\nfull\n===FILECHANGE:file.txt===\npartial\n===END==='
	fm2 := cw2.parse(test_content2)!
	assert fm2.content.len == 1
	assert fm2.content_change.len == 1
	assert fm2.content['file.txt'] == 'full'
	assert fm2.content_change['file.txt'] == 'partial'
}
// An empty block sandwiched between two non-empty ones is preserved.
fn test_parse_empty_block_between_files() {
	mut walker := new()
	input := [
		'===FILE:file1.txt===',
		'content1',
		'===FILE:file2.txt===',
		'===END===',
		'===FILE:file3.txt===',
		'content3',
		'===END===',
	].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 3
	assert parsed.content['file1.txt'] == 'content1'
	assert parsed.content['file2.txt'] == ''
	assert parsed.content['file3.txt'] == 'content3'
}
// Several consecutive empty blocks each produce their own entry.
fn test_parse_multiple_empty_blocks() {
	mut walker := new()
	input := [
		'===FILE:file1.txt===',
		'===END===',
		'===FILE:file2.txt===',
		'===END===',
		'===FILE:file3.txt===',
		'content3',
		'===END===',
	].join('\n')
	parsed := walker.parse(input)!
	assert parsed.content.len == 3
	assert parsed.content['file1.txt'] == ''
	assert parsed.content['file2.txt'] == ''
	assert parsed.content['file3.txt'] == 'content3'
}
// END is always a terminator, never a filename: a second END right after a
// closed block is reported in FileMap.errors and does not create an entry.
fn test_parse_filename_end_reserved() {
	mut cw := new()
	test_content := '===FILE:file1.txt===\ncontent1\n===END===\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 1
	assert fm.content['file1.txt'] == 'content1'
	assert 'END' !in fm.content
	assert fm.errors.len == 1
	assert fm.errors[0].message.contains('Unexpected END marker')
}
// export() materialises a FileMap on disk; write() overwrites existing files.
fn test_filemap_export_and_write() ! {
	// Scratch directory used as the FileMap source
	src_path := os.join_path(os.temp_dir(), 'cw_test')
	mut src_dir := pathlib.get_dir(path: src_path, create: true, empty: true)!
	defer {
		src_dir.delete() or {}
	}
	mut fm := FileMap{
		source: src_dir.path
	}
	fm.set('a/b.txt', 'hello')
	fm.set('c.txt', 'world')
	// Separate output directory receiving the export
	out_path := os.join_path(os.temp_dir(), 'cw_out')
	mut out_dir := pathlib.get_dir(path: out_path, create: true, empty: true)!
	defer {
		out_dir.delete() or {}
	}
	fm.export(out_dir.path)!
	mut nested := pathlib.get_file(path: os.join_path(out_dir.path, 'a/b.txt'))!
	mut top := pathlib.get_file(path: os.join_path(out_dir.path, 'c.txt'))!
	assert nested.read()! == 'hello'
	assert top.read()! == 'world'
	// A changed entry must replace the file already on disk
	fm.set('a/b.txt', 'hello2')
	fm.write(out_dir.path)!
	assert nested.read()! == 'hello2'
}
// content() emits a header per FILE and FILECHANGE entry plus a closing END.
fn test_filemap_content_roundtrip() {
	mut fm := FileMap{}
	fm.set('x.txt', 'X')
	fm.content_change['y.txt'] = 'Y'
	rendered := fm.content()
	for marker in ['===FILE:x.txt===', '===FILECHANGE:y.txt===', '===END==='] {
		assert rendered.contains(marker)
	}
}
// A .heroignore inside `sub/` only affects paths below `sub/`.
fn test_ignore_level_scoped() ! {
	tmp_root := os.join_path(os.temp_dir(), 'cw_ign_lvl')
	mut root := pathlib.get_dir(path: tmp_root, create: true, empty: true)!
	defer {
		root.delete() or {}
	}
	// sub/ carries its own ignore file that excludes dist/
	mut sub_dir := pathlib.get_dir(path: os.join_path(root.path, 'sub'), create: true)!
	mut ignore_file := pathlib.get_file(
		path:   os.join_path(sub_dir.path, '.heroignore')
		create: true
	)!
	ignore_file.write('dist/\n')!
	// sub/dist/a.txt -> expected to be ignored
	mut sub_dist := pathlib.get_dir(path: os.join_path(sub_dir.path, 'dist'), create: true)!
	mut ignored_file := pathlib.get_file(
		path:   os.join_path(sub_dist.path, 'a.txt')
		create: true
	)!
	ignored_file.write('A')!
	// sub2/dist/b.txt -> sibling tree, outside the scope of sub/.heroignore
	mut sibling := pathlib.get_dir(path: os.join_path(root.path, 'sub2'), create: true)!
	mut sibling_dist := pathlib.get_dir(path: os.join_path(sibling.path, 'dist'), create: true)!
	mut kept_file := pathlib.get_file(
		path:   os.join_path(sibling_dist.path, 'b.txt')
		create: true
	)!
	kept_file.write('B')!
	// sub/ok.txt -> ordinary file, expected to be included
	mut plain_file := pathlib.get_file(path: os.join_path(sub_dir.path, 'ok.txt'), create: true)!
	plain_file.write('OK')!
	mut walker := new()
	mut fm := walker.filemap_get(path: root.path)!
	assert 'sub/dist/a.txt' !in fm.content.keys()
	assert fm.content['sub/ok.txt'] == 'OK'
	assert fm.content['sub2/dist/b.txt'] == 'B'
}
// A root-level .gitignore applies to matching directories at any depth.
fn test_ignore_level_scoped_gitignore() ! {
	tmp_root := os.join_path(os.temp_dir(), 'cw_ign_git')
	mut root := pathlib.get_dir(path: tmp_root, create: true, empty: true)!
	defer {
		root.delete() or {}
	}
	// Root ignore file excluding every logs/ directory
	mut ignore_file := pathlib.get_file(
		path:   os.join_path(root.path, '.gitignore')
		create: true
	)!
	ignore_file.write('logs/\n')!
	mut service_dir := pathlib.get_dir(path: os.join_path(root.path, 'svc'), create: true)!
	// svc/logs/out.txt -> covered by the root rule
	mut log_dir := pathlib.get_dir(path: os.join_path(service_dir.path, 'logs'), create: true)!
	mut log_file := pathlib.get_file(path: os.join_path(log_dir.path, 'out.txt'), create: true)!
	log_file.write('ignored')!
	// svc/app.txt -> ordinary file, expected to be included
	mut app_file := pathlib.get_file(
		path:   os.join_path(service_dir.path, 'app.txt')
		create: true
	)!
	app_file.write('app')!
	mut walker := new()
	mut fm := walker.filemap_get(path: root.path)!
	assert 'svc/logs/out.txt' !in fm.content.keys()
	assert fm.content['svc/app.txt'] == 'app'
}
// The legacy `===name===` header (no FILE: prefix) is not a block header:
// nothing is stored and the stray lines are reported in FileMap.errors.
fn test_parse_filename_end_reserved_legacy() {
	mut cw := new()
	test_content := '===file1.txt===\ncontent1\n===END==='
	fm := cw.parse(test_content)!
	assert fm.content.len == 0
	assert fm.content_change.len == 0
	assert fm.errors.len > 0
}

View File

@@ -0,0 +1,14 @@
module codewalker
// new returns a CodeWalker whose ignore matcher comes from gitignore_matcher_new().
pub fn new() CodeWalker {
	return CodeWalker{
		ignorematcher: gitignore_matcher_new()
	}
}
// filemap is a one-shot helper: it creates a fresh CodeWalker and forwards
// `args` to filemap_get.
pub fn filemap(args FileMapArgs) !FileMap {
	mut walker := new()
	fm := walker.filemap_get(args)!
	return fm
}

View File

@@ -0,0 +1,79 @@
module codewalker
import incubaid.herolib.core.pathlib
// FileMap holds a set of files keyed by relative path, together with the
// problems recorded while the map was parsed from text.
pub struct FileMap {
pub mut:
source string // Directory the map was built from (set by the directory walker)
content map[string]string // Full file content by path (FILE blocks)
content_change map[string]string // Partial/change content by path (FILECHANGE blocks)
errors []FMError // Parse errors encountered
}
// content renders the map in `===FILE:name===` text form: every FILE entry,
// then every FILECHANGE entry, followed by one closing `===END===` line.
pub fn (mut fm FileMap) content() string {
	mut lines := []string{cap: 2 * (fm.content.len + fm.content_change.len) + 1}
	for name, body in fm.content {
		lines << ['===FILE:${name}===', body]
	}
	for name, body in fm.content_change {
		lines << ['===FILECHANGE:${name}===', body]
	}
	lines << '===END==='
	return lines.join_lines()
}
// export writes every FILE entry below `path`, creating files as needed.
// It performs the same steps as write(), so it delegates to it.
pub fn (mut fm FileMap) export(path string) ! {
	fm.write(path)!
}
// WriteParams groups options for writing a FileMap.
// NOTE(review): no function visible in this file takes WriteParams, and the
// meaning of the flags is not established here - confirm intended use.
@[params]
pub struct WriteParams {
path string
v_test bool = true
v_format bool = true
python_test bool
}
// write stores every FILE entry below `path`, creating or overwriting files.
// Keys may originate from parsed text, so any key with a `..` segment is
// rejected: it would resolve outside `path`. All keys are validated before
// the first file is touched, so a rejected map leaves the disk unchanged.
pub fn (mut fm FileMap) write(path string) ! {
	for filepath, _ in fm.content {
		if '..' in filepath.replace('\\', '/').split('/') {
			return error('Refusing to write "${filepath}": path escapes destination directory')
		}
	}
	for filepath, filecontent in fm.content {
		dest := '${path}/${filepath}'
		mut filepathtowrite := pathlib.get_file(path: dest, create: true)!
		filepathtowrite.write(filecontent)!
	}
}
// get returns the full content stored for `relpath`, or an error when the
// path is not present in the map.
pub fn (fm FileMap) get(relpath string) !string {
	if relpath in fm.content {
		return fm.content[relpath]
	}
	return error('File not found: ${relpath}')
}
// set stores `content` as the full content of `relpath`, replacing any
// existing entry for that path.
pub fn (mut fm FileMap) set(relpath string, content string) {
fm.content[relpath] = content
}
// delete removes `relpath` from the full-content map.
// Entries in `content_change` are not touched.
pub fn (mut fm FileMap) delete(relpath string) {
fm.content.delete(relpath)
}
// find lists the paths in `content` that start with the prefix `path`.
pub fn (fm FileMap) find(path string) []string {
	return fm.content.keys().filter(it.starts_with(path))
}

154
lib/ai/codewalker/ignore.v Normal file
View File

@@ -0,0 +1,154 @@
module codewalker
// Default ignore patterns, one per line, in .gitignore-like syntax.
// gitignore_matcher_new() loads them as global (unscoped) rules.
const default_gitignore = '
.git/
.svn/
.hg/
.bzr/
node_modules/
__pycache__/
*.py[cod]
*.so
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.env
.venv
venv/
.tox/
.nox/
.coverage
.coveragerc
coverage.xml
*.cover
*.gem
*.pyc
.cache
.pytest_cache/
.mypy_cache/
.hypothesis/
.DS_Store
Thumbs.db
*.tmp
*.temp
*.log
'
// IgnoreRule is one ignore pattern plus the directory it is scoped to.
struct IgnoreRule {
base string // Relative directory the rule applies under; '' means everywhere
pattern string // Ignore pattern
}
// IgnoreMatcher holds ignore rules and answers is_ignored() queries against them.
pub struct IgnoreMatcher {
pub mut:
rules []IgnoreRule // Rules in the order they were added
}
// gitignore_matcher_new returns a matcher preloaded with `default_gitignore`
// as global rules.
pub fn gitignore_matcher_new() IgnoreMatcher {
	mut matcher := IgnoreMatcher{}
	matcher.add_content_with_base('', default_gitignore)
	return matcher
}
// add_content adds the patterns in `content` as global rules (empty base),
// so they apply to every path.
pub fn (mut m IgnoreMatcher) add_content(content string) {
m.add_content_with_base('', content)
}
// add_content_with_base registers one rule per non-empty, non-comment line of
// `content`, scoped to `base_rel`. The base is normalised to forward slashes,
// stripped of leading/trailing `/` and lowercased.
pub fn (mut m IgnoreMatcher) add_content_with_base(base_rel string, content string) {
	scope := base_rel.replace('\\', '/').trim('/').to_lower()
	for raw in content.split_into_lines() {
		entry := raw.trim_space()
		// Blank lines and `#` comments carry no rule.
		if entry == '' || entry.starts_with('#') {
			continue
		}
		m.rules << IgnoreRule{
			base:    scope
			pattern: entry
		}
	}
}
// is_ignored reports whether `relpath` matches any registered rule.
// Matching is case-insensitive and uses forward slashes. A rule with a base
// only applies to paths below that base and is matched against the remainder.
//
// Pattern forms:
// - `name/`  : a directory called `name` anywhere in the path
// - `*.ext`  : paths ending in `.ext`; the whole suffix counts, so `*.tar.gz`
//              does not match plain `.gz` files
// - `a*b`    : the literal parts must occur in order somewhere in the path
// - `name`   : a path segment (or run of segments) equal to `name`, so `.env`
//              does not match `app.environment`
// Negation (`!pattern`) and character classes (`[abc]`) are not interpreted.
pub fn (m IgnoreMatcher) is_ignored(relpath string) bool {
	path_low := relpath.replace('\\', '/').trim_left('/').to_lower()
	for rule in m.rules {
		pat := rule.pattern.replace('\\', '/').trim_space().to_lower()
		if pat == '' {
			continue
		}
		// Scope pattern to base directory
		mut sub := path_low
		if rule.base != '' {
			prefix := rule.base + '/'
			if !sub.starts_with(prefix) {
				continue
			}
			sub = sub[prefix.len..]
		}
		// Directory pattern
		if pat.ends_with('/') {
			dirpat := pat.trim('/')
			if sub == dirpat || sub.starts_with(dirpat + '/') || sub.contains('/' + dirpat + '/') {
				return true
			}
			continue
		}
		// Extension pattern: compare the complete suffix after the leading `*`
		if pat.starts_with('*.') && !pat[1..].contains('*') {
			if sub.ends_with(pat[1..]) {
				return true
			}
			continue
		}
		// Wildcard matching: every literal part must appear, in order
		if pat.contains('*') {
			mut idx := 0
			mut ok := true
			for part in pat.split('*') {
				if part == '' {
					continue
				}
				pos := sub.index_after(part, idx) or { -1 }
				if pos == -1 {
					ok = false
					break
				}
				idx = pos + part.len
			}
			if ok {
				return true
			}
			continue
		}
		// Plain name: match whole path segments only. Wrapping both sides in `/`
		// makes first, middle and last segments all match the same way.
		name := pat.trim('/')
		if name != '' && ('/' + sub + '/').contains('/' + name + '/') {
			return true
		}
	}
	return false
}

16
lib/ai/codewalker/model.v Normal file
View File

@@ -0,0 +1,16 @@
module codewalker
// BlockKind identifies a marker line in the `===FILE:name===` text format.
pub enum BlockKind {
file // `FILE:` header, full file content follows
filechange // `FILECHANGE:` header, partial/change content follows
end // `END` marker; also returned by parse_header for non-header lines
}
// FMError is one problem recorded while building a FileMap.
pub struct FMError {
pub:
message string // Human-readable description
linenr int // 1-based line number in the parsed input
category string // Error group; the parser uses 'parse'
filename string // Related file, if any (not populated by the parser visible here)
}

18
lib/ai/instruct.md Normal file
View File

@@ -0,0 +1,18 @@
fix @lib/ai/codewalker
- we should use enumerators for FILE & CHANGE
- we should document methods well but not much text just the basics to understand
- make sure parsing of FILE & CHANGE is super robust and defensive, e.g. tolerate spaces after `==` or `===`, accept any number of `=` characters, and match keywords case-insensitively
- codemap should not have errors, only kept at filemap level, remove those errors everywhere
check rest of code if no issues
fix readme.md
give the coding instructions with the full code output where changes needed

View File

@@ -0,0 +1,7 @@
module instructions
import incubaid.herolib.core.texttools
// Process-wide cache keyed by string.
// NOTE(review): the visible writer stores the resolved aiprompts directory
// under the key 'aiprompts_path' - confirm whether other keys are used.
__global (
instructions_cache map[string]string
)

View File

@@ -0,0 +1,39 @@
module heromodels
import incubaid.herolib.develop.gittools
import incubaid.herolib.core.pathlib
import incubaid.herolib.lib.develop.codewalker
// aiprompts_path returns the local path of the herolib `aiprompts` directory.
// The path is resolved through gittools on first use and then served from
// `instructions_cache`.
pub fn aiprompts_path() !string {
	if 'aiprompts_path' in instructions_cache {
		return instructions_cache['aiprompts_path']
	}
	resolved := gittools.path(
		git_url: 'https://github.com/Incubaid/herolib/tree/development/aiprompts'
	)!.path
	instructions_cache['aiprompts_path'] = resolved
	return resolved
}
// ai_instructions_hero_models returns the text of
// `ai_instructions_hero_models.md` from the aiprompts directory.
// Fails when the file does not exist (it is never created here).
pub fn ai_instructions_hero_models() !string {
	prompts_dir := aiprompts_path()!
	mut doc := pathlib.get_file(
		path:   '${prompts_dir}/ai_instructions_hero_models.md'
		create: false
	)!
	return doc.read()!
}
// ai_instructions_vlang_herolib_core returns the text of
// `vlang_herolib_core.md` from the aiprompts directory.
// Fails when the file does not exist (it is never created here).
pub fn ai_instructions_vlang_herolib_core() !string {
	prompts_dir := aiprompts_path()!
	mut doc := pathlib.get_file(
		path:   '${prompts_dir}/vlang_herolib_core.md'
		create: false
	)!
	return doc.read()!
}
// ai_instructions_herolib_core_all walks the `herolib_core` directory inside
// aiprompts and returns all of its files rendered in `===FILE:name===` format.
pub fn ai_instructions_herolib_core_all() !string {
	path := '${aiprompts_path()!}/herolib_core'
	// codewalker.new() returns a plain CodeWalker, so there is nothing to unwrap.
	mut cw := codewalker.new()
	mut filemap := cw.filemap_get(
		path: path
	)!
	return filemap.content()
}